diff --git a/module/Makefile.am b/module/Makefile.am index bae73168d..ca3852417 100644 --- a/module/Makefile.am +++ b/module/Makefile.am @@ -117,7 +117,6 @@ VALUE_LANG_SOURCES = \ language/value/spec.scm ECMASCRIPT_LANG_SOURCES = \ - language/ecmascript/parse-lalr.scm \ language/ecmascript/tokenize.scm \ language/ecmascript/parse.scm \ language/ecmascript/impl.scm \ diff --git a/module/language/ecmascript/parse-lalr.scm b/module/language/ecmascript/parse-lalr.scm deleted file mode 100644 index b702511ca..000000000 --- a/module/language/ecmascript/parse-lalr.scm +++ /dev/null @@ -1,1731 +0,0 @@ -;; (language ecmascript parse-lalr) -- yacc's parser generator, in Guile -;; Copyright (C) 1984,1989,1990 Free Software Foundation, Inc. -;; Copyright (C) 1996-2002 Dominique Boucher - -;;;; This library is free software; you can redistribute it and/or -;;;; modify it under the terms of the GNU Lesser General Public -;;;; License as published by the Free Software Foundation; either -;;;; version 3 of the License, or (at your option) any later version. -;;;; -;;;; This library is distributed in the hope that it will be useful, -;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;;; Lesser General Public License for more details. -;;;; -;;;; You should have received a copy of the GNU Lesser General Public -;;;; License along with this library; if not, write to the Free Software -;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - -;; ---------------------------------------------------------------------- ;; -#! -;;; Commentary: -This file contains yet another LALR(1) parser generator written in -Scheme. In contrast to other such parser generators, this one -implements a more efficient algorithm for computing the lookahead sets. -The algorithm is the same as used in Bison (GNU yacc) and is described -in the following paper: - -"Efficient Computation of LALR(1) Look-Ahead Set", F. DeRemer and -T. Pennello, TOPLAS, vol. 4, no. 4, october 1982. - -As a consequence, it is not written in a fully functional style. -In fact, much of the code is a direct translation from C to Scheme -of the Bison sources. - -@section Defining a parser - -The module @code{(language ecmascript parse-lalr)} declares a macro -called @code{lalr-parser}: - -@lisp - (lalr-parser tokens rules ...) -@end lisp - -This macro, when given appropriate arguments, generates an LALR(1) -syntax analyzer. The macro accepts at least two arguments. The first -is a list of symbols which represent the terminal symbols of the -grammar. The remaining arguments are the grammar production rules. - -@section Running the parser - -The parser generated by the @code{lalr-parser} macro is a function that -takes two parameters. The first parameter is a lexical analyzer while -the second is an error procedure. - -The lexical analyzer is zero-argument function (a thunk) -invoked each time the parser needs to look-ahead in the token stream. -A token is usually a pair whose @code{car} is the symbol corresponding to -the token (the same symbol as used in the grammar definition). The -@code{cdr} of the pair is the semantic value associated with the token. For -example, a string token would have the @code{car} set to @code{'string} -while the @code{cdr} is set to the string value @code{"hello"}. - -Once the end of file is encountered, the lexical analyzer must always -return the symbol @code{'*eoi*} each time it is invoked. - -The error procedure must be a function that accepts at least two -parameters. - -@section The grammar format - -The grammar is specified by first giving the list of terminals and the -list of non-terminal definitions. Each non-terminal definition -is a list where the first element is the non-terminal and the other -elements are the right-hand sides (lists of grammar symbols). In -addition to this, each rhs can be followed by a semantic action. - -For example, consider the following (yacc) grammar for a very simple -expression language: -@example - e : e '+' t - | e '-' t - | t - ; - t : t '*' f - : t '/' f - | f - ; - f : ID - ; -@end example -The same grammar, written for the scheme parser generator, would look -like this (with semantic actions) -@lisp -(define expr-parser - (lalr-parser - ; Terminal symbols - (ID + - * /) - ; Productions - (e (e + t) -> (+ $1 $3) - (e - t) -> (- $1 $3) - (t) -> $1) - (t (t * f) -> (* $1 $3) - (t / f) -> (/ $1 $3) - (f) -> $1) - (f (ID) -> $1))) -@end lisp -In semantic actions, the symbol @code{$n} refers to the synthesized -attribute value of the nth symbol in the production. The value -associated with the non-terminal on the left is the result of -evaluating the semantic action (it defaults to @code{#f}). - -The above grammar implicitly handles operator precedences. It is also -possible to explicitly assign precedences and associativity to -terminal symbols and productions a la Yacc. Here is a modified -(and augmented) version of the grammar: -@lisp -(define expr-parser - (lalr-parser - ; Terminal symbols - (ID - (left: + -) - (left: * /) - (nonassoc: uminus)) - (e (e + e) -> (+ $1 $3) - (e - e) -> (- $1 $3) - (e * e) -> (* $1 $3) - (e / e) -> (/ $1 $3) - (- e (prec: uminus)) -> (- $2) - (ID) -> $1))) -@end lisp -The @code{left:} directive is used to specify a set of left-associative -operators of the same precedence level, the @code{right:} directive for -right-associative operators, and @code{nonassoc:} for operators that -are not associative. Note the use of the (apparently) useless -terminal @code{uminus}. It is only defined in order to assign to the -penultimate rule a precedence level higher than that of @code{*} and -@code{/}. The @code{prec:} directive can only appear as the last element of a -rule. Finally, note that precedence levels are incremented from -left to right, i.e. the precedence level of @code{+} and @code{-} is less -than the precedence level of @code{*} and @code{/} since the formers appear -first in the list of terminal symbols (token definitions). - -@section A final note on conflict resolution - -Conflicts in the grammar are handled in a conventional way. -In the absence of precedence directives, -Shift/Reduce conflicts are resolved by shifting, and Reduce/Reduce -conflicts are resolved by choosing the rule listed first in the -grammar definition. - -You can print the states of the generated parser by evaluating -@code{(print-states)}. The format of the output is similar to the one -produced by bison when given the -v command-line option. -;;; Code: -!# - -;;; ---------- SYSTEM DEPENDENT SECTION ----------------- -;; put in a module by Richard Todd -(define-module (language ecmascript parse-lalr) - #:export (lalr-parser - print-states)) - -;; this code is by Thien-Thi Nguyen, found in a google search -(begin - (defmacro def-macro (form . body) - `(defmacro ,(car form) ,(cdr form) ,@body)) - (def-macro (BITS-PER-WORD) 28) - (def-macro (lalr-error msg obj) `(throw 'lalr-error ,msg ,obj)) - (def-macro (logical-or x . y) `(logior ,x ,@y))) - -;;; ---------- END OF SYSTEM DEPENDENT SECTION ------------ - -;; - Macros pour la gestion des vecteurs de bits - -(def-macro (set-bit v b) - `(let ((x (quotient ,b (BITS-PER-WORD))) - (y (expt 2 (remainder ,b (BITS-PER-WORD))))) - (vector-set! ,v x (logical-or (vector-ref ,v x) y)))) - -(def-macro (bit-union v1 v2 n) - `(do ((i 0 (+ i 1))) - ((= i ,n)) - (vector-set! ,v1 i (logical-or (vector-ref ,v1 i) - (vector-ref ,v2 i))))) - -;; - Macro pour les structures de donnees - -(def-macro (new-core) `(make-vector 4 0)) -(def-macro (set-core-number! c n) `(vector-set! ,c 0 ,n)) -(def-macro (set-core-acc-sym! c s) `(vector-set! ,c 1 ,s)) -(def-macro (set-core-nitems! c n) `(vector-set! ,c 2 ,n)) -(def-macro (set-core-items! c i) `(vector-set! ,c 3 ,i)) -(def-macro (core-number c) `(vector-ref ,c 0)) -(def-macro (core-acc-sym c) `(vector-ref ,c 1)) -(def-macro (core-nitems c) `(vector-ref ,c 2)) -(def-macro (core-items c) `(vector-ref ,c 3)) - -(def-macro (new-shift) `(make-vector 3 0)) -(def-macro (set-shift-number! c x) `(vector-set! ,c 0 ,x)) -(def-macro (set-shift-nshifts! c x) `(vector-set! ,c 1 ,x)) -(def-macro (set-shift-shifts! c x) `(vector-set! ,c 2 ,x)) -(def-macro (shift-number s) `(vector-ref ,s 0)) -(def-macro (shift-nshifts s) `(vector-ref ,s 1)) -(def-macro (shift-shifts s) `(vector-ref ,s 2)) - -(def-macro (new-red) `(make-vector 3 0)) -(def-macro (set-red-number! c x) `(vector-set! ,c 0 ,x)) -(def-macro (set-red-nreds! c x) `(vector-set! ,c 1 ,x)) -(def-macro (set-red-rules! c x) `(vector-set! ,c 2 ,x)) -(def-macro (red-number c) `(vector-ref ,c 0)) -(def-macro (red-nreds c) `(vector-ref ,c 1)) -(def-macro (red-rules c) `(vector-ref ,c 2)) - - - -(def-macro (new-set nelem) - `(make-vector ,nelem 0)) - - -(def-macro (vector-map f v) - `(let ((vm-n (- (vector-length ,v) 1))) - (let loop ((vm-low 0) (vm-high vm-n)) - (if (= vm-low vm-high) - (vector-set! ,v vm-low (,f (vector-ref ,v vm-low) vm-low)) - (let ((vm-middle (quotient (+ vm-low vm-high) 2))) - (loop vm-low vm-middle) - (loop (+ vm-middle 1) vm-high)))))) - - -;; - Constantes -(define STATE-TABLE-SIZE 1009) - - -;; - Tableaux -(define rrhs #f) -(define rlhs #f) -(define ritem #f) -(define nullable #f) -(define derives #f) -(define fderives #f) -(define firsts #f) -(define kernel-base #f) -(define kernel-end #f) -(define shift-symbol #f) -(define shift-set #f) -(define red-set #f) -(define state-table #f) -(define acces-symbol #f) -(define reduction-table #f) -(define shift-table #f) -(define consistent #f) -(define lookaheads #f) -(define LA #f) -(define LAruleno #f) -(define lookback #f) -(define goto-map #f) -(define from-state #f) -(define to-state #f) -(define includes #f) -(define F #f) -(define action-table #f) - -;; - Variables -(define nitems #f) -(define nrules #f) -(define nvars #f) -(define nterms #f) -(define nsyms #f) -(define nstates #f) -(define first-state #f) -(define last-state #f) -(define final-state #f) -(define first-shift #f) -(define last-shift #f) -(define first-reduction #f) -(define last-reduction #f) -(define nshifts #f) -(define maxrhs #f) -(define ngotos #f) -(define token-set-size #f) - -(define (gen-tables! tokens gram) - (initialize-all) - (rewrite-grammar - tokens - gram - (lambda (terms terms/prec vars gram gram/actions) - (set! the-terminals/prec (list->vector terms/prec)) - (set! the-terminals (list->vector terms)) - (set! the-nonterminals (list->vector vars)) - (set! nterms (length terms)) - (set! nvars (length vars)) - (set! nsyms (+ nterms nvars)) - (let ((no-of-rules (length gram/actions)) - (no-of-items (let loop ((l gram/actions) (count 0)) - (if (null? l) - count - (loop (cdr l) (+ count (length (caar l)))))))) - (pack-grammar no-of-rules no-of-items gram) - (set-derives) - (set-nullable) - (generate-states) - (lalr) - (build-tables) - (compact-action-table terms) - gram/actions)))) - - -(define (initialize-all) - (set! rrhs #f) - (set! rlhs #f) - (set! ritem #f) - (set! nullable #f) - (set! derives #f) - (set! fderives #f) - (set! firsts #f) - (set! kernel-base #f) - (set! kernel-end #f) - (set! shift-symbol #f) - (set! shift-set #f) - (set! red-set #f) - (set! state-table (make-vector STATE-TABLE-SIZE '())) - (set! acces-symbol #f) - (set! reduction-table #f) - (set! shift-table #f) - (set! consistent #f) - (set! lookaheads #f) - (set! LA #f) - (set! LAruleno #f) - (set! lookback #f) - (set! goto-map #f) - (set! from-state #f) - (set! to-state #f) - (set! includes #f) - (set! F #f) - (set! action-table #f) - (set! nstates #f) - (set! first-state #f) - (set! last-state #f) - (set! final-state #f) - (set! first-shift #f) - (set! last-shift #f) - (set! first-reduction #f) - (set! last-reduction #f) - (set! nshifts #f) - (set! maxrhs #f) - (set! ngotos #f) - (set! token-set-size #f) - (set! rule-precedences '())) - - -(define (pack-grammar no-of-rules no-of-items gram) - (set! nrules (+ no-of-rules 1)) - (set! nitems no-of-items) - (set! rlhs (make-vector nrules #f)) - (set! rrhs (make-vector nrules #f)) - (set! ritem (make-vector (+ 1 nitems) #f)) - - (let loop ((p gram) (item-no 0) (rule-no 1)) - (if (not (null? p)) - (let ((nt (caar p))) - (let loop2 ((prods (cdar p)) (it-no2 item-no) (rl-no2 rule-no)) - (if (null? prods) - (loop (cdr p) it-no2 rl-no2) - (begin - (vector-set! rlhs rl-no2 nt) - (vector-set! rrhs rl-no2 it-no2) - (let loop3 ((rhs (car prods)) (it-no3 it-no2)) - (if (null? rhs) - (begin - (vector-set! ritem it-no3 (- rl-no2)) - (loop2 (cdr prods) (+ it-no3 1) (+ rl-no2 1))) - (begin - (vector-set! ritem it-no3 (car rhs)) - (loop3 (cdr rhs) (+ it-no3 1)))))))))))) - - -;; Fonction set-derives -;; -------------------- -(define (set-derives) - (define delts (make-vector (+ nrules 1) 0)) - (define dset (make-vector nvars -1)) - - (let loop ((i 1) (j 0)) ; i = 0 - (if (< i nrules) - (let ((lhs (vector-ref rlhs i))) - (if (>= lhs 0) - (begin - (vector-set! delts j (cons i (vector-ref dset lhs))) - (vector-set! dset lhs j) - (loop (+ i 1) (+ j 1))) - (loop (+ i 1) j))))) - - (set! derives (make-vector nvars 0)) - - (let loop ((i 0)) - (if (< i nvars) - (let ((q (let loop2 ((j (vector-ref dset i)) (s '())) - (if (< j 0) - s - (let ((x (vector-ref delts j))) - (loop2 (cdr x) (cons (car x) s))))))) - (vector-set! derives i q) - (loop (+ i 1)))))) - - - -(define (set-nullable) - (set! nullable (make-vector nvars #f)) - (let ((squeue (make-vector nvars #f)) - (rcount (make-vector (+ nrules 1) 0)) - (rsets (make-vector nvars #f)) - (relts (make-vector (+ nitems nvars 1) #f))) - (let loop ((r 0) (s2 0) (p 0)) - (let ((*r (vector-ref ritem r))) - (if *r - (if (< *r 0) - (let ((symbol (vector-ref rlhs (- *r)))) - (if (and (>= symbol 0) - (not (vector-ref nullable symbol))) - (begin - (vector-set! nullable symbol #t) - (vector-set! squeue s2 symbol) - (loop (+ r 1) (+ s2 1) p)))) - (let loop2 ((r1 r) (any-tokens #f)) - (let* ((symbol (vector-ref ritem r1))) - (if (> symbol 0) - (loop2 (+ r1 1) (or any-tokens (>= symbol nvars))) - (if (not any-tokens) - (let ((ruleno (- symbol))) - (let loop3 ((r2 r) (p2 p)) - (let ((symbol (vector-ref ritem r2))) - (if (> symbol 0) - (begin - (vector-set! rcount ruleno - (+ (vector-ref rcount ruleno) 1)) - (vector-set! relts p2 - (cons (vector-ref rsets symbol) - ruleno)) - (vector-set! rsets symbol p2) - (loop3 (+ r2 1) (+ p2 1))) - (loop (+ r2 1) s2 p2))))) - (loop (+ r1 1) s2 p)))))) - (let loop ((s1 0) (s3 s2)) - (if (< s1 s3) - (let loop2 ((p (vector-ref rsets (vector-ref squeue s1))) (s4 s3)) - (if p - (let* ((x (vector-ref relts p)) - (ruleno (cdr x)) - (y (- (vector-ref rcount ruleno) 1))) - (vector-set! rcount ruleno y) - (if (= y 0) - (let ((symbol (vector-ref rlhs ruleno))) - (if (and (>= symbol 0) - (not (vector-ref nullable symbol))) - (begin - (vector-set! nullable symbol #t) - (vector-set! squeue s4 symbol) - (loop2 (car x) (+ s4 1))) - (loop2 (car x) s4))) - (loop2 (car x) s4)))) - (loop (+ s1 1) s4))))))))) - - - -; Fonction set-firsts qui calcule un tableau de taille -; nvars et qui donne, pour chaque non-terminal X, une liste des -; non-terminaux pouvant apparaitre au debut d'une derivation a -; partir de X. - -(define (set-firsts) - (set! firsts (make-vector nvars '())) - - ;; -- initialization - (let loop ((i 0)) - (if (< i nvars) - (let loop2 ((sp (vector-ref derives i))) - (if (null? sp) - (loop (+ i 1)) - (let ((sym (vector-ref ritem (vector-ref rrhs (car sp))))) - (if (< -1 sym nvars) - (vector-set! firsts i (sinsert sym (vector-ref firsts i)))) - (loop2 (cdr sp))))))) - - ;; -- reflexive and transitive closure - (let loop ((continue #t)) - (if continue - (let loop2 ((i 0) (cont #f)) - (if (>= i nvars) - (loop cont) - (let* ((x (vector-ref firsts i)) - (y (let loop3 ((l x) (z x)) - (if (null? l) - z - (loop3 (cdr l) - (sunion (vector-ref firsts (car l)) z)))))) - (if (equal? x y) - (loop2 (+ i 1) cont) - (begin - (vector-set! firsts i y) - (loop2 (+ i 1) #t)))))))) - - (let loop ((i 0)) - (if (< i nvars) - (begin - (vector-set! firsts i (sinsert i (vector-ref firsts i))) - (loop (+ i 1)))))) - - - - -; Fonction set-fderives qui calcule un tableau de taille -; nvars et qui donne, pour chaque non-terminal, une liste des regles pouvant -; etre derivees a partir de ce non-terminal. (se sert de firsts) - -(define (set-fderives) - (set! fderives (make-vector nvars #f)) - - (set-firsts) - - (let loop ((i 0)) - (if (< i nvars) - (let ((x (let loop2 ((l (vector-ref firsts i)) (fd '())) - (if (null? l) - fd - (loop2 (cdr l) - (sunion (vector-ref derives (car l)) fd)))))) - (vector-set! fderives i x) - (loop (+ i 1)))))) - - -; Fonction calculant la fermeture d'un ensemble d'items LR0 -; ou core est une liste d'items - -(define (closure core) - ;; Initialization - (define ruleset (make-vector nrules #f)) - - (let loop ((csp core)) - (if (not (null? csp)) - (let ((sym (vector-ref ritem (car csp)))) - (if (< -1 sym nvars) - (let loop2 ((dsp (vector-ref fderives sym))) - (if (not (null? dsp)) - (begin - (vector-set! ruleset (car dsp) #t) - (loop2 (cdr dsp)))))) - (loop (cdr csp))))) - - (let loop ((ruleno 1) (csp core) (itemsetv '())) ; ruleno = 0 - (if (< ruleno nrules) - (if (vector-ref ruleset ruleno) - (let ((itemno (vector-ref rrhs ruleno))) - (let loop2 ((c csp) (itemsetv2 itemsetv)) - (if (and (pair? c) - (< (car c) itemno)) - (loop2 (cdr c) (cons (car c) itemsetv2)) - (loop (+ ruleno 1) c (cons itemno itemsetv2))))) - (loop (+ ruleno 1) csp itemsetv)) - (let loop2 ((c csp) (itemsetv2 itemsetv)) - (if (pair? c) - (loop2 (cdr c) (cons (car c) itemsetv2)) - (reverse itemsetv2)))))) - - - -(define (allocate-item-sets) - (set! kernel-base (make-vector nsyms 0)) - (set! kernel-end (make-vector nsyms #f))) - - -(define (allocate-storage) - (allocate-item-sets) - (set! red-set (make-vector (+ nrules 1) 0))) - -;; -- - - -(define (initialize-states) - (let ((p (new-core))) - (set-core-number! p 0) - (set-core-acc-sym! p #f) - (set-core-nitems! p 1) - (set-core-items! p '(0)) - - (set! first-state (list p)) - (set! last-state first-state) - (set! nstates 1))) - - - -(define (generate-states) - (allocate-storage) - (set-fderives) - (initialize-states) - (let loop ((this-state first-state)) - (if (pair? this-state) - (let* ((x (car this-state)) - (is (closure (core-items x)))) - (save-reductions x is) - (new-itemsets is) - (append-states) - (if (> nshifts 0) - (save-shifts x)) - (loop (cdr this-state)))))) - - -;; Fonction calculant les symboles sur lesquels il faut "shifter" -;; et regroupe les items en fonction de ces symboles - -(define (new-itemsets itemset) - ;; - Initialization - (set! shift-symbol '()) - (let loop ((i 0)) - (if (< i nsyms) - (begin - (vector-set! kernel-end i '()) - (loop (+ i 1))))) - - (let loop ((isp itemset)) - (if (pair? isp) - (let* ((i (car isp)) - (sym (vector-ref ritem i))) - (if (>= sym 0) - (begin - (set! shift-symbol (sinsert sym shift-symbol)) - (let ((x (vector-ref kernel-end sym))) - (if (null? x) - (begin - (vector-set! kernel-base sym (cons (+ i 1) x)) - (vector-set! kernel-end sym (vector-ref kernel-base sym))) - (begin - (set-cdr! x (list (+ i 1))) - (vector-set! kernel-end sym (cdr x))))))) - (loop (cdr isp))))) - - (set! nshifts (length shift-symbol))) - - - -(define (get-state sym) - (let* ((isp (vector-ref kernel-base sym)) - (n (length isp)) - (key (let loop ((isp1 isp) (k 0)) - (if (null? isp1) - (modulo k STATE-TABLE-SIZE) - (loop (cdr isp1) (+ k (car isp1)))))) - (sp (vector-ref state-table key))) - (if (null? sp) - (let ((x (new-state sym))) - (vector-set! state-table key (list x)) - (core-number x)) - (let loop ((sp1 sp)) - (if (and (= n (core-nitems (car sp1))) - (let loop2 ((i1 isp) (t (core-items (car sp1)))) - (if (and (pair? i1) - (= (car i1) - (car t))) - (loop2 (cdr i1) (cdr t)) - (null? i1)))) - (core-number (car sp1)) - (if (null? (cdr sp1)) - (let ((x (new-state sym))) - (set-cdr! sp1 (list x)) - (core-number x)) - (loop (cdr sp1)))))))) - - -(define (new-state sym) - (let* ((isp (vector-ref kernel-base sym)) - (n (length isp)) - (p (new-core))) - (set-core-number! p nstates) - (set-core-acc-sym! p sym) - (if (= sym nvars) (set! final-state nstates)) - (set-core-nitems! p n) - (set-core-items! p isp) - (set-cdr! last-state (list p)) - (set! last-state (cdr last-state)) - (set! nstates (+ nstates 1)) - p)) - - -;; -- - -(define (append-states) - (set! shift-set - (let loop ((l (reverse shift-symbol))) - (if (null? l) - '() - (cons (get-state (car l)) (loop (cdr l))))))) - -;; -- - -(define (save-shifts core) - (let ((p (new-shift))) - (set-shift-number! p (core-number core)) - (set-shift-nshifts! p nshifts) - (set-shift-shifts! p shift-set) - (if last-shift - (begin - (set-cdr! last-shift (list p)) - (set! last-shift (cdr last-shift))) - (begin - (set! first-shift (list p)) - (set! last-shift first-shift))))) - -(define (save-reductions core itemset) - (let ((rs (let loop ((l itemset)) - (if (null? l) - '() - (let ((item (vector-ref ritem (car l)))) - (if (< item 0) - (cons (- item) (loop (cdr l))) - (loop (cdr l)))))))) - (if (pair? rs) - (let ((p (new-red))) - (set-red-number! p (core-number core)) - (set-red-nreds! p (length rs)) - (set-red-rules! p rs) - (if last-reduction - (begin - (set-cdr! last-reduction (list p)) - (set! last-reduction (cdr last-reduction))) - (begin - (set! first-reduction (list p)) - (set! last-reduction first-reduction))))))) - - -;; -- - -(define (lalr) - (set! token-set-size (+ 1 (quotient nterms (BITS-PER-WORD)))) - (set-accessing-symbol) - (set-shift-table) - (set-reduction-table) - (set-max-rhs) - (initialize-LA) - (set-goto-map) - (initialize-F) - (build-relations) - (digraph includes) - (compute-lookaheads)) - -(define (set-accessing-symbol) - (set! acces-symbol (make-vector nstates #f)) - (let loop ((l first-state)) - (if (pair? l) - (let ((x (car l))) - (vector-set! acces-symbol (core-number x) (core-acc-sym x)) - (loop (cdr l)))))) - -(define (set-shift-table) - (set! shift-table (make-vector nstates #f)) - (let loop ((l first-shift)) - (if (pair? l) - (let ((x (car l))) - (vector-set! shift-table (shift-number x) x) - (loop (cdr l)))))) - -(define (set-reduction-table) - (set! reduction-table (make-vector nstates #f)) - (let loop ((l first-reduction)) - (if (pair? l) - (let ((x (car l))) - (vector-set! reduction-table (red-number x) x) - (loop (cdr l)))))) - -(define (set-max-rhs) - (let loop ((p 0) (curmax 0) (length 0)) - (let ((x (vector-ref ritem p))) - (if x - (if (>= x 0) - (loop (+ p 1) curmax (+ length 1)) - (loop (+ p 1) (max curmax length) 0)) - (set! maxrhs curmax))))) - -(define (initialize-LA) - (define (last l) - (if (null? (cdr l)) - (car l) - (last (cdr l)))) - - (set! consistent (make-vector nstates #f)) - (set! lookaheads (make-vector (+ nstates 1) #f)) - - (let loop ((count 0) (i 0)) - (if (< i nstates) - (begin - (vector-set! lookaheads i count) - (let ((rp (vector-ref reduction-table i)) - (sp (vector-ref shift-table i))) - (if (and rp - (or (> (red-nreds rp) 1) - (and sp - (not - (< (vector-ref acces-symbol - (last (shift-shifts sp))) - nvars))))) - (loop (+ count (red-nreds rp)) (+ i 1)) - (begin - (vector-set! consistent i #t) - (loop count (+ i 1)))))) - - (begin - (vector-set! lookaheads nstates count) - (let ((c (max count 1))) - (set! LA (make-vector c #f)) - (do ((j 0 (+ j 1))) ((= j c)) (vector-set! LA j (new-set token-set-size))) - (set! LAruleno (make-vector c -1)) - (set! lookback (make-vector c #f))) - (let loop ((i 0) (np 0)) - (if (< i nstates) - (if (vector-ref consistent i) - (loop (+ i 1) np) - (let ((rp (vector-ref reduction-table i))) - (if rp - (let loop2 ((j (red-rules rp)) (np2 np)) - (if (null? j) - (loop (+ i 1) np2) - (begin - (vector-set! LAruleno np2 (car j)) - (loop2 (cdr j) (+ np2 1))))) - (loop (+ i 1) np)))))))))) - - -(define (set-goto-map) - (set! goto-map (make-vector (+ nvars 1) 0)) - (let ((temp-map (make-vector (+ nvars 1) 0))) - (let loop ((ng 0) (sp first-shift)) - (if (pair? sp) - (let loop2 ((i (reverse (shift-shifts (car sp)))) (ng2 ng)) - (if (pair? i) - (let ((symbol (vector-ref acces-symbol (car i)))) - (if (< symbol nvars) - (begin - (vector-set! goto-map symbol - (+ 1 (vector-ref goto-map symbol))) - (loop2 (cdr i) (+ ng2 1))) - (loop2 (cdr i) ng2))) - (loop ng2 (cdr sp)))) - - (let loop ((k 0) (i 0)) - (if (< i nvars) - (begin - (vector-set! temp-map i k) - (loop (+ k (vector-ref goto-map i)) (+ i 1))) - - (begin - (do ((i 0 (+ i 1))) - ((>= i nvars)) - (vector-set! goto-map i (vector-ref temp-map i))) - - (set! ngotos ng) - (vector-set! goto-map nvars ngotos) - (vector-set! temp-map nvars ngotos) - (set! from-state (make-vector ngotos #f)) - (set! to-state (make-vector ngotos #f)) - - (do ((sp first-shift (cdr sp))) - ((null? sp)) - (let* ((x (car sp)) - (state1 (shift-number x))) - (do ((i (shift-shifts x) (cdr i))) - ((null? i)) - (let* ((state2 (car i)) - (symbol (vector-ref acces-symbol state2))) - (if (< symbol nvars) - (let ((k (vector-ref temp-map symbol))) - (vector-set! temp-map symbol (+ k 1)) - (vector-set! from-state k state1) - (vector-set! to-state k state2)))))))))))))) - - -(define (map-goto state symbol) - (let loop ((low (vector-ref goto-map symbol)) - (high (- (vector-ref goto-map (+ symbol 1)) 1))) - (if (> low high) - (begin - (display (list "Error in map-goto" state symbol) (current-error-port)) - (newline (current-error-port)) - 0) - (let* ((middle (quotient (+ low high) 2)) - (s (vector-ref from-state middle))) - (cond - ((= s state) - middle) - ((< s state) - (loop (+ middle 1) high)) - (else - (loop low (- middle 1)))))))) - - -(define (initialize-F) - (set! F (make-vector ngotos #f)) - (do ((i 0 (+ i 1))) ((= i ngotos)) (vector-set! F i (new-set token-set-size))) - - (let ((reads (make-vector ngotos #f))) - - (let loop ((i 0) (rowp 0)) - (if (< i ngotos) - (let* ((rowf (vector-ref F rowp)) - (stateno (vector-ref to-state i)) - (sp (vector-ref shift-table stateno))) - (if sp - (let loop2 ((j (shift-shifts sp)) (edges '())) - (if (pair? j) - (let ((symbol (vector-ref acces-symbol (car j)))) - (if (< symbol nvars) - (if (vector-ref nullable symbol) - (loop2 (cdr j) (cons (map-goto stateno symbol) - edges)) - (loop2 (cdr j) edges)) - (begin - (set-bit rowf (- symbol nvars)) - (loop2 (cdr j) edges)))) - (if (pair? edges) - (vector-set! reads i (reverse edges)))))) - (loop (+ i 1) (+ rowp 1))))) - (digraph reads))) - -(define (add-lookback-edge stateno ruleno gotono) - (let ((k (vector-ref lookaheads (+ stateno 1)))) - (let loop ((found #f) (i (vector-ref lookaheads stateno))) - (if (and (not found) (< i k)) - (if (= (vector-ref LAruleno i) ruleno) - (loop #t i) - (loop found (+ i 1))) - - (if (not found) - (begin (display "Error in add-lookback-edge : " (current-error-port)) - (display (list stateno ruleno gotono) (current-error-port)) - (newline (current-error-port))) - (vector-set! lookback i - (cons gotono (vector-ref lookback i)))))))) - - -(define (transpose r-arg n) - (let ((new-end (make-vector n #f)) - (new-R (make-vector n #f))) - (do ((i 0 (+ i 1))) - ((= i n)) - (let ((x (list 'bidon))) - (vector-set! new-R i x) - (vector-set! new-end i x))) - (do ((i 0 (+ i 1))) - ((= i n)) - (let ((sp (vector-ref r-arg i))) - (if (pair? sp) - (let loop ((sp2 sp)) - (if (pair? sp2) - (let* ((x (car sp2)) - (y (vector-ref new-end x))) - (set-cdr! y (cons i (cdr y))) - (vector-set! new-end x (cdr y)) - (loop (cdr sp2)))))))) - (do ((i 0 (+ i 1))) - ((= i n)) - (vector-set! new-R i (cdr (vector-ref new-R i)))) - - new-R)) - - - -(define (build-relations) - - (define (get-state stateno symbol) - (let loop ((j (shift-shifts (vector-ref shift-table stateno))) - (stno stateno)) - (if (null? j) - stno - (let ((st2 (car j))) - (if (= (vector-ref acces-symbol st2) symbol) - st2 - (loop (cdr j) st2)))))) - - (set! includes (make-vector ngotos #f)) - (do ((i 0 (+ i 1))) - ((= i ngotos)) - (let ((state1 (vector-ref from-state i)) - (symbol1 (vector-ref acces-symbol (vector-ref to-state i)))) - (let loop ((rulep (vector-ref derives symbol1)) - (edges '())) - (if (pair? rulep) - (let ((*rulep (car rulep))) - (let loop2 ((rp (vector-ref rrhs *rulep)) - (stateno state1) - (states (list state1))) - (let ((*rp (vector-ref ritem rp))) - (if (> *rp 0) - (let ((st (get-state stateno *rp))) - (loop2 (+ rp 1) st (cons st states))) - (begin - - (if (not (vector-ref consistent stateno)) - (add-lookback-edge stateno *rulep i)) - - (let loop2 ((done #f) - (stp (cdr states)) - (rp2 (- rp 1)) - (edgp edges)) - (if (not done) - (let ((*rp (vector-ref ritem rp2))) - (if (< -1 *rp nvars) - (loop2 (not (vector-ref nullable *rp)) - (cdr stp) - (- rp2 1) - (cons (map-goto (car stp) *rp) edgp)) - (loop2 #t stp rp2 edgp))) - - (loop (cdr rulep) edgp)))))))) - (vector-set! includes i edges))))) - (set! includes (transpose includes ngotos))) - - - -(define (compute-lookaheads) - (let ((n (vector-ref lookaheads nstates))) - (let loop ((i 0)) - (if (< i n) - (let loop2 ((sp (vector-ref lookback i))) - (if (pair? sp) - (let ((LA-i (vector-ref LA i)) - (F-j (vector-ref F (car sp)))) - (bit-union LA-i F-j token-set-size) - (loop2 (cdr sp))) - (loop (+ i 1)))))))) - - - -(define (digraph relation) - (define infinity (+ ngotos 2)) - (define INDEX (make-vector (+ ngotos 1) 0)) - (define VERTICES (make-vector (+ ngotos 1) 0)) - (define top 0) - (define R relation) - - (define (traverse i) - (set! top (+ 1 top)) - (vector-set! VERTICES top i) - (let ((height top)) - (vector-set! INDEX i height) - (let ((rp (vector-ref R i))) - (if (pair? rp) - (let loop ((rp2 rp)) - (if (pair? rp2) - (let ((j (car rp2))) - (if (= 0 (vector-ref INDEX j)) - (traverse j)) - (if (> (vector-ref INDEX i) - (vector-ref INDEX j)) - (vector-set! INDEX i (vector-ref INDEX j))) - (let ((F-i (vector-ref F i)) - (F-j (vector-ref F j))) - (bit-union F-i F-j token-set-size)) - (loop (cdr rp2)))))) - (if (= (vector-ref INDEX i) height) - (let loop () - (let ((j (vector-ref VERTICES top))) - (set! top (- top 1)) - (vector-set! INDEX j infinity) - (if (not (= i j)) - (begin - (bit-union (vector-ref F i) - (vector-ref F j) - token-set-size) - (loop))))))))) - - (let loop ((i 0)) - (if (< i ngotos) - (begin - (if (and (= 0 (vector-ref INDEX i)) - (pair? (vector-ref R i))) - (traverse i)) - (loop (+ i 1)))))) - - -;; ---------------------------------------------------------------------- ;; -;; operator precedence management ;; -;; ---------------------------------------------------------------------- ;; - -; a vector of precedence descriptors where each element -; is of the form (terminal type precedence) -(define the-terminals/prec #f) ; terminal symbols with precedence -; the precedence is an integer >= 0 -(define (get-symbol-precedence sym) - (caddr (vector-ref the-terminals/prec sym))) -; the operator type is either 'none, 'left, 'right, or 'nonassoc -(define (get-symbol-assoc sym) - (cadr (vector-ref the-terminals/prec sym))) - -(define rule-precedences '()) -(define (add-rule-precedence! rule sym) - (set! rule-precedences - (cons (cons rule sym) rule-precedences))) - -(define (get-rule-precedence ruleno) - (cond - ((assq ruleno rule-precedences) - => (lambda (p) - (get-symbol-precedence (cdr p)))) - (else - ;; process the rule symbols from left to right - (let loop ((i (vector-ref rrhs ruleno)) - (prec 0)) - (let ((item (vector-ref ritem i))) - ;; end of rule - (if (< item 0) - prec - (let ((i1 (+ i 1))) - (if (>= item nvars) - ;; it's a terminal symbol - (loop i1 (get-symbol-precedence (- item nvars))) - (loop i1 prec))))))))) - -;; ---------------------------------------------------------------------- ;; -;; Build the various tables ;; -;; ---------------------------------------------------------------------- ;; -(define (build-tables) - - (define (resolve-conflict sym rule) - (let ((sym-prec (get-symbol-precedence sym)) - (sym-assoc (get-symbol-assoc sym)) - (rule-prec (get-rule-precedence rule))) - (cond - ((> sym-prec rule-prec) 'shift) - ((< sym-prec rule-prec) 'reduce) - ((eq? sym-assoc 'left) 'reduce) - ((eq? sym-assoc 'right) 'shift) - (else 'shift)))) - - ;; --- Add an action to the action table ------------------------------ ;; - (define (add-action St Sym Act) - (let* ((x (vector-ref action-table St)) - (y (assv Sym x))) - (if y - (if (not (= Act (cdr y))) - ;; -- there is a conflict - (begin - (if (and (<= (cdr y) 0) - (<= Act 0)) - ;; --- reduce/reduce conflict ----------------------- ;; - (begin - (display "%% Reduce/Reduce conflict " (current-error-port)) - (display "(reduce " (current-error-port)) - (display (- Act) (current-error-port)) - (display ", reduce " (current-error-port)) - (display (- (cdr y)) (current-error-port)) - (display ") on " (current-error-port)) - (print-symbol (+ Sym nvars) (current-error-port)) - (display " in state " (current-error-port)) - (display St (current-error-port)) - (newline (current-error-port)) - (set-cdr! y (max (cdr y) Act))) - ;; --- shift/reduce conflict ------------------------ ;; - ;; can we resolve the conflict using precedences? - (case (resolve-conflict Sym (- (cdr y))) - ;; -- shift - ((shift) - (set-cdr! y Act)) - ;; -- reduce - ((reduce) - #f) ; well, nothing to do... - ;; -- signal a conflict! - (else - (display "%% Shift/Reduce conflict " (current-error-port)) - (display "(shift " (current-error-port)) - (display Act (current-error-port)) - (display ", reduce " (current-error-port)) - (display (- (cdr y)) (current-error-port)) - (display ") on " (current-error-port)) - (print-symbol (+ Sym nvars) (current-error-port)) - (display " in state " (current-error-port)) - (display St (current-error-port)) - (newline (current-error-port)) - (set-cdr! y Act)))))) - - (vector-set! action-table St (cons (cons Sym Act) x))))) - - (set! action-table (make-vector nstates '())) - - (do ((i 0 (+ i 1))) ; i = state - ((= i nstates)) - (let ((red (vector-ref reduction-table i))) - (if (and red (>= (red-nreds red) 1)) - (if (and (= (red-nreds red) 1) (vector-ref consistent i)) - (add-action i 'default (- (car (red-rules red)))) - (let ((k (vector-ref lookaheads (+ i 1)))) - (let loop ((j (vector-ref lookaheads i))) - (if (< j k) - (let ((rule (- (vector-ref LAruleno j))) - (lav (vector-ref LA j))) - (let loop2 ((token 0) (x (vector-ref lav 0)) (y 1) (z 0)) - (if (< token nterms) - (begin - (let ((in-la-set? (modulo x 2))) - (if (= in-la-set? 1) - (add-action i token rule))) - (if (= y (BITS-PER-WORD)) - (loop2 (+ token 1) - (vector-ref lav (+ z 1)) - 1 - (+ z 1)) - (loop2 (+ token 1) (quotient x 2) (+ y 1) z))))) - (loop (+ j 1))))))))) - - (let ((shiftp (vector-ref shift-table i))) - (if shiftp - (let loop ((k (shift-shifts shiftp))) - (if (pair? k) - (let* ((state (car k)) - (symbol (vector-ref acces-symbol state))) - (if (>= symbol nvars) - (add-action i (- symbol nvars) state)) - (loop (cdr k)))))))) - - (add-action final-state 0 'accept)) - -(define (compact-action-table terms) - (define (most-common-action acts) - (let ((accums '())) - (let loop ((l acts)) - (if (pair? l) - (let* ((x (cdar l)) - (y (assv x accums))) - (if (and (number? x) (< x 0)) - (if y - (set-cdr! y (+ 1 (cdr y))) - (set! accums (cons `(,x . 1) accums)))) - (loop (cdr l))))) - - (let loop ((l accums) (max 0) (sym #f)) - (if (null? l) - sym - (let ((x (car l))) - (if (> (cdr x) max) - (loop (cdr l) (cdr x) (car x)) - (loop (cdr l) max sym))))))) - - (define (translate-terms acts) - (map (lambda (act) - (cons (list-ref terms (car act)) - (cdr act))) - acts)) - - (do ((i 0 (+ i 1))) - ((= i nstates)) - (let ((acts (vector-ref action-table i))) - (if (vector? (vector-ref reduction-table i)) - (let ((act (most-common-action acts))) - (vector-set! action-table i - (cons `(*default* . ,(if act act 'error)) - (translate-terms - (lalr-filter (lambda (x) - (not (eq? (cdr x) act))) - acts))))) - (vector-set! action-table i - (cons `(*default* . *error*) - (translate-terms acts))))))) - - - -;; -- - -(define (rewrite-grammar tokens grammar k) - - (define eoi '*eoi*) - - (define (check-terminal term terms) - (cond - ((not (valid-terminal? term)) - (lalr-error "invalid terminal: " term)) - ((member term terms) - (lalr-error "duplicate definition of terminal: " term)))) - - (define (prec->type prec) - (cdr (assq prec '((left: . left) - (right: . right) - (nonassoc: . nonassoc))))) - - (cond - ;; --- a few error conditions ---------------------------------------- ;; - ((not (list? tokens)) - (lalr-error "Invalid token list: " tokens)) - ((not (pair? grammar)) - (lalr-error "Grammar definition must have a non-empty list of productions" '())) - - (else - ;; --- check the terminals ---------------------------------------- ;; - (let loop1 ((lst tokens) - (rev-terms '()) - (rev-terms/prec '()) - (prec-level 0)) - (if (pair? lst) - (let ((term (car lst))) - (cond - ((pair? term) - (if (and (memq (car term) '(left: right: nonassoc:)) - (not (null? (cdr term)))) - (let ((prec (+ prec-level 1)) - (optype (prec->type (car term)))) - (let loop-toks ((l (cdr term)) - (rev-terms rev-terms) - (rev-terms/prec rev-terms/prec)) - (if (null? l) - (loop1 (cdr lst) rev-terms rev-terms/prec prec) - (let ((term (car l))) - (check-terminal term rev-terms) - (loop-toks - (cdr l) - (cons term rev-terms) - (cons (list term optype prec) rev-terms/prec)))))) - - (lalr-error "invalid operator precedence specification: " term))) - - (else - (check-terminal term rev-terms) - (loop1 (cdr lst) - (cons term rev-terms) - (cons (list term 'none 0) rev-terms/prec) - prec-level)))) - - ;; --- check the grammar rules ------------------------------ ;; - (let loop2 ((lst grammar) (rev-nonterm-defs '())) - (if (pair? lst) - (let ((def (car lst))) - (if (not (pair? def)) - (lalr-error "Nonterminal definition must be a non-empty list" '()) - (let ((nonterm (car def))) - (cond ((not (valid-nonterminal? nonterm)) - (lalr-error "Invalid nonterminal:" nonterm)) - ((or (member nonterm rev-terms) - (assoc nonterm rev-nonterm-defs)) - (lalr-error "Nonterminal previously defined:" nonterm)) - (else - (loop2 (cdr lst) - (cons def rev-nonterm-defs))))))) - (let* ((terms (cons eoi (reverse rev-terms))) - (terms/prec (cons '(eoi none 0) (reverse rev-terms/prec))) - (nonterm-defs (reverse rev-nonterm-defs)) - (nonterms (cons '*start* (map car nonterm-defs)))) - (if (= (length nonterms) 1) - (lalr-error "Grammar must contain at least one nonterminal" '()) - (let loop-defs ((defs (cons `(*start* (,(cadr nonterms) ,eoi) -> $1) - nonterm-defs)) - (ruleno 0) - (comp-defs '())) - (if (pair? defs) - (let* ((nonterm-def (car defs)) - (compiled-def (rewrite-nonterm-def - nonterm-def - ruleno - terms nonterms))) - (loop-defs (cdr defs) - (+ ruleno (length compiled-def)) - (cons compiled-def comp-defs))) - - (let ((compiled-nonterm-defs (reverse comp-defs))) - (k terms - terms/prec - nonterms - (map (lambda (x) (cons (caaar x) (map cdar x))) - compiled-nonterm-defs) - (apply append compiled-nonterm-defs)))))))))))))) - - -(define *arrow* '->) - -(define (rewrite-nonterm-def nonterm-def ruleno terms nonterms) - - (define No-NT (length nonterms)) - - (define (encode x) - (let ((PosInNT (pos-in-list x nonterms))) - (if PosInNT - PosInNT - (let ((PosInT (pos-in-list x terms))) - (if PosInT - (+ No-NT PosInT) - (lalr-error "undefined symbol : " x)))))) - - (define (process-prec-directive rhs ruleno) - (let loop ((l rhs)) - (if (null? l) - '() - (let ((first (car l)) - (rest (cdr l))) - (cond - ((or (member first terms) (member first nonterms)) - (cons first (loop rest))) - ((and (pair? first) - (eq? (car first) 'prec:)) - (pair? (cdr first)) - (if (and (pair? (cdr first)) - (member (cadr first) terms)) - (if (null? (cddr first)) - (begin - (add-rule-precedence! ruleno (pos-in-list (cadr first) terms)) - (loop rest)) - (lalr-error "prec: directive should be at end of rule: " rhs)) - (lalr-error "Invalid prec: directive: " first))) - (else - (lalr-error "Invalid terminal or nonterminal: " first))))))) - - - (if (not (pair? (cdr nonterm-def))) - (lalr-error "At least one production needed for nonterminal" (car nonterm-def)) - (let ((name (symbol->string (car nonterm-def)))) - (let loop1 ((lst (cdr nonterm-def)) - (i 1) - (rev-productions-and-actions '())) - (if (not (pair? lst)) - (reverse rev-productions-and-actions) - (let* ((rhs (process-prec-directive (car lst) (+ ruleno i -1))) - (rest (cdr lst)) - (prod (map encode (cons (car nonterm-def) rhs)))) - (for-each (lambda (x) - (if (not (or (member x terms) (member x nonterms))) - (lalr-error "Invalid terminal or nonterminal" x))) - rhs) - (if (and (pair? rest) - (eq? (car rest) *arrow*) - (pair? (cdr rest))) - (loop1 (cddr rest) - (+ i 1) - (cons (cons prod (cadr rest)) - rev-productions-and-actions)) - (let* ((rhs-length (length rhs)) - (action - (cons 'vector - (cons (list 'quote (string->symbol - (string-append - name - "-" - (number->string i)))) - (let loop-j ((j 1)) - (if (> j rhs-length) - '() - (cons (string->symbol - (string-append - "$" - (number->string j))) - (loop-j (+ j 1))))))))) - (loop1 rest - (+ i 1) - (cons (cons prod action) - rev-productions-and-actions)))))))))) - -(define (valid-nonterminal? x) - (symbol? x)) - -(define (valid-terminal? x) - (symbol? x)) ; DB - -;; ---------------------------------------------------------------------- ;; -;; Miscellaneous ;; -;; ---------------------------------------------------------------------- ;; -(define (pos-in-list x lst) - (let loop ((lst lst) (i 0)) - (cond ((not (pair? lst)) #f) - ((equal? (car lst) x) i) - (else (loop (cdr lst) (+ i 1)))))) - -(define (sunion lst1 lst2) ; union of sorted lists - (let loop ((L1 lst1) - (L2 lst2)) - (cond ((null? L1) L2) - ((null? L2) L1) - (else - (let ((x (car L1)) (y (car L2))) - (cond - ((> x y) - (cons y (loop L1 (cdr L2)))) - ((< x y) - (cons x (loop (cdr L1) L2))) - (else - (loop (cdr L1) L2)) - )))))) - -(define (sinsert elem lst) - (let loop ((l1 lst)) - (if (null? l1) - (cons elem l1) - (let ((x (car l1))) - (cond ((< elem x) - (cons elem l1)) - ((> elem x) - (cons x (loop (cdr l1)))) - (else - l1)))))) - -(define (lalr-filter p lst) - (let loop ((l lst)) - (if (null? l) - '() - (let ((x (car l)) (y (cdr l))) - (if (p x) - (cons x (loop y)) - (loop y)))))) - -;; ---------------------------------------------------------------------- ;; -;; Debugging tools ... ;; -;; ---------------------------------------------------------------------- ;; -(define the-terminals #f) ; names of terminal symbols -(define the-nonterminals #f) ; non-terminals - -(define (print-item item-no) - (let loop ((i item-no)) - (let ((v (vector-ref ritem i))) - (if (>= v 0) - (loop (+ i 1)) - (let* ((rlno (- v)) - (nt (vector-ref rlhs rlno))) - (display (vector-ref the-nonterminals nt)) (display " --> ") - (let loop ((i (vector-ref rrhs rlno))) - (let ((v (vector-ref ritem i))) - (if (= i item-no) - (display ". ")) - (if (>= v 0) - (begin - (print-symbol v) - (display " ") - (loop (+ i 1))) - (begin - (display " (rule ") - (display (- v)) - (display ")") - (newline)))))))))) - -(define (print-symbol n . port) - (display (if (>= n nvars) - (vector-ref the-terminals (- n nvars)) - (vector-ref the-nonterminals n)) - (if (null? port) - (current-output-port) - (car port)))) - -(define (print-states) -"Print the states of a generated parser." - (define (print-action act) - (cond - ((eq? act '*error*) - (display " : Error")) - ((eq? act 'accept) - (display " : Accept input")) - ((< act 0) - (display " : reduce using rule ") - (display (- act))) - (else - (display " : shift and goto state ") - (display act))) - (newline) - #t) - - (define (print-actions acts) - (let loop ((l acts)) - (if (null? l) - #t - (let ((sym (caar l)) - (act (cdar l))) - (display " ") - (cond - ((eq? sym 'default) - (display "default action")) - (else - (if (number? sym) - (print-symbol (+ sym nvars)) - (display sym)))) - (print-action act) - (loop (cdr l)))))) - - (if (not action-table) - (begin - (display "No generated parser available!") - (newline) - #f) - (begin - (display "State table") (newline) - (display "-----------") (newline) (newline) - - (let loop ((l first-state)) - (if (null? l) - #t - (let* ((core (car l)) - (i (core-number core)) - (items (core-items core)) - (actions (vector-ref action-table i))) - (display "state ") (display i) (newline) - (newline) - (for-each (lambda (x) (display " ") (print-item x)) - items) - (newline) - (print-actions actions) - (newline) - (loop (cdr l)))))))) - - - -;; ---------------------------------------------------------------------- ;; - -(define build-goto-table - (lambda () - `(vector - ,@(map - (lambda (shifts) - (list 'quote - (if shifts - (let loop ((l (shift-shifts shifts))) - (if (null? l) - '() - (let* ((state (car l)) - (symbol (vector-ref acces-symbol state))) - (if (< symbol nvars) - (cons `(,symbol . ,state) - (loop (cdr l))) - (loop (cdr l)))))) - '()))) - (vector->list shift-table))))) - - -(define build-reduction-table - (lambda (gram/actions) - `(vector - '() - ,@(map - (lambda (p) - (let ((act (cdr p))) - `(lambda (___stack ___sp ___goto-table ___k) - ,(let* ((nt (caar p)) (rhs (cdar p)) (n (length rhs))) - `(let* (,@(if act - (let loop ((i 1) (l rhs)) - (if (pair? l) - (let ((rest (cdr l))) - (cons - `(,(string->symbol - (string-append - "$" - (number->string - (+ (- n i) 1)))) - (vector-ref ___stack (- ___sp ,(- (* i 2) 1)))) - (loop (+ i 1) rest))) - '())) - '())) - ,(if (= nt 0) - '$1 - `(___push ___stack (- ___sp ,(* 2 n)) - ,nt ___goto-table ,(cdr p) ___k))))))) - - gram/actions)))) - - -;; @section (api "API") - -(define-macro (lalr-parser tokens . rules) - (let* ((gram/actions (gen-tables! tokens rules)) - (code - `(letrec ((___max-stack-size 500) - - (___atable ',action-table) - (___gtable ,(build-goto-table)) - (___grow-stack (lambda (stack) - ;; make a new stack twice as big as the original - (let ((new-stack (make-vector (* 2 (vector-length stack)) #f))) - ;; then copy the elements... - (let loop ((i (- (vector-length stack) 1))) - (if (< i 0) - new-stack - (begin - (vector-set! new-stack i (vector-ref stack i)) - (loop (- i 1)))))))) - - (___push (lambda (stack sp new-cat goto-table lval k) - (let* ((state (vector-ref stack sp)) - (new-state (cdr (assq new-cat (vector-ref goto-table state)))) - (new-sp (+ sp 2)) - (stack (if (< new-sp (vector-length stack)) - stack - (___grow-stack stack)))) - (vector-set! stack new-sp new-state) - (vector-set! stack (- new-sp 1) lval) - (k stack new-sp)))) - - (___action (lambda (x l) - (let ((y (assq x l))) - (if y (cdr y) (cdar l))))) - - (___rtable ,(build-reduction-table gram/actions))) - - (lambda (lexerp errorp) - - (let ((stack (make-vector ___max-stack-size 0))) - (let loop ((stack stack) (sp 0) (input (lexerp))) - (let* ((state (vector-ref stack sp)) - (i (if (pair? input) (car input) input)) - (attr (if (pair? input) (cdr input) #f)) - (act (___action i (vector-ref ___atable state)))) - - (if (not (symbol? i)) - (errorp "PARSE ERROR: invalid token: " input)) - - (cond - - ;; Input succesfully parsed - ((eq? act 'accept) - (vector-ref stack 1)) - - ;; Syntax error in input - ((eq? act '*error*) - (if (eq? i '*eoi*) - (errorp "PARSE ERROR : unexpected end of input ") - (errorp "PARSE ERROR : unexpected token : " input))) - - ;; Shift current token on top of the stack - ((>= act 0) - (let ((stack (if (< (+ sp 2) (vector-length stack)) - stack - (___grow-stack stack)))) - (vector-set! stack (+ sp 1) attr) - (vector-set! stack (+ sp 2) act) - (loop stack (+ sp 2) (lexerp)))) - - ;; Reduce by rule (- act) - (else - ((vector-ref ___rtable (- act)) - stack sp ___gtable - (lambda (stack sp) - (loop stack sp input)))))))))))) - code)) - -;; arch-tag: 4FE771DE-F56D-11D8-8B77-000A95B4C7DC diff --git a/module/language/ecmascript/parse.scm b/module/language/ecmascript/parse.scm index ce731a736..e9d6673ce 100644 --- a/module/language/ecmascript/parse.scm +++ b/module/language/ecmascript/parse.scm @@ -1,6 +1,6 @@ ;;; ECMAScript for Guile -;; Copyright (C) 2009 Free Software Foundation, Inc. +;; Copyright (C) 2009, 2010 Free Software Foundation, Inc. ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -19,23 +19,29 @@ ;;; Code: (define-module (language ecmascript parse) - #:use-module (language ecmascript parse-lalr) + #:use-module (system base lalr) #:use-module (language ecmascript tokenize) - #:export (read-ecmascript read-ecmascript/1 parse-ecmascript)) + #:export (read-ecmascript read-ecmascript/1 make-parser)) (define (syntax-error message . args) (apply throw 'SyntaxError message args)) (define (read-ecmascript port) - (parse-ecmascript (make-tokenizer port) syntax-error)) + (let ((parse (make-parser))) + (parse (make-tokenizer port) syntax-error))) (define (read-ecmascript/1 port) - (parse-ecmascript (make-tokenizer/1 port) syntax-error)) + (let ((parse (make-parser))) + (parse (make-tokenizer/1 port) syntax-error))) (define *eof-object* (call-with-input-string "" read-char)) -(define parse-ecmascript +(define (make-parser) + ;; Return a fresh ECMAScript parser. Parsers produced by `lalr-scm' are now + ;; stateful (e.g., they won't invoke the tokenizer any more once it has + ;; returned `*eoi*'), hence the need to instantiate new parsers. + (lalr-parser ;; terminal (i.e. input) token types (lbrace rbrace lparen rparen lbracket rbracket dot semicolon comma < @@ -49,289 +55,289 @@ Identifier StringLiteral NumericLiteral RegexpLiteral) - (Program (SourceElements) -> $1 - (*eoi*) -> *eof-object*) + (Program (SourceElements) : $1 + (*eoi*) : *eof-object*) ;; ;; Verily, here we define statements. Expressions are defined ;; afterwards. ;; - (SourceElement (Statement) -> $1 - (FunctionDeclaration) -> $1) + (SourceElement (Statement) : $1 + (FunctionDeclaration) : $1) - (FunctionDeclaration (function Identifier lparen rparen lbrace FunctionBody rbrace) -> `(var (,$2 (lambda () ,$6))) - (function Identifier lparen FormalParameterList rparen lbrace FunctionBody rbrace) -> `(var (,$2 (lambda ,$4 ,$7)))) - (FunctionExpression (function lparen rparen lbrace FunctionBody rbrace) -> `(lambda () ,$5) - (function Identifier lparen rparen lbrace FunctionBody rbrace) -> `(lambda () ,$6) - (function lparen FormalParameterList rparen lbrace FunctionBody rbrace) -> `(lambda ,$3 ,$6) - (function Identifier lparen FormalParameterList rparen lbrace FunctionBody rbrace) -> `(lambda ,$4 ,$7)) - (FormalParameterList (Identifier) -> `(,$1) - (FormalParameterList comma Identifier) -> `(,@$1 ,$3)) - (SourceElements (SourceElement) -> $1 - (SourceElements SourceElement) -> (if (and (pair? $1) (eq? (car $1) 'begin)) + (FunctionDeclaration (function Identifier lparen rparen lbrace FunctionBody rbrace) : `(var (,$2 (lambda () ,$6))) + (function Identifier lparen FormalParameterList rparen lbrace FunctionBody rbrace) : `(var (,$2 (lambda ,$4 ,$7)))) + (FunctionExpression (function lparen rparen lbrace FunctionBody rbrace) : `(lambda () ,$5) + (function Identifier lparen rparen lbrace FunctionBody rbrace) : `(lambda () ,$6) + (function lparen FormalParameterList rparen lbrace FunctionBody rbrace) : `(lambda ,$3 ,$6) + (function Identifier lparen FormalParameterList rparen lbrace FunctionBody rbrace) : `(lambda ,$4 ,$7)) + (FormalParameterList (Identifier) : `(,$1) + (FormalParameterList comma Identifier) : `(,@$1 ,$3)) + (SourceElements (SourceElement) : $1 + (SourceElements SourceElement) : (if (and (pair? $1) (eq? (car $1) 'begin)) `(begin ,@(cdr $1) ,$2) `(begin ,$1 ,$2))) - (FunctionBody (SourceElements) -> $1) + (FunctionBody (SourceElements) : $1) - (Statement (Block) -> $1 - (VariableStatement) -> $1 - (EmptyStatement) -> $1 - (ExpressionStatement) -> $1 - (IfStatement) -> $1 - (IterationStatement) -> $1 - (ContinueStatement) -> $1 - (BreakStatement) -> $1 - (ReturnStatement) -> $1 - (WithStatement) -> $1 - (LabelledStatement) -> $1 - (SwitchStatement) -> $1 - (ThrowStatement) -> $1 - (TryStatement) -> $1) + (Statement (Block) : $1 + (VariableStatement) : $1 + (EmptyStatement) : $1 + (ExpressionStatement) : $1 + (IfStatement) : $1 + (IterationStatement) : $1 + (ContinueStatement) : $1 + (BreakStatement) : $1 + (ReturnStatement) : $1 + (WithStatement) : $1 + (LabelledStatement) : $1 + (SwitchStatement) : $1 + (ThrowStatement) : $1 + (TryStatement) : $1) - (Block (lbrace StatementList rbrace) -> `(block ,$2)) - (StatementList (Statement) -> $1 - (StatementList Statement) -> (if (and (pair? $1) (eq? (car $1) 'begin)) + (Block (lbrace StatementList rbrace) : `(block ,$2)) + (StatementList (Statement) : $1 + (StatementList Statement) : (if (and (pair? $1) (eq? (car $1) 'begin)) `(begin ,@(cdr $1) ,$2) `(begin ,$1 ,$2))) - (VariableStatement (var VariableDeclarationList) -> `(var ,@$2)) - (VariableDeclarationList (VariableDeclaration) -> `(,$1) - (VariableDeclarationList comma VariableDeclaration) -> `(,@$1 ,$2)) - (VariableDeclarationListNoIn (VariableDeclarationNoIn) -> `(,$1) - (VariableDeclarationListNoIn comma VariableDeclarationNoIn) -> `(,@$1 ,$2)) - (VariableDeclaration (Identifier) -> `(,$1) - (Identifier Initialiser) -> `(,$1 ,$2)) - (VariableDeclarationNoIn (Identifier) -> `(,$1) - (Identifier Initialiser) -> `(,$1 ,$2)) - (Initialiser (= AssignmentExpression) -> $2) - (InitialiserNoIn (= AssignmentExpressionNoIn) -> $2) + (VariableStatement (var VariableDeclarationList) : `(var ,@$2)) + (VariableDeclarationList (VariableDeclaration) : `(,$1) + (VariableDeclarationList comma VariableDeclaration) : `(,@$1 ,$2)) + (VariableDeclarationListNoIn (VariableDeclarationNoIn) : `(,$1) + (VariableDeclarationListNoIn comma VariableDeclarationNoIn) : `(,@$1 ,$2)) + (VariableDeclaration (Identifier) : `(,$1) + (Identifier Initialiser) : `(,$1 ,$2)) + (VariableDeclarationNoIn (Identifier) : `(,$1) + (Identifier Initialiser) : `(,$1 ,$2)) + (Initialiser (= AssignmentExpression) : $2) + (InitialiserNoIn (= AssignmentExpressionNoIn) : $2) - (EmptyStatement (semicolon) -> '(begin)) + (EmptyStatement (semicolon) : '(begin)) - (ExpressionStatement (Expression semicolon) -> $1) + (ExpressionStatement (Expression semicolon) : $1) - (IfStatement (if lparen Expression rparen Statement else Statement) -> `(if ,$3 ,$5 ,$7) - (if lparen Expression rparen Statement) -> `(if ,$3 ,$5)) + (IfStatement (if lparen Expression rparen Statement else Statement) : `(if ,$3 ,$5 ,$7) + (if lparen Expression rparen Statement) : `(if ,$3 ,$5)) - (IterationStatement (do Statement while lparen Expression rparen semicolon) -> `(do ,$2 ,$5) + (IterationStatement (do Statement while lparen Expression rparen semicolon) : `(do ,$2 ,$5) - (while lparen Expression rparen Statement) -> `(while ,$3 ,$5) + (while lparen Expression rparen Statement) : `(while ,$3 ,$5) - (for lparen semicolon semicolon rparen Statement) -> `(for #f #f #f ,$6) - (for lparen semicolon semicolon Expression rparen Statement) -> `(for #f #f ,$5 ,$7) - (for lparen semicolon Expression semicolon rparen Statement) -> `(for #f ,$4 #f ,$7) - (for lparen semicolon Expression semicolon Expression rparen Statement) -> `(for #f ,$4 ,$6 ,$8) + (for lparen semicolon semicolon rparen Statement) : `(for #f #f #f ,$6) + (for lparen semicolon semicolon Expression rparen Statement) : `(for #f #f ,$5 ,$7) + (for lparen semicolon Expression semicolon rparen Statement) : `(for #f ,$4 #f ,$7) + (for lparen semicolon Expression semicolon Expression rparen Statement) : `(for #f ,$4 ,$6 ,$8) - (for lparen ExpressionNoIn semicolon semicolon rparen Statement) -> `(for ,$3 #f #f ,$7) - (for lparen ExpressionNoIn semicolon semicolon Expression rparen Statement) -> `(for ,$3 #f ,$6 ,$8) - (for lparen ExpressionNoIn semicolon Expression semicolon rparen Statement) -> `(for ,$3 ,$5 #f ,$8) - (for lparen ExpressionNoIn semicolon Expression semicolon Expression rparen Statement) -> `(for ,$3 ,$5 ,$7 ,$9) + (for lparen ExpressionNoIn semicolon semicolon rparen Statement) : `(for ,$3 #f #f ,$7) + (for lparen ExpressionNoIn semicolon semicolon Expression rparen Statement) : `(for ,$3 #f ,$6 ,$8) + (for lparen ExpressionNoIn semicolon Expression semicolon rparen Statement) : `(for ,$3 ,$5 #f ,$8) + (for lparen ExpressionNoIn semicolon Expression semicolon Expression rparen Statement) : `(for ,$3 ,$5 ,$7 ,$9) - (for lparen var VariableDeclarationListNoIn semicolon semicolon rparen Statement) -> `(for (var ,@$4) #f #f ,$8) - (for lparen var VariableDeclarationListNoIn semicolon semicolon Expression rparen Statement) -> `(for (var ,@$4) #f ,$7 ,$9) - (for lparen var VariableDeclarationListNoIn semicolon Expression semicolon rparen Statement) -> `(for (var ,@$4) ,$6 #f ,$9) - (for lparen var VariableDeclarationListNoIn semicolon Expression semicolon Expression rparen Statement) -> `(for (var ,@$4) ,$6 ,$8 ,$10) + (for lparen var VariableDeclarationListNoIn semicolon semicolon rparen Statement) : `(for (var ,@$4) #f #f ,$8) + (for lparen var VariableDeclarationListNoIn semicolon semicolon Expression rparen Statement) : `(for (var ,@$4) #f ,$7 ,$9) + (for lparen var VariableDeclarationListNoIn semicolon Expression semicolon rparen Statement) : `(for (var ,@$4) ,$6 #f ,$9) + (for lparen var VariableDeclarationListNoIn semicolon Expression semicolon Expression rparen Statement) : `(for (var ,@$4) ,$6 ,$8 ,$10) - (for lparen LeftHandSideExpression in Expression rparen Statement) -> `(for-in ,$3 ,$5 ,$7) - (for lparen var VariableDeclarationNoIn in Expression rparen Statement) -> `(begin (var ,$4) (for-in (ref ,@$4) ,$6 ,$8))) + (for lparen LeftHandSideExpression in Expression rparen Statement) : `(for-in ,$3 ,$5 ,$7) + (for lparen var VariableDeclarationNoIn in Expression rparen Statement) : `(begin (var ,$4) (for-in (ref ,@$4) ,$6 ,$8))) - (ContinueStatement (continue Identifier semicolon) -> `(continue ,$2) - (continue semicolon) -> `(continue)) + (ContinueStatement (continue Identifier semicolon) : `(continue ,$2) + (continue semicolon) : `(continue)) - (BreakStatement (break Identifier semicolon) -> `(break ,$2) - (break semicolon) -> `(break)) + (BreakStatement (break Identifier semicolon) : `(break ,$2) + (break semicolon) : `(break)) - (ReturnStatement (return Expression semicolon) -> `(return ,$2) - (return semicolon) -> `(return)) + (ReturnStatement (return Expression semicolon) : `(return ,$2) + (return semicolon) : `(return)) - (WithStatement (with lparen Expression rparen Statement) -> `(with ,$3 ,$5)) + (WithStatement (with lparen Expression rparen Statement) : `(with ,$3 ,$5)) - (SwitchStatement (switch lparen Expression rparen CaseBlock) -> `(switch ,$3 ,@$5)) - (CaseBlock (lbrace rbrace) -> '() - (lbrace CaseClauses rbrace) -> $2 - (lbrace CaseClauses DefaultClause rbrace) -> `(,@$2 ,@$3) - (lbrace DefaultClause rbrace) -> `(,$2) - (lbrace DefaultClause CaseClauses rbrace) -> `(,@$2 ,@$3)) - (CaseClauses (CaseClause) -> `(,$1) - (CaseClauses CaseClause) -> `(,@$1 ,$2)) - (CaseClause (case Expression colon) -> `(case ,$2) - (case Expression colon StatementList) -> `(case ,$2 ,$4)) - (DefaultClause (default colon) -> `(default) - (default colon StatementList) -> `(default ,$3)) + (SwitchStatement (switch lparen Expression rparen CaseBlock) : `(switch ,$3 ,@$5)) + (CaseBlock (lbrace rbrace) : '() + (lbrace CaseClauses rbrace) : $2 + (lbrace CaseClauses DefaultClause rbrace) : `(,@$2 ,@$3) + (lbrace DefaultClause rbrace) : `(,$2) + (lbrace DefaultClause CaseClauses rbrace) : `(,@$2 ,@$3)) + (CaseClauses (CaseClause) : `(,$1) + (CaseClauses CaseClause) : `(,@$1 ,$2)) + (CaseClause (case Expression colon) : `(case ,$2) + (case Expression colon StatementList) : `(case ,$2 ,$4)) + (DefaultClause (default colon) : `(default) + (default colon StatementList) : `(default ,$3)) - (LabelledStatement (Identifier colon Statement) -> `(label ,$1 ,$3)) + (LabelledStatement (Identifier colon Statement) : `(label ,$1 ,$3)) - (ThrowStatement (throw Expression semicolon) -> `(throw ,$2)) + (ThrowStatement (throw Expression semicolon) : `(throw ,$2)) - (TryStatement (try Block Catch) -> `(try ,$2 ,$3 #f) - (try Block Finally) -> `(try ,$2 #f ,$3) - (try Block Catch Finally) -> `(try ,$2 ,$3 ,$4)) - (Catch (catch lparen Identifier rparen Block) -> `(catch ,$3 ,$5)) - (Finally (finally Block) -> `(finally ,$2)) + (TryStatement (try Block Catch) : `(try ,$2 ,$3 #f) + (try Block Finally) : `(try ,$2 #f ,$3) + (try Block Catch Finally) : `(try ,$2 ,$3 ,$4)) + (Catch (catch lparen Identifier rparen Block) : `(catch ,$3 ,$5)) + (Finally (finally Block) : `(finally ,$2)) ;; ;; As promised, expressions. We build up to Expression bottom-up, so ;; as to get operator precedence right. ;; - (PrimaryExpression (this) -> 'this - (null) -> 'null - (true) -> 'true - (false) -> 'false - (Identifier) -> `(ref ,$1) - (StringLiteral) -> `(string ,$1) - (RegexpLiteral) -> `(regexp ,$1) - (NumericLiteral) -> `(number ,$1) - (ArrayLiteral) -> $1 - (ObjectLiteral) -> $1 - (lparen Expression rparen) -> $2) + (PrimaryExpression (this) : 'this + (null) : 'null + (true) : 'true + (false) : 'false + (Identifier) : `(ref ,$1) + (StringLiteral) : `(string ,$1) + (RegexpLiteral) : `(regexp ,$1) + (NumericLiteral) : `(number ,$1) + (ArrayLiteral) : $1 + (ObjectLiteral) : $1 + (lparen Expression rparen) : $2) - (ArrayLiteral (lbracket rbracket) -> '(array) - (lbracket Elision rbracket) -> '(array ,@$2) - (lbracket ElementList rbracket) -> `(array ,@$2) - (lbracket ElementList comma rbracket) -> `(array ,@$2) - (lbracket ElementList comma Elision rbracket) -> `(array ,@$2)) - (ElementList (AssignmentExpression) -> `(,$1) - (Elision AssignmentExpression) -> `(,@$1 ,$2) - (ElementList comma AssignmentExpression) -> `(,@$1 ,$3) - (ElementList comma Elision AssignmentExpression) -> `(,@$1 ,@$3 ,$4)) - (Elision (comma) -> '((number 0)) - (Elision comma) -> `(,@$1 (number 0))) + (ArrayLiteral (lbracket rbracket) : '(array) + (lbracket Elision rbracket) : '(array ,@$2) + (lbracket ElementList rbracket) : `(array ,@$2) + (lbracket ElementList comma rbracket) : `(array ,@$2) + (lbracket ElementList comma Elision rbracket) : `(array ,@$2)) + (ElementList (AssignmentExpression) : `(,$1) + (Elision AssignmentExpression) : `(,@$1 ,$2) + (ElementList comma AssignmentExpression) : `(,@$1 ,$3) + (ElementList comma Elision AssignmentExpression) : `(,@$1 ,@$3 ,$4)) + (Elision (comma) : '((number 0)) + (Elision comma) : `(,@$1 (number 0))) - (ObjectLiteral (lbrace rbrace) -> `(object) - (lbrace PropertyNameAndValueList rbrace) -> `(object ,@$2)) - (PropertyNameAndValueList (PropertyName colon AssignmentExpression) -> `((,$1 ,$3)) - (PropertyNameAndValueList comma PropertyName colon AssignmentExpression) -> `(,@$1 (,$3 ,$5))) - (PropertyName (Identifier) -> $1 - (StringLiteral) -> (string->symbol $1) - (NumericLiteral) -> $1) + (ObjectLiteral (lbrace rbrace) : `(object) + (lbrace PropertyNameAndValueList rbrace) : `(object ,@$2)) + (PropertyNameAndValueList (PropertyName colon AssignmentExpression) : `((,$1 ,$3)) + (PropertyNameAndValueList comma PropertyName colon AssignmentExpression) : `(,@$1 (,$3 ,$5))) + (PropertyName (Identifier) : $1 + (StringLiteral) : (string->symbol $1) + (NumericLiteral) : $1) - (MemberExpression (PrimaryExpression) -> $1 - (FunctionExpression) -> $1 - (MemberExpression lbracket Expression rbracket) -> `(aref ,$1 ,$3) - (MemberExpression dot Identifier) -> `(pref ,$1 ,$3) - (new MemberExpression Arguments) -> `(new ,$2 ,$3)) + (MemberExpression (PrimaryExpression) : $1 + (FunctionExpression) : $1 + (MemberExpression lbracket Expression rbracket) : `(aref ,$1 ,$3) + (MemberExpression dot Identifier) : `(pref ,$1 ,$3) + (new MemberExpression Arguments) : `(new ,$2 ,$3)) - (NewExpression (MemberExpression) -> $1 - (new NewExpression) -> `(new ,$2 ())) + (NewExpression (MemberExpression) : $1 + (new NewExpression) : `(new ,$2 ())) - (CallExpression (MemberExpression Arguments) -> `(call ,$1 ,$2) - (CallExpression Arguments) -> `(call ,$1 ,$2) - (CallExpression lbracket Expression rbracket) -> `(aref ,$1 ,$3) - (CallExpression dot Identifier) -> `(pref ,$1 ,$3)) - (Arguments (lparen rparen) -> '() - (lparen ArgumentList rparen) -> $2) - (ArgumentList (AssignmentExpression) -> `(,$1) - (ArgumentList comma AssignmentExpression) -> `(,@$1 ,$3)) + (CallExpression (MemberExpression Arguments) : `(call ,$1 ,$2) + (CallExpression Arguments) : `(call ,$1 ,$2) + (CallExpression lbracket Expression rbracket) : `(aref ,$1 ,$3) + (CallExpression dot Identifier) : `(pref ,$1 ,$3)) + (Arguments (lparen rparen) : '() + (lparen ArgumentList rparen) : $2) + (ArgumentList (AssignmentExpression) : `(,$1) + (ArgumentList comma AssignmentExpression) : `(,@$1 ,$3)) - (LeftHandSideExpression (NewExpression) -> $1 - (CallExpression) -> $1) + (LeftHandSideExpression (NewExpression) : $1 + (CallExpression) : $1) - (PostfixExpression (LeftHandSideExpression) -> $1 - (LeftHandSideExpression ++) -> `(postinc ,$1) - (LeftHandSideExpression --) -> `(postdec ,$1)) + (PostfixExpression (LeftHandSideExpression) : $1 + (LeftHandSideExpression ++) : `(postinc ,$1) + (LeftHandSideExpression --) : `(postdec ,$1)) - (UnaryExpression (PostfixExpression) -> $1 - (delete UnaryExpression) -> `(delete ,$2) - (void UnaryExpression) -> `(void ,$2) - (typeof UnaryExpression) -> `(typeof ,$2) - (++ UnaryExpression) -> `(preinc ,$2) - (-- UnaryExpression) -> `(predec ,$2) - (+ UnaryExpression) -> `(+ ,$2) - (- UnaryExpression) -> `(- ,$2) - (~ UnaryExpression) -> `(~ ,$2) - (! UnaryExpression) -> `(! ,$2)) + (UnaryExpression (PostfixExpression) : $1 + (delete UnaryExpression) : `(delete ,$2) + (void UnaryExpression) : `(void ,$2) + (typeof UnaryExpression) : `(typeof ,$2) + (++ UnaryExpression) : `(preinc ,$2) + (-- UnaryExpression) : `(predec ,$2) + (+ UnaryExpression) : `(+ ,$2) + (- UnaryExpression) : `(- ,$2) + (~ UnaryExpression) : `(~ ,$2) + (! UnaryExpression) : `(! ,$2)) - (MultiplicativeExpression (UnaryExpression) -> $1 - (MultiplicativeExpression * UnaryExpression) -> `(* ,$1 ,$3) - (MultiplicativeExpression / UnaryExpression) -> `(/ ,$1 ,$3) - (MultiplicativeExpression % UnaryExpression) -> `(% ,$1 ,$3)) + (MultiplicativeExpression (UnaryExpression) : $1 + (MultiplicativeExpression * UnaryExpression) : `(* ,$1 ,$3) + (MultiplicativeExpression / UnaryExpression) : `(/ ,$1 ,$3) + (MultiplicativeExpression % UnaryExpression) : `(% ,$1 ,$3)) - (AdditiveExpression (MultiplicativeExpression) -> $1 - (AdditiveExpression + MultiplicativeExpression) -> `(+ ,$1 ,$3) - (AdditiveExpression - MultiplicativeExpression) -> `(- ,$1 ,$3)) + (AdditiveExpression (MultiplicativeExpression) : $1 + (AdditiveExpression + MultiplicativeExpression) : `(+ ,$1 ,$3) + (AdditiveExpression - MultiplicativeExpression) : `(- ,$1 ,$3)) - (ShiftExpression (AdditiveExpression) -> $1 - (ShiftExpression << MultiplicativeExpression) -> `(<< ,$1 ,$3) - (ShiftExpression >> MultiplicativeExpression) -> `(>> ,$1 ,$3) - (ShiftExpression >>> MultiplicativeExpression) -> `(>>> ,$1 ,$3)) + (ShiftExpression (AdditiveExpression) : $1 + (ShiftExpression << MultiplicativeExpression) : `(<< ,$1 ,$3) + (ShiftExpression >> MultiplicativeExpression) : `(>> ,$1 ,$3) + (ShiftExpression >>> MultiplicativeExpression) : `(>>> ,$1 ,$3)) - (RelationalExpression (ShiftExpression) -> $1 - (RelationalExpression < ShiftExpression) -> `(< ,$1 ,$3) - (RelationalExpression > ShiftExpression) -> `(> ,$1 ,$3) - (RelationalExpression <= ShiftExpression) -> `(<= ,$1 ,$3) - (RelationalExpression >= ShiftExpression) -> `(>= ,$1 ,$3) - (RelationalExpression instanceof ShiftExpression) -> `(instanceof ,$1 ,$3) - (RelationalExpression in ShiftExpression) -> `(in ,$1 ,$3)) + (RelationalExpression (ShiftExpression) : $1 + (RelationalExpression < ShiftExpression) : `(< ,$1 ,$3) + (RelationalExpression > ShiftExpression) : `(> ,$1 ,$3) + (RelationalExpression <= ShiftExpression) : `(<= ,$1 ,$3) + (RelationalExpression >= ShiftExpression) : `(>= ,$1 ,$3) + (RelationalExpression instanceof ShiftExpression) : `(instanceof ,$1 ,$3) + (RelationalExpression in ShiftExpression) : `(in ,$1 ,$3)) - (RelationalExpressionNoIn (ShiftExpression) -> $1 - (RelationalExpressionNoIn < ShiftExpression) -> `(< ,$1 ,$3) - (RelationalExpressionNoIn > ShiftExpression) -> `(> ,$1 ,$3) - (RelationalExpressionNoIn <= ShiftExpression) -> `(<= ,$1 ,$3) - (RelationalExpressionNoIn >= ShiftExpression) -> `(>= ,$1 ,$3) - (RelationalExpressionNoIn instanceof ShiftExpression) -> `(instanceof ,$1 ,$3)) + (RelationalExpressionNoIn (ShiftExpression) : $1 + (RelationalExpressionNoIn < ShiftExpression) : `(< ,$1 ,$3) + (RelationalExpressionNoIn > ShiftExpression) : `(> ,$1 ,$3) + (RelationalExpressionNoIn <= ShiftExpression) : `(<= ,$1 ,$3) + (RelationalExpressionNoIn >= ShiftExpression) : `(>= ,$1 ,$3) + (RelationalExpressionNoIn instanceof ShiftExpression) : `(instanceof ,$1 ,$3)) - (EqualityExpression (RelationalExpression) -> $1 - (EqualityExpression == RelationalExpression) -> `(== ,$1 ,$3) - (EqualityExpression != RelationalExpression) -> `(!= ,$1 ,$3) - (EqualityExpression === RelationalExpression) -> `(=== ,$1 ,$3) - (EqualityExpression !== RelationalExpression) -> `(!== ,$1 ,$3)) + (EqualityExpression (RelationalExpression) : $1 + (EqualityExpression == RelationalExpression) : `(== ,$1 ,$3) + (EqualityExpression != RelationalExpression) : `(!= ,$1 ,$3) + (EqualityExpression === RelationalExpression) : `(=== ,$1 ,$3) + (EqualityExpression !== RelationalExpression) : `(!== ,$1 ,$3)) - (EqualityExpressionNoIn (RelationalExpressionNoIn) -> $1 - (EqualityExpressionNoIn == RelationalExpressionNoIn) -> `(== ,$1 ,$3) - (EqualityExpressionNoIn != RelationalExpressionNoIn) -> `(!= ,$1 ,$3) - (EqualityExpressionNoIn === RelationalExpressionNoIn) -> `(=== ,$1 ,$3) - (EqualityExpressionNoIn !== RelationalExpressionNoIn) -> `(!== ,$1 ,$3)) + (EqualityExpressionNoIn (RelationalExpressionNoIn) : $1 + (EqualityExpressionNoIn == RelationalExpressionNoIn) : `(== ,$1 ,$3) + (EqualityExpressionNoIn != RelationalExpressionNoIn) : `(!= ,$1 ,$3) + (EqualityExpressionNoIn === RelationalExpressionNoIn) : `(=== ,$1 ,$3) + (EqualityExpressionNoIn !== RelationalExpressionNoIn) : `(!== ,$1 ,$3)) - (BitwiseANDExpression (EqualityExpression) -> $1 - (BitwiseANDExpression & EqualityExpression) -> `(& ,$1 ,$3)) - (BitwiseANDExpressionNoIn (EqualityExpressionNoIn) -> $1 - (BitwiseANDExpressionNoIn & EqualityExpressionNoIn) -> `(& ,$1 ,$3)) + (BitwiseANDExpression (EqualityExpression) : $1 + (BitwiseANDExpression & EqualityExpression) : `(& ,$1 ,$3)) + (BitwiseANDExpressionNoIn (EqualityExpressionNoIn) : $1 + (BitwiseANDExpressionNoIn & EqualityExpressionNoIn) : `(& ,$1 ,$3)) - (BitwiseXORExpression (BitwiseANDExpression) -> $1 - (BitwiseXORExpression ^ BitwiseANDExpression) -> `(^ ,$1 ,$3)) - (BitwiseXORExpressionNoIn (BitwiseANDExpressionNoIn) -> $1 - (BitwiseXORExpressionNoIn ^ BitwiseANDExpressionNoIn) -> `(^ ,$1 ,$3)) + (BitwiseXORExpression (BitwiseANDExpression) : $1 + (BitwiseXORExpression ^ BitwiseANDExpression) : `(^ ,$1 ,$3)) + (BitwiseXORExpressionNoIn (BitwiseANDExpressionNoIn) : $1 + (BitwiseXORExpressionNoIn ^ BitwiseANDExpressionNoIn) : `(^ ,$1 ,$3)) - (BitwiseORExpression (BitwiseXORExpression) -> $1 - (BitwiseORExpression bor BitwiseXORExpression) -> `(bor ,$1 ,$3)) - (BitwiseORExpressionNoIn (BitwiseXORExpressionNoIn) -> $1 - (BitwiseORExpressionNoIn bor BitwiseXORExpressionNoIn) -> `(bor ,$1 ,$3)) + (BitwiseORExpression (BitwiseXORExpression) : $1 + (BitwiseORExpression bor BitwiseXORExpression) : `(bor ,$1 ,$3)) + (BitwiseORExpressionNoIn (BitwiseXORExpressionNoIn) : $1 + (BitwiseORExpressionNoIn bor BitwiseXORExpressionNoIn) : `(bor ,$1 ,$3)) - (LogicalANDExpression (BitwiseORExpression) -> $1 - (LogicalANDExpression && BitwiseORExpression) -> `(and ,$1 ,$3)) - (LogicalANDExpressionNoIn (BitwiseORExpressionNoIn) -> $1 - (LogicalANDExpressionNoIn && BitwiseORExpressionNoIn) -> `(and ,$1 ,$3)) + (LogicalANDExpression (BitwiseORExpression) : $1 + (LogicalANDExpression && BitwiseORExpression) : `(and ,$1 ,$3)) + (LogicalANDExpressionNoIn (BitwiseORExpressionNoIn) : $1 + (LogicalANDExpressionNoIn && BitwiseORExpressionNoIn) : `(and ,$1 ,$3)) - (LogicalORExpression (LogicalANDExpression) -> $1 - (LogicalORExpression or LogicalANDExpression) -> `(or ,$1 ,$3)) - (LogicalORExpressionNoIn (LogicalANDExpressionNoIn) -> $1 - (LogicalORExpressionNoIn or LogicalANDExpressionNoIn) -> `(or ,$1 ,$3)) + (LogicalORExpression (LogicalANDExpression) : $1 + (LogicalORExpression or LogicalANDExpression) : `(or ,$1 ,$3)) + (LogicalORExpressionNoIn (LogicalANDExpressionNoIn) : $1 + (LogicalORExpressionNoIn or LogicalANDExpressionNoIn) : `(or ,$1 ,$3)) - (ConditionalExpression (LogicalORExpression) -> $1 - (LogicalORExpression ? AssignmentExpression colon AssignmentExpression) -> `(if ,$1 ,$3 ,$5)) - (ConditionalExpressionNoIn (LogicalORExpressionNoIn) -> $1 - (LogicalORExpressionNoIn ? AssignmentExpressionNoIn colon AssignmentExpressionNoIn) -> `(if ,$1 ,$3 ,$5)) + (ConditionalExpression (LogicalORExpression) : $1 + (LogicalORExpression ? AssignmentExpression colon AssignmentExpression) : `(if ,$1 ,$3 ,$5)) + (ConditionalExpressionNoIn (LogicalORExpressionNoIn) : $1 + (LogicalORExpressionNoIn ? AssignmentExpressionNoIn colon AssignmentExpressionNoIn) : `(if ,$1 ,$3 ,$5)) - (AssignmentExpression (ConditionalExpression) -> $1 - (LeftHandSideExpression AssignmentOperator AssignmentExpression) -> `(,$2 ,$1 ,$3)) - (AssignmentExpressionNoIn (ConditionalExpressionNoIn) -> $1 - (LeftHandSideExpression AssignmentOperator AssignmentExpressionNoIn) -> `(,$2 ,$1 ,$3)) - (AssignmentOperator (=) -> '= - (*=) -> '*= - (/=) -> '/= - (%=) -> '%= - (+=) -> '+= - (-=) -> '-= - (<<=) -> '<<= - (>>=) -> '>>= - (>>>=) -> '>>>= - (&=) -> '&= - (^=) -> '^= - (bor=) -> 'bor=) + (AssignmentExpression (ConditionalExpression) : $1 + (LeftHandSideExpression AssignmentOperator AssignmentExpression) : `(,$2 ,$1 ,$3)) + (AssignmentExpressionNoIn (ConditionalExpressionNoIn) : $1 + (LeftHandSideExpression AssignmentOperator AssignmentExpressionNoIn) : `(,$2 ,$1 ,$3)) + (AssignmentOperator (=) : '= + (*=) : '*= + (/=) : '/= + (%=) : '%= + (+=) : '+= + (-=) : '-= + (<<=) : '<<= + (>>=) : '>>= + (>>>=) : '>>>= + (&=) : '&= + (^=) : '^= + (bor=) : 'bor=) - (Expression (AssignmentExpression) -> $1 - (Expression comma AssignmentExpression) -> `(begin ,$1 ,$3)) - (ExpressionNoIn (AssignmentExpressionNoIn) -> $1 - (ExpressionNoIn comma AssignmentExpressionNoIn) -> `(begin ,$1 ,$3)))) + (Expression (AssignmentExpression) : $1 + (Expression comma AssignmentExpression) : `(begin ,$1 ,$3)) + (ExpressionNoIn (AssignmentExpressionNoIn) : $1 + (ExpressionNoIn comma AssignmentExpressionNoIn) : `(begin ,$1 ,$3)))) diff --git a/module/language/ecmascript/tokenize.scm b/module/language/ecmascript/tokenize.scm index 2ab8045cc..65a8b1e62 100644 --- a/module/language/ecmascript/tokenize.scm +++ b/module/language/ecmascript/tokenize.scm @@ -1,6 +1,6 @@ ;;; ECMAScript for Guile -;; Copyright (C) 2009 Free Software Foundation, Inc. +;; Copyright (C) 2009, 2010 Free Software Foundation, Inc. ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -21,6 +21,7 @@ (define-module (language ecmascript tokenize) #:use-module (ice-9 rdelim) #:use-module ((srfi srfi-1) #:select (unfold-right)) + #:use-module (system base lalr) #:export (next-token make-tokenizer make-tokenizer/1 tokenize tokenize/1)) (define (syntax-error message . args) @@ -75,8 +76,8 @@ (lp (read-char port)))))) (div? (case c1 - ((#\=) (read-char port) `(/= . #f)) - (else `(/ . #f)))) + ((#\=) (read-char port) (make-lexical-token '/= #f #f)) + (else (make-lexical-token '/ #f #f)))) (else (read-regexp port))))) @@ -95,7 +96,9 @@ (char-numeric? c) (char=? c #\$) (char=? c #\_)))) - `(RegexpLiteral . (,(string-append head str) . ,(reverse flags))) + (make-lexical-token 'RegexpLiteral #f + (cons (string-append head str) + (reverse flags))) (begin (read-char port) (lp (peek-char port) (cons c flags)))))) ((char=? terminator #\\) @@ -216,7 +219,7 @@ ("import" . import) ("public" . public))) -(define (read-identifier port) +(define (read-identifier port loc) (let lp ((c (peek-char port)) (chars '())) (if (or (eof-object? c) (not (or (char-alphabetic? c) @@ -225,10 +228,11 @@ (char=? c #\_)))) (let ((word (list->string (reverse chars)))) (cond ((assoc-ref *keywords* word) - => (lambda (x) `(,x . #f))) + => (lambda (x) (make-lexical-token x loc #f))) ((assoc-ref *future-reserved-words* word) (syntax-error "word is reserved for the future, dude." word)) - (else `(Identifier . ,(string->symbol word))))) + (else (make-lexical-token 'Identifier loc + (string->symbol word))))) (begin (read-char port) (lp (peek-char port) (cons c chars)))))) @@ -368,7 +372,7 @@ (else (lp (cons (list (string-ref (caar puncs) 0) #f) nodes) puncs)))))) - (lambda (port) + (lambda (port loc) (let lp ((c (peek-char port)) (tree punc-tree) (candidate #f)) (cond ((assv-ref tree c) @@ -376,15 +380,17 @@ (read-char port) (lp (peek-char port) (cdr node-tail) (car node-tail)))) (candidate - `(,candidate . #f)) + (make-lexical-token candidate loc #f)) (else (syntax-error "bad syntax: character not allowed" c))))))) (define (next-token port div?) - (let ((c (peek-char port)) - (props `((filename . ,(port-filename port)) - (line . ,(port-line port)) - (column . ,(port-column port))))) + (let ((c (peek-char port)) + (loc (make-source-location (port-filename port) + (port-line port) + (port-column port) + (false-if-exception (seek port 0 SEEK_CUR)) + #f))) (let ((tok (case c ((#\ht #\vt #\np #\space) @@ -400,7 +406,7 @@ (read-slash port div?)) ((#\" #\') ; string literal - `(StringLiteral . ,(read-string port))) + (make-lexical-token 'StringLiteral loc (read-string port))) (else (cond ((eof-object? c) @@ -409,15 +415,14 @@ (char=? c #\$) (char=? c #\_)) ;; reserved word or identifier - (read-identifier port)) + (read-identifier port loc)) ((char-numeric? c) ;; numeric -- also accept . FIXME, requires lookahead - `(NumericLiteral . ,(read-numeric port))) + (make-lexical-token 'NumericLiteral loc (read-numeric port))) (else ;; punctuation - (read-punctuation port))))))) - (if (pair? tok) - (set-source-properties! tok props)) + (read-punctuation port loc))))))) + tok))) (define (make-tokenizer port) @@ -435,31 +440,32 @@ (if eoi? '*eoi* (let ((tok (next-token port div?))) - (case (if (pair? tok) (car tok) tok) + (case (if (lexical-token? tok) (lexical-token-category tok) tok) ((lparen) - (set! stack (cons 'lparen stack))) + (set! stack (make-lexical-token 'lparen #f stack))) ((rparen) (if (and (pair? stack) (eq? (car stack) 'lparen)) (set! stack (cdr stack)) (syntax-error "unexpected right parenthesis"))) ((lbracket) - (set! stack (cons 'lbracket stack))) + (set! stack (make-lexical-token 'lbracket #f stack))) ((rbracket) (if (and (pair? stack) (eq? (car stack) 'lbracket)) (set! stack (cdr stack)) (syntax-error "unexpected right bracket" stack))) ((lbrace) - (set! stack (cons 'lbrace stack))) + (set! stack (make-lexical-token 'lbrace #f stack))) ((rbrace) (if (and (pair? stack) (eq? (car stack) 'lbrace)) (set! stack (cdr stack)) (syntax-error "unexpected right brace" stack))) ((semicolon) (set! eoi? (null? stack)))) - (set! div? (and (pair? tok) - (or (eq? (car tok) 'Identifier) - (eq? (car tok) 'NumericLiteral) - (eq? (car tok) 'StringLiteral)))) + (set! div? (and (lexical-token? tok) + (let ((cat (lexical-token-category tok))) + (or (eq? cat 'Identifier) + (eq? cat 'NumericLiteral) + (eq? cat 'StringLiteral))))) tok))))) (define (tokenize port)