;;; Guile Emacs Lisp ;;; Copyright (C) 2009 Free Software Foundation, Inc. ;;; ;;; This library is free software; you can redistribute it and/or ;;; modify it under the terms of the GNU Lesser General Public ;;; License as published by the Free Software Foundation; either ;;; version 3 of the License, or (at your option) any later version. ;;; ;;; This library is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Lesser General Public License for more details. ;;; ;;; You should have received a copy of the GNU Lesser General Public ;;; License along with this library; if not, write to the Free Software ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;;; Code: (define-module (language elisp parser) #:use-module (language elisp lexer) #:export (read-elisp)) ; The parser (reader) for elisp expressions. ; Is is hand-written (just as the lexer is) instead of using some parser ; generator because this allows easier transfer of source properties from the ; lexer ((text parse-lalr) seems not to allow access to the original lexer ; token-pair) and is easy enough anyways. ; Report a parse error. The first argument is some current lexer token ; where source information is available should it be useful. (define (parse-error token msg . args) (apply error msg args)) ; For parsing circular structures, we keep track of definitions in a ; hash-map that maps the id's to their values. ; When defining a new id, though, we immediatly fill the slot with a promise ; before parsing and setting the real value, because it must already be ; available at that time in case of a circular reference. The promise refers ; to a local variable that will be set when the real value is available through ; a closure. After parsing the expression is completed, we work through it ; again and force all promises we find. ; The definitions themselves are stored in a fluid and their scope is one ; call to read-elisp (but not only the currently parsed expression!). (define circular-definitions (make-fluid)) (define (make-circular-definitions) (make-hash-table)) (define (circular-ref token) (if (not (eq? (car token) 'circular-ref)) (error "invalid token for circular-ref" token)) (let* ((id (cdr token)) (value (hashq-ref (fluid-ref circular-definitions) id))) (if value value (parse-error token "undefined circular reference" id)))) ; Returned is a closure that, when invoked, will set the final value. ; This means both the variable the promise will return and the hash-table ; slot so we don't generate promises any longer. (define (circular-define! token) (if (not (eq? (car token) 'circular-def)) (error "invalid token for circular-define!" token)) (let ((value #f) (table (fluid-ref circular-definitions)) (id (cdr token))) (hashq-set! table id (delay value)) (lambda (real-value) (set! value real-value) (hashq-set! table id real-value)))) ; Work through a parsed data structure and force the promises there. ; After a promise is forced, the resulting value must not be recursed on; ; this may lead to infinite recursion with a circular structure, and ; additionally this value was already processed when it was defined. ; All deep data structures that can be parsed must be handled here! (define (force-promises! data) (cond ((pair? data) (begin (if (promise? (car data)) (set-car! data (force (car data))) (force-promises! (car data))) (if (promise? (cdr data)) (set-cdr! data (force (cdr data))) (force-promises! (cdr data))))) ((vector? data) (let ((len (vector-length data))) (let iterate ((i 0)) (if (< i len) (let ((el (vector-ref data i))) (if (promise? el) (vector-set! data i (force el)) (force-promises! el)) (iterate (1+ i))))))) ; Else nothing needs to be done. )) ; We need peek-functionality for the next lexer token, this is done with some ; single token look-ahead storage. This is handled by a closure which allows ; getting or peeking the next token. ; When one expression is fully parsed, we don't want a look-ahead stored here ; because it would miss from future parsing. This is verified by the finish ; action. (define (make-lexer-buffer lex) (let ((look-ahead #f)) (lambda (action) (if (eq? action 'finish) (if look-ahead (error "lexer-buffer is not empty when finished") #f) (begin (if (not look-ahead) (set! look-ahead (lex))) (case action ((peek) look-ahead) ((get) (let ((result look-ahead)) (set! look-ahead #f) result)) (else (error "invalid lexer-buffer action" action)))))))) ; Get the contents of a list, where the opening parentheses has already been ; found. The same code is used for vectors and lists, where lists allow the ; dotted tail syntax and vectors not; additionally, the closing parenthesis ; must of course match. ; The implementation here is not tail-recursive, but I think it is clearer ; and simpler this way. (define (get-list lex allow-dot close-square) (let* ((next (lex 'peek)) (type (car next))) (cond ((eq? type (if close-square 'square-close 'paren-close)) (begin (if (not (eq? (car (lex 'get)) type)) (error "got different token than peeked")) '())) ((and allow-dot (eq? type 'dot)) (begin (if (not (eq? (car (lex 'get)) type)) (error "got different token than peeked")) (let ((tail (get-list lex #f close-square))) (if (not (= (length tail) 1)) (parse-error next "expected exactly one element after dot")) (car tail)))) (else ; Do both parses in exactly this sequence! (let* ((head (get-expression lex)) (tail (get-list lex allow-dot close-square))) (cons head tail)))))) ; Parse a single expression from a lexer-buffer. This is the main routine in ; our recursive-descent parser. (define quotation-symbols '((quote . quote) (backquote . \`) (unquote . \,) (unquote-splicing . \,@))) (define (get-expression lex) (let* ((token (lex 'get)) (type (car token)) (return (lambda (result) (if (pair? result) (set-source-properties! result (source-properties token))) result))) (case type ((integer float symbol character string) (return (cdr token))) ((quote backquote unquote unquote-splicing) (return (list (assq-ref quotation-symbols type) (get-expression lex)))) ((paren-open) (return (get-list lex #t #f))) ((square-open) (return (list->vector (get-list lex #f #t)))) ((circular-ref) (circular-ref token)) ((circular-def) ; The order of definitions is important! (let* ((setter (circular-define! token)) (expr (get-expression lex))) (setter expr) (force-promises! expr) expr)) (else (parse-error token "expected expression, got" token))))) ; Define the reader function based on this; build a lexer, a lexer-buffer, ; and then parse a single expression to return. ; We also define a circular-definitions data structure to use. (define (read-elisp port) (with-fluids ((circular-definitions (make-circular-definitions))) (let* ((lexer (get-lexer port)) (lexbuf (make-lexer-buffer lexer)) (result (get-expression lexbuf))) (lexbuf 'finish) result)))