mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-10 14:00:21 +02:00
Replaced generated elisp parser with hand-written one to fix source properties.
* module/language/elisp/parser.scm: Hand-written parser. * test-suite/tests/elisp-reader.test: Test for source properties.
This commit is contained in:
parent
15eeabfd53
commit
9e90010f07
2 changed files with 112 additions and 48 deletions
|
@ -1,6 +1,6 @@
|
|||
;;; Guile Emac Lisp
|
||||
|
||||
;; Copyright (C) 2001 Free Software Foundation, Inc.
|
||||
;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
|
||||
;; This program is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,56 +21,112 @@
|
|||
|
||||
(define-module (language elisp parser)
|
||||
#:use-module (language elisp lexer)
|
||||
#:use-module (language ecmascript parse-lalr)
|
||||
#:export (read-elisp))
|
||||
|
||||
; The parser (reader) for elisp expressions. It is implemented using the
|
||||
; (text parse-lalr) parser generator and uses my hand-written lexer as
|
||||
; the tokenizer.
|
||||
; The parser (reader) for elisp expressions.
|
||||
; Is is hand-written (just as the lexer is) instead of using some parser
|
||||
; generator because this allows easier transfer of source properties from the
|
||||
; lexer, makes the circular syntax parsing easier (as it would be with
|
||||
; (text parse-lalr) and is easy enough anyways.
|
||||
|
||||
|
||||
; Build the parser itself using parse-lalr.
|
||||
; Report a parse error. The first argument is some current lexer token
|
||||
; where source information is available should it be useful.
|
||||
|
||||
(define elisp-parser
|
||||
(lalr-parser (integer float symbol character string
|
||||
paren-open paren-close square-open square-close
|
||||
dot quote backquote unquote unquote-splicing)
|
||||
|
||||
; Expressions are our main interest.
|
||||
; It seems the symbol we're interested for return from the parser must
|
||||
; come very first, so here it is.
|
||||
(expression (integer) -> $1
|
||||
(float) -> $1
|
||||
(symbol) -> $1
|
||||
(character) -> $1
|
||||
(string) -> $1
|
||||
(list) -> $1
|
||||
(quotation) -> $1
|
||||
(vector) -> $1)
|
||||
|
||||
; Pairs, lists and dotted lists.
|
||||
(partial-list (expression) -> (list $1)
|
||||
(expression dot expression) -> (cons $1 $3)
|
||||
(expression partial-list) -> (cons $1 $2))
|
||||
(list (paren-open paren-close) -> '()
|
||||
(paren-open dot expression paren-close) -> $3
|
||||
(paren-open partial-list paren-close) -> $2)
|
||||
|
||||
; Quotation and unquotation expressions.
|
||||
(quotation (quote expression) -> `(quote ,$2)
|
||||
(backquote expression) -> `(\` ,$2)
|
||||
(unquote expression) -> `(\, ,$2)
|
||||
(unquote-splicing expression) -> `(\,@ ,$2))
|
||||
|
||||
; Vectors.
|
||||
(vector-elements (expression) -> (list $1)
|
||||
(expression vector-elements) -> (cons $1 $2))
|
||||
(vector (square-open square-close) -> (make-vector 0)
|
||||
(square-open vector-elements square-close) -> (list->vector $2))))
|
||||
(define (parse-error token msg . args)
|
||||
(apply error msg args))
|
||||
|
||||
|
||||
; Use the parser to define the elisp reader function.
|
||||
; We only want to read a single expression at a time, so use get-lexer/1.
|
||||
; We need peek-functionality for the next lexer token, this is done with some
|
||||
; single token look-ahead storage. This is handled by a closure which allows
|
||||
; getting or peeking the next token.
|
||||
; When one expression is fully parsed, we don't want a look-ahead stored here
|
||||
; because it would miss from future parsing. This is verified by the finish
|
||||
; action.
|
||||
|
||||
(define (make-lexer-buffer lex)
|
||||
(let ((look-ahead #f))
|
||||
(lambda (action)
|
||||
(if (eq? action 'finish)
|
||||
(if look-ahead
|
||||
(error "lexer-buffer is not empty when finished")
|
||||
#f)
|
||||
(begin
|
||||
(if (not look-ahead)
|
||||
(set! look-ahead (lex)))
|
||||
(case action
|
||||
((peek) look-ahead)
|
||||
((get)
|
||||
(let ((result look-ahead))
|
||||
(set! look-ahead #f)
|
||||
result))
|
||||
(else (error "invalid lexer-buffer action" action))))))))
|
||||
|
||||
|
||||
; Get the contents of a list, where the opening parentheses has already been
|
||||
; found. The same code is used for vectors and lists, where lists allow the
|
||||
; dotted tail syntax and vectors not; additionally, the closing parenthesis
|
||||
; must of course match.
|
||||
|
||||
(define (get-list lex allow-dot close-square)
|
||||
(let* ((next (lex 'peek))
|
||||
(type (car next)))
|
||||
(cond
|
||||
((eq? type (if close-square 'square-close 'paren-close))
|
||||
(begin
|
||||
(if (not (eq? (car (lex 'get)) type))
|
||||
(error "got different token than peeked"))
|
||||
'()))
|
||||
((and allow-dot (eq? type 'dot))
|
||||
(begin
|
||||
(if (not (eq? (car (lex 'get)) type))
|
||||
(error "got different token than peeked"))
|
||||
(let ((tail (get-list lex #f close-square)))
|
||||
(if (not (= (length tail) 1))
|
||||
(parse-error next "expected exactly one element after dot"))
|
||||
(car tail))))
|
||||
(else
|
||||
; Do both parses in exactly this sequence!
|
||||
(let* ((head (get-expression lex))
|
||||
(tail (get-list lex allow-dot close-square)))
|
||||
(cons head tail))))))
|
||||
|
||||
|
||||
|
||||
; Parse a single expression from a lexer-buffer. This is the main routine in
|
||||
; our recursive-descent parser.
|
||||
|
||||
(define quotation-symbols '((quote . quote)
|
||||
(backquote . \`)
|
||||
(unquote . \,)
|
||||
(unquote-splicing . \,@)))
|
||||
|
||||
(define (get-expression lex)
|
||||
(let* ((token (lex 'get))
|
||||
(type (car token))
|
||||
(return (lambda (result)
|
||||
(if (pair? result)
|
||||
(set-source-properties! result (source-properties token)))
|
||||
result)))
|
||||
(case type
|
||||
((integer float symbol character string)
|
||||
(return (cdr token)))
|
||||
((quote backquote unquote unquote-splicing)
|
||||
(return (list (assq-ref quotation-symbols type) (get-expression lex))))
|
||||
((paren-open)
|
||||
(return (get-list lex #t #f)))
|
||||
((square-open)
|
||||
(return (list->vector (get-list lex #f #t))))
|
||||
(else
|
||||
(parse-error token "expected expression, got" token)))))
|
||||
|
||||
|
||||
; Define the reader function based on this; build a lexer, a lexer-buffer,
|
||||
; and then parse a single expression to return.
|
||||
|
||||
(define (read-elisp port)
|
||||
(elisp-parser (get-lexer/1 port) error))
|
||||
(let* ((lexer (get-lexer port))
|
||||
(lexbuf (make-lexer-buffer lexer))
|
||||
(result (get-expression lexbuf)))
|
||||
(lexbuf 'finish)
|
||||
result))
|
||||
|
|
|
@ -26,9 +26,6 @@
|
|||
; ==============================================================================
|
||||
; Test the lexer.
|
||||
|
||||
; This is of course somewhat redundant with the full parser checks, but probably
|
||||
; can't hurt and is useful in developing the lexer itself.
|
||||
|
||||
(define (get-string-lexer str)
|
||||
(call-with-input-string str get-lexer))
|
||||
|
||||
|
@ -139,6 +136,17 @@ test\"ab\"\\ abcd
|
|||
(pass-if "only next expression"
|
||||
(equal? (parse-str "1 2 3") 1))
|
||||
|
||||
(pass-if "source properties"
|
||||
(let* ((list1 (parse-str "\n\n (\n(7) (42))"))
|
||||
(list2 (car list1))
|
||||
(list3 (cadr list1)))
|
||||
(and (= (source-property list1 'line) 3)
|
||||
(= (source-property list1 'column) 4)
|
||||
(= (source-property list2 'line) 4)
|
||||
(= (source-property list2 'column) 1)
|
||||
(= (source-property list3 'line) 4)
|
||||
(= (source-property list3 'column) 6))))
|
||||
|
||||
(pass-if "constants"
|
||||
(and (equal? (parse-str "-12") -12)
|
||||
(equal? (parse-str ".123") 0.123)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue