mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-20 19:50:24 +02:00
A first, rough lexer for elisp still missing some stuff.
* module/language/elisp/lexer.scm: New lexer file. * test-suite/Makefile.am: Register elisp-reader.test as new test. * test-suite/tests/elisp-reader.test: New test-case.
This commit is contained in:
parent
157ffbd797
commit
25512a940b
3 changed files with 250 additions and 0 deletions
162
module/language/elisp/lexer.scm
Normal file
162
module/language/elisp/lexer.scm
Normal file
|
@ -0,0 +1,162 @@
|
||||||
|
;;; Guile Emac Lisp
|
||||||
|
|
||||||
|
;; Copyright (C) 2001 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
;; This program is free software; you can redistribute it and/or modify
|
||||||
|
;; it under the terms of the GNU General Public License as published by
|
||||||
|
;; the Free Software Foundation; either version 2, or (at your option)
|
||||||
|
;; any later version.
|
||||||
|
;;
|
||||||
|
;; This program is distributed in the hope that it will be useful,
|
||||||
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
;; GNU General Public License for more details.
|
||||||
|
;;
|
||||||
|
;; You should have received a copy of the GNU General Public License
|
||||||
|
;; along with this program; see the file COPYING. If not, write to
|
||||||
|
;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
;; Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
;;; Code:
|
||||||
|
|
||||||
|
(define-module (language elisp lexer)
|
||||||
|
#:use-module (ice-9 regex)
|
||||||
|
#:export (get-lexer))
|
||||||
|
|
||||||
|
; This is the lexical analyzer for the elisp reader. It is hand-written
|
||||||
|
; instead of using some generator because I think that's most viable in this
|
||||||
|
; case and easy enough.
|
||||||
|
|
||||||
|
|
||||||
|
; Read a symbol or number from a port until something follows that marks the
|
||||||
|
; start of a new token (like whitespace or parentheses). The data read is
|
||||||
|
; returned as a string for further conversion to the correct type, but we also
|
||||||
|
; return what this is (integer/float/symbol).
|
||||||
|
; If any escaped character is found, it must be a symbol. Otherwise we
|
||||||
|
; at the end check the result-string against regular expressions to determine
|
||||||
|
; if it is possibly an integer or a float.
|
||||||
|
|
||||||
|
(define integer-regex (make-regexp "^[+-]?[0-9]+\\.?$"))
|
||||||
|
(define float-regex
|
||||||
|
(make-regexp "^[+-]?([0-9]+\\.?[0-9]*|[0-9]*\\.?[0-9]+)(e[+-]?[0-9]+)?$"))
|
||||||
|
|
||||||
|
; A dot is also allowed literally, only a single dort alone is parsed as the
|
||||||
|
; 'dot' terminal for dotted lists.
|
||||||
|
(define no-escape-punctuation (string->char-set "-+=*/_~!@$%^&:<>{}?."))
|
||||||
|
|
||||||
|
(define (get-symbol-or-number port)
|
||||||
|
(let iterate ((result-chars '())
|
||||||
|
(had-escape #f))
|
||||||
|
(let* ((c (read-char port))
|
||||||
|
(finish (lambda ()
|
||||||
|
(let ((result (list->string (reverse result-chars))))
|
||||||
|
(values
|
||||||
|
(cond
|
||||||
|
((and (not had-escape)
|
||||||
|
(regexp-exec integer-regex result))
|
||||||
|
'integer)
|
||||||
|
((and (not had-escape)
|
||||||
|
(regexp-exec float-regex result))
|
||||||
|
'float)
|
||||||
|
(else 'symbol))
|
||||||
|
result))))
|
||||||
|
(need-no-escape? (lambda (c)
|
||||||
|
(or (char-numeric? c)
|
||||||
|
(char-alphabetic? c)
|
||||||
|
(char-set-contains? no-escape-punctuation
|
||||||
|
c)))))
|
||||||
|
(cond
|
||||||
|
((eof-object? c) (finish))
|
||||||
|
((need-no-escape? c) (iterate (cons c result-chars) had-escape))
|
||||||
|
((char=? c #\\) (iterate (cons (read-char port) result-chars) #t))
|
||||||
|
(else
|
||||||
|
(unread-char c port)
|
||||||
|
(finish))))))
|
||||||
|
|
||||||
|
|
||||||
|
; Main lexer routine, which is given a port and does look for the next token.
|
||||||
|
|
||||||
|
(define (lex port)
|
||||||
|
(let ((return (let ((file (if (file-port? port) (port-filename port) #f))
|
||||||
|
(line (1+ (port-line port)))
|
||||||
|
(column (1+ (port-column port))))
|
||||||
|
(lambda (token value)
|
||||||
|
(let ((obj (cons token value)))
|
||||||
|
(set-source-property! obj 'filename file)
|
||||||
|
(set-source-property! obj 'line line)
|
||||||
|
(set-source-property! obj 'column column)
|
||||||
|
obj))))
|
||||||
|
; Read afterwards so the source-properties are correct above
|
||||||
|
; and actually point to the very character to be read.
|
||||||
|
(c (read-char port)))
|
||||||
|
(cond
|
||||||
|
|
||||||
|
; End of input must be specially marked to the parser.
|
||||||
|
((eof-object? c) '*eoi*)
|
||||||
|
|
||||||
|
; Whitespace, just skip it.
|
||||||
|
((char-whitespace? c) (lex port))
|
||||||
|
|
||||||
|
; The dot is only the one for dotted lists if followed by
|
||||||
|
; whitespace. Otherwise it is considered part of a number of symbol.
|
||||||
|
((and (char=? c #\.)
|
||||||
|
(char-whitespace? (peek-char port)))
|
||||||
|
(return 'dot #f))
|
||||||
|
|
||||||
|
|
||||||
|
; Continue checking for literal character values.
|
||||||
|
(else
|
||||||
|
(case c
|
||||||
|
|
||||||
|
; A line comment, skip until end-of-line is found.
|
||||||
|
((#\;)
|
||||||
|
(let iterate ()
|
||||||
|
(let ((cur (read-char port)))
|
||||||
|
(if (or (eof-object? cur) (char=? cur #\newline))
|
||||||
|
(lex port)
|
||||||
|
(iterate)))))
|
||||||
|
|
||||||
|
; Parentheses and other special-meaning single characters.
|
||||||
|
((#\() (return 'paren-open #f))
|
||||||
|
((#\)) (return 'paren-close #f))
|
||||||
|
((#\[) (return 'square-open #f))
|
||||||
|
((#\]) (return 'square-close #f))
|
||||||
|
((#\') (return 'quote #f))
|
||||||
|
((#\`) (return 'backquote #f))
|
||||||
|
((#\,) (return 'unquote #f))
|
||||||
|
|
||||||
|
; Remaining are numbers and symbols. Process input until next
|
||||||
|
; whitespace is found, and see if it looks like a number
|
||||||
|
; (float/integer) or symbol and return accordingly.
|
||||||
|
(else
|
||||||
|
(unread-char c port)
|
||||||
|
(call-with-values
|
||||||
|
(lambda ()
|
||||||
|
(get-symbol-or-number port))
|
||||||
|
(lambda (type str)
|
||||||
|
(case type
|
||||||
|
((symbol) (return 'symbol (string->symbol str)))
|
||||||
|
((integer)
|
||||||
|
; In elisp, something like "1." is an integer, while
|
||||||
|
; string->number returns an inexact real. Thus we
|
||||||
|
; need a conversion here, but it should always result in
|
||||||
|
; an integer!
|
||||||
|
(return 'integer
|
||||||
|
(let ((num (inexact->exact (string->number str))))
|
||||||
|
(if (not (integer? num))
|
||||||
|
(error "Expected integer" str num))
|
||||||
|
num)))
|
||||||
|
((float)
|
||||||
|
(return 'float (let ((num (string->number str)))
|
||||||
|
(if (exact? num)
|
||||||
|
(error "Expected inexact float" str num))
|
||||||
|
num)))
|
||||||
|
(else (error "Wrong number/symbol type" type)))))))))))
|
||||||
|
|
||||||
|
|
||||||
|
; Build a lexer thunk for a port. This is the exported routine which can be
|
||||||
|
; used to create a lexer for the parser to use.
|
||||||
|
|
||||||
|
(define (get-lexer port)
|
||||||
|
(lambda ()
|
||||||
|
(lex port)))
|
|
@ -33,6 +33,7 @@ SCM_TESTS = tests/alist.test \
|
||||||
tests/continuations.test \
|
tests/continuations.test \
|
||||||
tests/elisp.test \
|
tests/elisp.test \
|
||||||
tests/elisp-compiler.text \
|
tests/elisp-compiler.text \
|
||||||
|
tests/elisp-reader.text \
|
||||||
tests/environments.test \
|
tests/environments.test \
|
||||||
tests/eval.test \
|
tests/eval.test \
|
||||||
tests/exceptions.test \
|
tests/exceptions.test \
|
||||||
|
|
87
test-suite/tests/elisp-reader.test
Normal file
87
test-suite/tests/elisp-reader.test
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
;;;; elisp-reader.test --- Test the reader used by the Elisp compiler.
|
||||||
|
;;;;
|
||||||
|
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||||||
|
;;;; Daniel Kraft
|
||||||
|
;;;;
|
||||||
|
;;;; This library is free software; you can redistribute it and/or
|
||||||
|
;;;; modify it under the terms of the GNU Lesser General Public
|
||||||
|
;;;; License as published by the Free Software Foundation; either
|
||||||
|
;;;; version 3 of the License, or (at your option) any later version.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is distributed in the hope that it will be useful,
|
||||||
|
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;;;; Lesser General Public License for more details.
|
||||||
|
;;;;
|
||||||
|
;;;; You should have received a copy of the GNU Lesser General Public
|
||||||
|
;;;; License along with this library; if not, write to the Free Software
|
||||||
|
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
|
||||||
|
(define-module (test-elisp-reader)
|
||||||
|
:use-module (test-suite lib)
|
||||||
|
:use-module (language elisp lexer))
|
||||||
|
|
||||||
|
|
||||||
|
; ==============================================================================
|
||||||
|
; Test the lexer.
|
||||||
|
|
||||||
|
; This is of course somewhat redundant with the full parser checks, but probably
|
||||||
|
; can't hurt and is useful in developing the lexer itself.
|
||||||
|
|
||||||
|
(define (get-string-lexer str)
|
||||||
|
(call-with-input-string str get-lexer))
|
||||||
|
|
||||||
|
(define (lex-string str)
|
||||||
|
(let ((lexer (get-string-lexer str)))
|
||||||
|
(let iterate ((result '()))
|
||||||
|
(let ((token (lexer)))
|
||||||
|
(if (eq? token '*eoi*)
|
||||||
|
(reverse result)
|
||||||
|
(iterate (cons token result)))))))
|
||||||
|
|
||||||
|
(with-test-prefix "Lexer"
|
||||||
|
|
||||||
|
(let ((lexer (get-string-lexer "")))
|
||||||
|
(pass-if "end-of-input"
|
||||||
|
(and (eq? (lexer) '*eoi*)
|
||||||
|
(eq? (lexer) '*eoi*)
|
||||||
|
(eq? (lexer) '*eoi*))))
|
||||||
|
|
||||||
|
(pass-if "single character tokens"
|
||||||
|
(equal? (lex-string "()[]'`, . ")
|
||||||
|
'((paren-open . #f) (paren-close . #f)
|
||||||
|
(square-open . #f) (square-close . #f)
|
||||||
|
(quote . #f) (backquote . #f) (unquote . #f) (dot . #f))))
|
||||||
|
|
||||||
|
(pass-if "whitespace and comments"
|
||||||
|
(equal? (lex-string " (\n\t) ; this is a comment\n. ; until eof")
|
||||||
|
'((paren-open . #f) (paren-close . #f) (dot . #f))))
|
||||||
|
|
||||||
|
(pass-if "source properties"
|
||||||
|
(let ((x (car (lex-string "\n\n \n . \n"))))
|
||||||
|
(and (= (source-property x 'line) 4)
|
||||||
|
(= (source-property x 'column) 3))))
|
||||||
|
|
||||||
|
(pass-if "symbols"
|
||||||
|
(equal? (lex-string "foo FOO char-to-string 1+ \\+1
|
||||||
|
\\(*\\ 1\\ 2\\)
|
||||||
|
+-*/_~!@$%^&=:<>{}
|
||||||
|
abc(def)ghi .e5")
|
||||||
|
`((symbol . foo) (symbol . FOO) (symbol . char-to-string)
|
||||||
|
(symbol . 1+) (symbol . ,(string->symbol "+1"))
|
||||||
|
(symbol . ,(string->symbol "(* 1 2)"))
|
||||||
|
(symbol . +-*/_~!@$%^&=:<>{})
|
||||||
|
(symbol . abc) (paren-open . #f) (symbol . def)
|
||||||
|
(paren-close . #f) (symbol . ghi) (symbol . .e5))))
|
||||||
|
|
||||||
|
; Here we make use of the property that exact/inexact numbers are not equal?
|
||||||
|
; even when they have the same numeric value!
|
||||||
|
(pass-if "integers"
|
||||||
|
(equal? (lex-string "-1 1 1. +1 01234")
|
||||||
|
'((integer . -1) (integer . 1) (integer . 1) (integer . 1)
|
||||||
|
(integer . 1234))))
|
||||||
|
(pass-if "floats"
|
||||||
|
(equal? (lex-string "1500.0 15e2 15.e2 1.5e3 .15e4 -.345e-2")
|
||||||
|
'((float . 1500.0) (float . 1500.0) (float . 1500.0)
|
||||||
|
(float . 1500.0) (float . 1500.0)
|
||||||
|
(float . -0.00345)))))
|
Loading…
Add table
Add a link
Reference in a new issue