1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-01 12:20:26 +02:00

lua/lexer tweaks

* module/language/lua/lexer.scm: Some tweaks and reindentations. Remove
  the define/init lexer interface; I don't like separating declaration
  and initialization.

* module/language/lua/parser.scm:
* test-suite/tests/lua-lexer.test: Adapt to lexer interface change.
This commit is contained in:
Andy Wingo 2010-12-10 18:27:46 +01:00 committed by Ian Price
parent d87639dfe4
commit 04175c7dda
3 changed files with 72 additions and 81 deletions

View file

@ -24,21 +24,16 @@
;; characters like #\+ when they're only a character long. Identifiers
;; are returned as symbols
(define-module (language lua lexer)
#:use-module (srfi srfi-8)
#:use-module (srfi srfi-14)
#:use-module (srfi srfi-39)
#:use-module (language lua common)
#:export (make-lexer)
#:export-syntax (define-lua-lexer initialize-lua-lexer!))
#:export (make-lexer))
(define stdout (current-output-port))
(define (source-info port)
`((backtrace . #f)
(filename . ,(port-filename port))
`((filename . ,(port-filename port))
(line . ,(port-line port))
(column . ,(port-column port))))
@ -47,18 +42,22 @@
;; Lua only accepts ASCII characters as of 5.2, so we define our own
;; charsets here
(define (char-predicate string)
(define char-set (string->char-set string))
(lambda (c)
(and (not (eof-object? c)) (char-set-contains? char-set c))))
(let ((char-set (string->char-set string)))
(lambda (c)
(and (not (eof-object? c)) (char-set-contains? char-set c)))))
(define is-digit? (char-predicate "0123456789"))
(define is-name-first? (char-predicate "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"))
(define (is-name? c) (or (is-name-first? c) (is-digit? c)))
(define (is-newline? c) (and (char? c) (or (char=? c #\newline) (char=? c #\cr))))
(define *reserved-words*
'(return function end if then elseif else true false nil or and
do while repeat until local for break in not))
(define (possible-keyword token)
"Convert a symbol to a keyword if it is a reserved word in Lua"
(if (memq token '(return function end if then elseif else true false nil or and do while repeat until local for break in not))
(if (memq token *reserved-words*)
(symbol->keyword token)
token))
@ -72,9 +71,9 @@
(define (clear-buffer)
"Clear the buffer and return a string of the contents"
(define string (get-output-string buffer))
(drop-buffer)
string)
(let ((string (get-output-string buffer)))
(drop-buffer)
string))
;; Source code information
(define saved-source-info #f)
@ -86,8 +85,7 @@ of an identifier"
(define (get-source-info)
"Get source code information"
(if saved-source-info
saved-source-info
(or saved-source-info
(source-info port)))
(define (save-and-next!)
@ -98,7 +96,7 @@ of an identifier"
"Consume a comment"
(let consume ((c (read-char)))
(cond ((eof-object? c) #f)
((eq? c #\newline) #f)
((eqv? c #\newline) #f)
(else (consume (read-char))))))
(define (get-long-string-nesting-level)
@ -106,12 +104,12 @@ of an identifier"
(define delimiter (read-char))
(let* ((count
(let loop ((count 0))
(if (eq? (peek-char) #\=)
(if (eqv? (peek-char) #\=)
(begin
(read-char)
(loop (+ count 1)))
count))))
(if (eq? (peek-char) delimiter) count -1)))
(if (eqv? (peek-char) delimiter) count -1)))
(define (read-long-string string? nest)
"Read a long string or comment"
@ -125,7 +123,9 @@ of an identifier"
(cond
;; Error out if end-of-file is encountered
((eof-object? c)
(syntax-error (get-source-info) (string-append "unfinished long " (if string? "string" "comment"))))
(syntax-error (get-source-info)
(string-append "unfinished long "
(if string? "string" "comment"))))
;; Check to see if we've reached the end
((char=? c #\])
(let* ((nest2 (get-long-string-nesting-level)))
@ -156,7 +156,7 @@ of an identifier"
(let loop ((c (peek-char)))
(cond
;; string ends early
((or (eof-object? c) (eq? c #\cr) (eq? c #\newline))
((or (eof-object? c) (eqv? c #\cr) (eqv? c #\newline))
(syntax-error (get-source-info) "unfinished string ~S" c))
;; string escape
((char=? c #\\)
@ -171,12 +171,14 @@ of an identifier"
((#\r) #\return)
((#\t) #\tab)
((#\v) #\vtab)
((#\x) (syntax-error (get-source-info) "hex escapes unsupported"))
((#\d) (syntax-error (get-source-info) "decimal escapes unsupported"))
((#\x) (syntax-error (get-source-info)
"hex escapes unsupported"))
((#\d) (syntax-error (get-source-info)
"decimal escapes unsupported"))
(else escape)))
(loop (peek-char))))
(else
(if (eq? c delimiter)
(if (eqv? c delimiter)
(read-char) ;; terminate loop and discard delimiter
(begin
(save-and-next!)
@ -185,28 +187,31 @@ of an identifier"
(define (read-number string)
(save-source-info)
(let* ((main (string-append (or string "") (begin
(while (or (is-digit? (peek-char)) (eq? (peek-char) #\.))
(save-and-next!))
(clear-buffer))))
(let* ((main (string-append
(or string "")
(begin
(while (or (is-digit? (peek-char)) (eqv? (peek-char) #\.))
(save-and-next!))
(clear-buffer))))
(exponent
(if (or (eq? (peek-char) #\e) (eq? (peek-char) #\E))
(if (or (eqv? (peek-char) #\e) (eqv? (peek-char) #\E))
(begin
(read-char)
(if (eq? (peek-char) #\+)
(if (eqv? (peek-char) #\+)
(read-char)
(if (eq? (peek-char) #\-)
(if (eqv? (peek-char) #\-)
(save-and-next!)))
(if (not (is-digit? (peek-char)))
(syntax-error (get-source-info) "expecting number after exponent sign"))
(syntax-error (get-source-info)
"expecting number after exponent sign"))
(while (is-digit? (peek-char))
(save-and-next!))
(clear-buffer))
#f))
(final (string->number main)))
(if exponent
(* final (expt 10 (string->number exponent)))
final)))
(if exponent
(* final (expt 10 (string->number exponent)))
final)))
(define (lex)
(parameterize ((current-input-port port)
@ -218,15 +223,16 @@ of an identifier"
;; Skip spaces
((#\newline #\return #\space #\page #\tab #\vtab) (read-char) (loop))
;; Either a minus (-), or a long comment, which is a - followed by a bracketed string
;; Either a minus (-), or a long comment, which is a -
;; followed by a bracketed string
((#\-)
(read-char)
(if (eq? (peek-char) #\-)
(if (eqv? (peek-char) #\-)
;; It's a comment
(begin
(read-char)
;; Long comment
(if (eq? (peek-char) #\[)
(if (eqv? (peek-char) #\[)
(let* ((nest (get-long-string-nesting-level)))
(drop-buffer)
(if (not (negative? nest))
@ -243,37 +249,39 @@ of an identifier"
;; ~=
((#\~)
(read-char)
(if (eq? (peek-char) #\=)
(if (eqv? (peek-char) #\=)
(begin (read-char) #:~=)
(syntax-error (get-source-info) "expected = after ~ but got ~c" c)))
(syntax-error (get-source-info)
"expected = after ~ but got ~c" c)))
;; < or <=
((#\<)
(read-char)
(if (eq? (peek-char) #\=) (begin (read-char) #:<=) #\<))
(if (eqv? (peek-char) #\=) (begin (read-char) #:<=) #\<))
;; > or >=
((#\>)
(read-char)
(if (eq? (peek-char) #\=) (begin (read-char) #:>=) #\>))
(if (eqv? (peek-char) #\=) (begin (read-char) #:>=) #\>))
;; = or ==
((#\=)
(read-char)
(if (eq? (peek-char) #\=)
(if (eqv? (peek-char) #\=)
(begin (read-char) #:==)
#:=))
;; . can mean one of: floating point number (.12345), table field access (plain .),
;; concatenation operator (..) or the variable argument indicator (...)
;; . can mean one of: floating point number (.12345), table
;; field access (plain .), concatenation operator (..) or the
;; variable argument indicator (...)
((#\.)
(read-char)
(if (is-digit? (peek-char))
(read-number ".")
(if (eq? (peek-char) #\.)
(if (eqv? (peek-char) #\.)
(begin
(read-char)
(if (eq? (peek-char) #\.)
(if (eqv? (peek-char) #\.)
(begin (read-char) #:dots)
#:concat))
#\.)))
@ -288,11 +296,12 @@ of an identifier"
((#\[)
(save-source-info)
(let* ((nest (get-long-string-nesting-level)))
(if (eq? nest -1)
(if (eqv? nest -1)
#\[
(read-long-string #t nest))))
;; Characters that are allowed to fall through directly to the parser
;; Characters that are allowed to fall through directly to the
;; parser
((#\; #\( #\) #\,
#\+ #\/ #\*
#\^ #\{ #\} #\] #\: #\#) (read-char))
@ -312,24 +321,8 @@ of an identifier"
(while (is-name? (peek-char))
(save-and-next!))
(possible-keyword (string->symbol (clear-buffer))))
(else (syntax-error (get-source-info) "disallowed character ~c" c))))
) ; case
) ; loop
) ; parameterize
) ; lex
(values get-source-info lex)) ; make-lexer
(else
(syntax-error (get-source-info)
"disallowed character ~c" c))))))))
(define-syntax define-lua-lexer
(syntax-rules ()
((_ a b)
(begin
(define a #f)
(define b #f)))))
(define-syntax initialize-lua-lexer!
(syntax-rules ()
((_ port a b)
(receive (get-source-info lex)
(make-lexer port)
(set! a get-source-info)
(set! b lex)))))
(values get-source-info lex))

View file

@ -197,8 +197,11 @@
;;;;; PARSER
(define (make-parser port)
;; Variables that will be set to the results of MAKE-LEXER.
(define-lua-lexer get-source-info lexer)
(define lexer-pair
(call-with-values (lambda () (make-lexer port)) cons))
(define get-source-info (car lexer-pair))
(define lexer (cdr lexer-pair))
;; We need two tokens of lookahead
(define token2 #f)
@ -843,8 +846,6 @@
(receive (is-last node) (statement)
(loop (or (end-of-chunk? token) is-last) (cons node tree)))))))
(initialize-lua-lexer! port get-source-info lexer)
;; toplevel local environment
(enter-environment!)
;; read first token

View file

@ -25,13 +25,10 @@
(with-test-prefix "lua-lexer"
(define (from-string string)
(define-lua-lexer get-source-info lex)
(call-with-input-string
string
(lambda (port)
(initialize-lua-lexer! port get-source-info lex)
(lex))))
(call-with-values (lambda ()
(call-with-input-string string make-lexer))
(lambda (get-source-info lex)
(lex))))
(let-syntax
((test