mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-01 20:30:28 +02:00
lua/lexer tweaks
* module/language/lua/lexer.scm: Some tweaks and reindentations. Remove the define/init lexer interface; I don't like separating declaration and initialization. * module/language/lua/parser.scm: * test-suite/tests/lua-lexer.test: Adapt to lexer interface change.
This commit is contained in:
parent
d87639dfe4
commit
04175c7dda
3 changed files with 72 additions and 81 deletions
|
@ -24,21 +24,16 @@
|
||||||
;; characters like #\+ when they're only a character long. Identifiers
|
;; characters like #\+ when they're only a character long. Identifiers
|
||||||
;; are returned as symbols
|
;; are returned as symbols
|
||||||
(define-module (language lua lexer)
|
(define-module (language lua lexer)
|
||||||
|
|
||||||
#:use-module (srfi srfi-8)
|
#:use-module (srfi srfi-8)
|
||||||
#:use-module (srfi srfi-14)
|
#:use-module (srfi srfi-14)
|
||||||
#:use-module (srfi srfi-39)
|
#:use-module (srfi srfi-39)
|
||||||
|
|
||||||
#:use-module (language lua common)
|
#:use-module (language lua common)
|
||||||
|
#:export (make-lexer))
|
||||||
#:export (make-lexer)
|
|
||||||
#:export-syntax (define-lua-lexer initialize-lua-lexer!))
|
|
||||||
|
|
||||||
(define stdout (current-output-port))
|
(define stdout (current-output-port))
|
||||||
|
|
||||||
(define (source-info port)
|
(define (source-info port)
|
||||||
`((backtrace . #f)
|
`((filename . ,(port-filename port))
|
||||||
(filename . ,(port-filename port))
|
|
||||||
(line . ,(port-line port))
|
(line . ,(port-line port))
|
||||||
(column . ,(port-column port))))
|
(column . ,(port-column port))))
|
||||||
|
|
||||||
|
@ -47,18 +42,22 @@
|
||||||
;; Lua only accepts ASCII characters as of 5.2, so we define our own
|
;; Lua only accepts ASCII characters as of 5.2, so we define our own
|
||||||
;; charsets here
|
;; charsets here
|
||||||
(define (char-predicate string)
|
(define (char-predicate string)
|
||||||
(define char-set (string->char-set string))
|
(let ((char-set (string->char-set string)))
|
||||||
(lambda (c)
|
(lambda (c)
|
||||||
(and (not (eof-object? c)) (char-set-contains? char-set c))))
|
(and (not (eof-object? c)) (char-set-contains? char-set c)))))
|
||||||
|
|
||||||
(define is-digit? (char-predicate "0123456789"))
|
(define is-digit? (char-predicate "0123456789"))
|
||||||
(define is-name-first? (char-predicate "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"))
|
(define is-name-first? (char-predicate "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"))
|
||||||
(define (is-name? c) (or (is-name-first? c) (is-digit? c)))
|
(define (is-name? c) (or (is-name-first? c) (is-digit? c)))
|
||||||
(define (is-newline? c) (and (char? c) (or (char=? c #\newline) (char=? c #\cr))))
|
(define (is-newline? c) (and (char? c) (or (char=? c #\newline) (char=? c #\cr))))
|
||||||
|
|
||||||
|
(define *reserved-words*
|
||||||
|
'(return function end if then elseif else true false nil or and
|
||||||
|
do while repeat until local for break in not))
|
||||||
|
|
||||||
(define (possible-keyword token)
|
(define (possible-keyword token)
|
||||||
"Convert a symbol to a keyword if it is a reserved word in Lua"
|
"Convert a symbol to a keyword if it is a reserved word in Lua"
|
||||||
(if (memq token '(return function end if then elseif else true false nil or and do while repeat until local for break in not))
|
(if (memq token *reserved-words*)
|
||||||
(symbol->keyword token)
|
(symbol->keyword token)
|
||||||
token))
|
token))
|
||||||
|
|
||||||
|
@ -72,9 +71,9 @@
|
||||||
|
|
||||||
(define (clear-buffer)
|
(define (clear-buffer)
|
||||||
"Clear the buffer and return a string of the contents"
|
"Clear the buffer and return a string of the contents"
|
||||||
(define string (get-output-string buffer))
|
(let ((string (get-output-string buffer)))
|
||||||
(drop-buffer)
|
(drop-buffer)
|
||||||
string)
|
string))
|
||||||
|
|
||||||
;; Source code information
|
;; Source code information
|
||||||
(define saved-source-info #f)
|
(define saved-source-info #f)
|
||||||
|
@ -86,8 +85,7 @@ of an identifier"
|
||||||
|
|
||||||
(define (get-source-info)
|
(define (get-source-info)
|
||||||
"Get source code information"
|
"Get source code information"
|
||||||
(if saved-source-info
|
(or saved-source-info
|
||||||
saved-source-info
|
|
||||||
(source-info port)))
|
(source-info port)))
|
||||||
|
|
||||||
(define (save-and-next!)
|
(define (save-and-next!)
|
||||||
|
@ -98,7 +96,7 @@ of an identifier"
|
||||||
"Consume a comment"
|
"Consume a comment"
|
||||||
(let consume ((c (read-char)))
|
(let consume ((c (read-char)))
|
||||||
(cond ((eof-object? c) #f)
|
(cond ((eof-object? c) #f)
|
||||||
((eq? c #\newline) #f)
|
((eqv? c #\newline) #f)
|
||||||
(else (consume (read-char))))))
|
(else (consume (read-char))))))
|
||||||
|
|
||||||
(define (get-long-string-nesting-level)
|
(define (get-long-string-nesting-level)
|
||||||
|
@ -106,12 +104,12 @@ of an identifier"
|
||||||
(define delimiter (read-char))
|
(define delimiter (read-char))
|
||||||
(let* ((count
|
(let* ((count
|
||||||
(let loop ((count 0))
|
(let loop ((count 0))
|
||||||
(if (eq? (peek-char) #\=)
|
(if (eqv? (peek-char) #\=)
|
||||||
(begin
|
(begin
|
||||||
(read-char)
|
(read-char)
|
||||||
(loop (+ count 1)))
|
(loop (+ count 1)))
|
||||||
count))))
|
count))))
|
||||||
(if (eq? (peek-char) delimiter) count -1)))
|
(if (eqv? (peek-char) delimiter) count -1)))
|
||||||
|
|
||||||
(define (read-long-string string? nest)
|
(define (read-long-string string? nest)
|
||||||
"Read a long string or comment"
|
"Read a long string or comment"
|
||||||
|
@ -125,7 +123,9 @@ of an identifier"
|
||||||
(cond
|
(cond
|
||||||
;; Error out if end-of-file is encountered
|
;; Error out if end-of-file is encountered
|
||||||
((eof-object? c)
|
((eof-object? c)
|
||||||
(syntax-error (get-source-info) (string-append "unfinished long " (if string? "string" "comment"))))
|
(syntax-error (get-source-info)
|
||||||
|
(string-append "unfinished long "
|
||||||
|
(if string? "string" "comment"))))
|
||||||
;; Check to see if we've reached the end
|
;; Check to see if we've reached the end
|
||||||
((char=? c #\])
|
((char=? c #\])
|
||||||
(let* ((nest2 (get-long-string-nesting-level)))
|
(let* ((nest2 (get-long-string-nesting-level)))
|
||||||
|
@ -156,7 +156,7 @@ of an identifier"
|
||||||
(let loop ((c (peek-char)))
|
(let loop ((c (peek-char)))
|
||||||
(cond
|
(cond
|
||||||
;; string ends early
|
;; string ends early
|
||||||
((or (eof-object? c) (eq? c #\cr) (eq? c #\newline))
|
((or (eof-object? c) (eqv? c #\cr) (eqv? c #\newline))
|
||||||
(syntax-error (get-source-info) "unfinished string ~S" c))
|
(syntax-error (get-source-info) "unfinished string ~S" c))
|
||||||
;; string escape
|
;; string escape
|
||||||
((char=? c #\\)
|
((char=? c #\\)
|
||||||
|
@ -171,12 +171,14 @@ of an identifier"
|
||||||
((#\r) #\return)
|
((#\r) #\return)
|
||||||
((#\t) #\tab)
|
((#\t) #\tab)
|
||||||
((#\v) #\vtab)
|
((#\v) #\vtab)
|
||||||
((#\x) (syntax-error (get-source-info) "hex escapes unsupported"))
|
((#\x) (syntax-error (get-source-info)
|
||||||
((#\d) (syntax-error (get-source-info) "decimal escapes unsupported"))
|
"hex escapes unsupported"))
|
||||||
|
((#\d) (syntax-error (get-source-info)
|
||||||
|
"decimal escapes unsupported"))
|
||||||
(else escape)))
|
(else escape)))
|
||||||
(loop (peek-char))))
|
(loop (peek-char))))
|
||||||
(else
|
(else
|
||||||
(if (eq? c delimiter)
|
(if (eqv? c delimiter)
|
||||||
(read-char) ;; terminate loop and discard delimiter
|
(read-char) ;; terminate loop and discard delimiter
|
||||||
(begin
|
(begin
|
||||||
(save-and-next!)
|
(save-and-next!)
|
||||||
|
@ -185,28 +187,31 @@ of an identifier"
|
||||||
|
|
||||||
(define (read-number string)
|
(define (read-number string)
|
||||||
(save-source-info)
|
(save-source-info)
|
||||||
(let* ((main (string-append (or string "") (begin
|
(let* ((main (string-append
|
||||||
(while (or (is-digit? (peek-char)) (eq? (peek-char) #\.))
|
(or string "")
|
||||||
(save-and-next!))
|
(begin
|
||||||
(clear-buffer))))
|
(while (or (is-digit? (peek-char)) (eqv? (peek-char) #\.))
|
||||||
|
(save-and-next!))
|
||||||
|
(clear-buffer))))
|
||||||
(exponent
|
(exponent
|
||||||
(if (or (eq? (peek-char) #\e) (eq? (peek-char) #\E))
|
(if (or (eqv? (peek-char) #\e) (eqv? (peek-char) #\E))
|
||||||
(begin
|
(begin
|
||||||
(read-char)
|
(read-char)
|
||||||
(if (eq? (peek-char) #\+)
|
(if (eqv? (peek-char) #\+)
|
||||||
(read-char)
|
(read-char)
|
||||||
(if (eq? (peek-char) #\-)
|
(if (eqv? (peek-char) #\-)
|
||||||
(save-and-next!)))
|
(save-and-next!)))
|
||||||
(if (not (is-digit? (peek-char)))
|
(if (not (is-digit? (peek-char)))
|
||||||
(syntax-error (get-source-info) "expecting number after exponent sign"))
|
(syntax-error (get-source-info)
|
||||||
|
"expecting number after exponent sign"))
|
||||||
(while (is-digit? (peek-char))
|
(while (is-digit? (peek-char))
|
||||||
(save-and-next!))
|
(save-and-next!))
|
||||||
(clear-buffer))
|
(clear-buffer))
|
||||||
#f))
|
#f))
|
||||||
(final (string->number main)))
|
(final (string->number main)))
|
||||||
(if exponent
|
(if exponent
|
||||||
(* final (expt 10 (string->number exponent)))
|
(* final (expt 10 (string->number exponent)))
|
||||||
final)))
|
final)))
|
||||||
|
|
||||||
(define (lex)
|
(define (lex)
|
||||||
(parameterize ((current-input-port port)
|
(parameterize ((current-input-port port)
|
||||||
|
@ -218,15 +223,16 @@ of an identifier"
|
||||||
;; Skip spaces
|
;; Skip spaces
|
||||||
((#\newline #\return #\space #\page #\tab #\vtab) (read-char) (loop))
|
((#\newline #\return #\space #\page #\tab #\vtab) (read-char) (loop))
|
||||||
|
|
||||||
;; Either a minus (-), or a long comment, which is a - followed by a bracketed string
|
;; Either a minus (-), or a long comment, which is a -
|
||||||
|
;; followed by a bracketed string
|
||||||
((#\-)
|
((#\-)
|
||||||
(read-char)
|
(read-char)
|
||||||
(if (eq? (peek-char) #\-)
|
(if (eqv? (peek-char) #\-)
|
||||||
;; It's a comment
|
;; It's a comment
|
||||||
(begin
|
(begin
|
||||||
(read-char)
|
(read-char)
|
||||||
;; Long comment
|
;; Long comment
|
||||||
(if (eq? (peek-char) #\[)
|
(if (eqv? (peek-char) #\[)
|
||||||
(let* ((nest (get-long-string-nesting-level)))
|
(let* ((nest (get-long-string-nesting-level)))
|
||||||
(drop-buffer)
|
(drop-buffer)
|
||||||
(if (not (negative? nest))
|
(if (not (negative? nest))
|
||||||
|
@ -243,37 +249,39 @@ of an identifier"
|
||||||
;; ~=
|
;; ~=
|
||||||
((#\~)
|
((#\~)
|
||||||
(read-char)
|
(read-char)
|
||||||
(if (eq? (peek-char) #\=)
|
(if (eqv? (peek-char) #\=)
|
||||||
(begin (read-char) #:~=)
|
(begin (read-char) #:~=)
|
||||||
(syntax-error (get-source-info) "expected = after ~ but got ~c" c)))
|
(syntax-error (get-source-info)
|
||||||
|
"expected = after ~ but got ~c" c)))
|
||||||
|
|
||||||
;; < or <=
|
;; < or <=
|
||||||
((#\<)
|
((#\<)
|
||||||
(read-char)
|
(read-char)
|
||||||
(if (eq? (peek-char) #\=) (begin (read-char) #:<=) #\<))
|
(if (eqv? (peek-char) #\=) (begin (read-char) #:<=) #\<))
|
||||||
|
|
||||||
;; > or >=
|
;; > or >=
|
||||||
((#\>)
|
((#\>)
|
||||||
(read-char)
|
(read-char)
|
||||||
(if (eq? (peek-char) #\=) (begin (read-char) #:>=) #\>))
|
(if (eqv? (peek-char) #\=) (begin (read-char) #:>=) #\>))
|
||||||
|
|
||||||
;; = or ==
|
;; = or ==
|
||||||
((#\=)
|
((#\=)
|
||||||
(read-char)
|
(read-char)
|
||||||
(if (eq? (peek-char) #\=)
|
(if (eqv? (peek-char) #\=)
|
||||||
(begin (read-char) #:==)
|
(begin (read-char) #:==)
|
||||||
#:=))
|
#:=))
|
||||||
|
|
||||||
;; . can mean one of: floating point number (.12345), table field access (plain .),
|
;; . can mean one of: floating point number (.12345), table
|
||||||
;; concatenation operator (..) or the variable argument indicator (...)
|
;; field access (plain .), concatenation operator (..) or the
|
||||||
|
;; variable argument indicator (...)
|
||||||
((#\.)
|
((#\.)
|
||||||
(read-char)
|
(read-char)
|
||||||
(if (is-digit? (peek-char))
|
(if (is-digit? (peek-char))
|
||||||
(read-number ".")
|
(read-number ".")
|
||||||
(if (eq? (peek-char) #\.)
|
(if (eqv? (peek-char) #\.)
|
||||||
(begin
|
(begin
|
||||||
(read-char)
|
(read-char)
|
||||||
(if (eq? (peek-char) #\.)
|
(if (eqv? (peek-char) #\.)
|
||||||
(begin (read-char) #:dots)
|
(begin (read-char) #:dots)
|
||||||
#:concat))
|
#:concat))
|
||||||
#\.)))
|
#\.)))
|
||||||
|
@ -288,11 +296,12 @@ of an identifier"
|
||||||
((#\[)
|
((#\[)
|
||||||
(save-source-info)
|
(save-source-info)
|
||||||
(let* ((nest (get-long-string-nesting-level)))
|
(let* ((nest (get-long-string-nesting-level)))
|
||||||
(if (eq? nest -1)
|
(if (eqv? nest -1)
|
||||||
#\[
|
#\[
|
||||||
(read-long-string #t nest))))
|
(read-long-string #t nest))))
|
||||||
|
|
||||||
;; Characters that are allowed to fall through directly to the parser
|
;; Characters that are allowed to fall through directly to the
|
||||||
|
;; parser
|
||||||
((#\; #\( #\) #\,
|
((#\; #\( #\) #\,
|
||||||
#\+ #\/ #\*
|
#\+ #\/ #\*
|
||||||
#\^ #\{ #\} #\] #\: #\#) (read-char))
|
#\^ #\{ #\} #\] #\: #\#) (read-char))
|
||||||
|
@ -312,24 +321,8 @@ of an identifier"
|
||||||
(while (is-name? (peek-char))
|
(while (is-name? (peek-char))
|
||||||
(save-and-next!))
|
(save-and-next!))
|
||||||
(possible-keyword (string->symbol (clear-buffer))))
|
(possible-keyword (string->symbol (clear-buffer))))
|
||||||
(else (syntax-error (get-source-info) "disallowed character ~c" c))))
|
(else
|
||||||
) ; case
|
(syntax-error (get-source-info)
|
||||||
) ; loop
|
"disallowed character ~c" c))))))))
|
||||||
) ; parameterize
|
|
||||||
) ; lex
|
|
||||||
(values get-source-info lex)) ; make-lexer
|
|
||||||
|
|
||||||
(define-syntax define-lua-lexer
|
(values get-source-info lex))
|
||||||
(syntax-rules ()
|
|
||||||
((_ a b)
|
|
||||||
(begin
|
|
||||||
(define a #f)
|
|
||||||
(define b #f)))))
|
|
||||||
|
|
||||||
(define-syntax initialize-lua-lexer!
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ port a b)
|
|
||||||
(receive (get-source-info lex)
|
|
||||||
(make-lexer port)
|
|
||||||
(set! a get-source-info)
|
|
||||||
(set! b lex)))))
|
|
||||||
|
|
|
@ -197,8 +197,11 @@
|
||||||
;;;;; PARSER
|
;;;;; PARSER
|
||||||
|
|
||||||
(define (make-parser port)
|
(define (make-parser port)
|
||||||
;; Variables that will be set to the results of MAKE-LEXER.
|
(define lexer-pair
|
||||||
(define-lua-lexer get-source-info lexer)
|
(call-with-values (lambda () (make-lexer port)) cons))
|
||||||
|
|
||||||
|
(define get-source-info (car lexer-pair))
|
||||||
|
(define lexer (cdr lexer-pair))
|
||||||
|
|
||||||
;; We need two tokens of lookahead
|
;; We need two tokens of lookahead
|
||||||
(define token2 #f)
|
(define token2 #f)
|
||||||
|
@ -843,8 +846,6 @@
|
||||||
(receive (is-last node) (statement)
|
(receive (is-last node) (statement)
|
||||||
(loop (or (end-of-chunk? token) is-last) (cons node tree)))))))
|
(loop (or (end-of-chunk? token) is-last) (cons node tree)))))))
|
||||||
|
|
||||||
(initialize-lua-lexer! port get-source-info lexer)
|
|
||||||
|
|
||||||
;; toplevel local environment
|
;; toplevel local environment
|
||||||
(enter-environment!)
|
(enter-environment!)
|
||||||
;; read first token
|
;; read first token
|
||||||
|
|
|
@ -25,13 +25,10 @@
|
||||||
|
|
||||||
(with-test-prefix "lua-lexer"
|
(with-test-prefix "lua-lexer"
|
||||||
(define (from-string string)
|
(define (from-string string)
|
||||||
(define-lua-lexer get-source-info lex)
|
(call-with-values (lambda ()
|
||||||
(call-with-input-string
|
(call-with-input-string string make-lexer))
|
||||||
string
|
(lambda (get-source-info lex)
|
||||||
(lambda (port)
|
(lex))))
|
||||||
|
|
||||||
(initialize-lua-lexer! port get-source-info lex)
|
|
||||||
(lex))))
|
|
||||||
|
|
||||||
(let-syntax
|
(let-syntax
|
||||||
((test
|
((test
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue