1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-01 20:30:28 +02:00

lua/lexer tweaks

* module/language/lua/lexer.scm: Some tweaks and reindentations. Remove
  the define/init lexer interface; I don't like separating declaration
  and initialization.

* module/language/lua/parser.scm:
* test-suite/tests/lua-lexer.test: Adapt to lexer interface change.
This commit is contained in:
Andy Wingo 2010-12-10 18:27:46 +01:00 committed by Ian Price
parent d87639dfe4
commit 04175c7dda
3 changed files with 72 additions and 81 deletions

View file

@ -24,21 +24,16 @@
;; characters like #\+ when they're only a character long. Identifiers ;; characters like #\+ when they're only a character long. Identifiers
;; are returned as symbols ;; are returned as symbols
(define-module (language lua lexer) (define-module (language lua lexer)
#:use-module (srfi srfi-8) #:use-module (srfi srfi-8)
#:use-module (srfi srfi-14) #:use-module (srfi srfi-14)
#:use-module (srfi srfi-39) #:use-module (srfi srfi-39)
#:use-module (language lua common) #:use-module (language lua common)
#:export (make-lexer))
#:export (make-lexer)
#:export-syntax (define-lua-lexer initialize-lua-lexer!))
(define stdout (current-output-port)) (define stdout (current-output-port))
(define (source-info port) (define (source-info port)
`((backtrace . #f) `((filename . ,(port-filename port))
(filename . ,(port-filename port))
(line . ,(port-line port)) (line . ,(port-line port))
(column . ,(port-column port)))) (column . ,(port-column port))))
@ -47,18 +42,22 @@
;; Lua only accepts ASCII characters as of 5.2, so we define our own ;; Lua only accepts ASCII characters as of 5.2, so we define our own
;; charsets here ;; charsets here
(define (char-predicate string) (define (char-predicate string)
(define char-set (string->char-set string)) (let ((char-set (string->char-set string)))
(lambda (c) (lambda (c)
(and (not (eof-object? c)) (char-set-contains? char-set c)))) (and (not (eof-object? c)) (char-set-contains? char-set c)))))
(define is-digit? (char-predicate "0123456789")) (define is-digit? (char-predicate "0123456789"))
(define is-name-first? (char-predicate "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")) (define is-name-first? (char-predicate "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"))
(define (is-name? c) (or (is-name-first? c) (is-digit? c))) (define (is-name? c) (or (is-name-first? c) (is-digit? c)))
(define (is-newline? c) (and (char? c) (or (char=? c #\newline) (char=? c #\cr)))) (define (is-newline? c) (and (char? c) (or (char=? c #\newline) (char=? c #\cr))))
(define *reserved-words*
'(return function end if then elseif else true false nil or and
do while repeat until local for break in not))
(define (possible-keyword token) (define (possible-keyword token)
"Convert a symbol to a keyword if it is a reserved word in Lua" "Convert a symbol to a keyword if it is a reserved word in Lua"
(if (memq token '(return function end if then elseif else true false nil or and do while repeat until local for break in not)) (if (memq token *reserved-words*)
(symbol->keyword token) (symbol->keyword token)
token)) token))
@ -72,9 +71,9 @@
(define (clear-buffer) (define (clear-buffer)
"Clear the buffer and return a string of the contents" "Clear the buffer and return a string of the contents"
(define string (get-output-string buffer)) (let ((string (get-output-string buffer)))
(drop-buffer) (drop-buffer)
string) string))
;; Source code information ;; Source code information
(define saved-source-info #f) (define saved-source-info #f)
@ -86,8 +85,7 @@ of an identifier"
(define (get-source-info) (define (get-source-info)
"Get source code information" "Get source code information"
(if saved-source-info (or saved-source-info
saved-source-info
(source-info port))) (source-info port)))
(define (save-and-next!) (define (save-and-next!)
@ -98,7 +96,7 @@ of an identifier"
"Consume a comment" "Consume a comment"
(let consume ((c (read-char))) (let consume ((c (read-char)))
(cond ((eof-object? c) #f) (cond ((eof-object? c) #f)
((eq? c #\newline) #f) ((eqv? c #\newline) #f)
(else (consume (read-char)))))) (else (consume (read-char))))))
(define (get-long-string-nesting-level) (define (get-long-string-nesting-level)
@ -106,12 +104,12 @@ of an identifier"
(define delimiter (read-char)) (define delimiter (read-char))
(let* ((count (let* ((count
(let loop ((count 0)) (let loop ((count 0))
(if (eq? (peek-char) #\=) (if (eqv? (peek-char) #\=)
(begin (begin
(read-char) (read-char)
(loop (+ count 1))) (loop (+ count 1)))
count)))) count))))
(if (eq? (peek-char) delimiter) count -1))) (if (eqv? (peek-char) delimiter) count -1)))
(define (read-long-string string? nest) (define (read-long-string string? nest)
"Read a long string or comment" "Read a long string or comment"
@ -125,7 +123,9 @@ of an identifier"
(cond (cond
;; Error out if end-of-file is encountered ;; Error out if end-of-file is encountered
((eof-object? c) ((eof-object? c)
(syntax-error (get-source-info) (string-append "unfinished long " (if string? "string" "comment")))) (syntax-error (get-source-info)
(string-append "unfinished long "
(if string? "string" "comment"))))
;; Check to see if we've reached the end ;; Check to see if we've reached the end
((char=? c #\]) ((char=? c #\])
(let* ((nest2 (get-long-string-nesting-level))) (let* ((nest2 (get-long-string-nesting-level)))
@ -156,7 +156,7 @@ of an identifier"
(let loop ((c (peek-char))) (let loop ((c (peek-char)))
(cond (cond
;; string ends early ;; string ends early
((or (eof-object? c) (eq? c #\cr) (eq? c #\newline)) ((or (eof-object? c) (eqv? c #\cr) (eqv? c #\newline))
(syntax-error (get-source-info) "unfinished string ~S" c)) (syntax-error (get-source-info) "unfinished string ~S" c))
;; string escape ;; string escape
((char=? c #\\) ((char=? c #\\)
@ -171,12 +171,14 @@ of an identifier"
((#\r) #\return) ((#\r) #\return)
((#\t) #\tab) ((#\t) #\tab)
((#\v) #\vtab) ((#\v) #\vtab)
((#\x) (syntax-error (get-source-info) "hex escapes unsupported")) ((#\x) (syntax-error (get-source-info)
((#\d) (syntax-error (get-source-info) "decimal escapes unsupported")) "hex escapes unsupported"))
((#\d) (syntax-error (get-source-info)
"decimal escapes unsupported"))
(else escape))) (else escape)))
(loop (peek-char)))) (loop (peek-char))))
(else (else
(if (eq? c delimiter) (if (eqv? c delimiter)
(read-char) ;; terminate loop and discard delimiter (read-char) ;; terminate loop and discard delimiter
(begin (begin
(save-and-next!) (save-and-next!)
@ -185,28 +187,31 @@ of an identifier"
(define (read-number string) (define (read-number string)
(save-source-info) (save-source-info)
(let* ((main (string-append (or string "") (begin (let* ((main (string-append
(while (or (is-digit? (peek-char)) (eq? (peek-char) #\.)) (or string "")
(save-and-next!)) (begin
(clear-buffer)))) (while (or (is-digit? (peek-char)) (eqv? (peek-char) #\.))
(save-and-next!))
(clear-buffer))))
(exponent (exponent
(if (or (eq? (peek-char) #\e) (eq? (peek-char) #\E)) (if (or (eqv? (peek-char) #\e) (eqv? (peek-char) #\E))
(begin (begin
(read-char) (read-char)
(if (eq? (peek-char) #\+) (if (eqv? (peek-char) #\+)
(read-char) (read-char)
(if (eq? (peek-char) #\-) (if (eqv? (peek-char) #\-)
(save-and-next!))) (save-and-next!)))
(if (not (is-digit? (peek-char))) (if (not (is-digit? (peek-char)))
(syntax-error (get-source-info) "expecting number after exponent sign")) (syntax-error (get-source-info)
"expecting number after exponent sign"))
(while (is-digit? (peek-char)) (while (is-digit? (peek-char))
(save-and-next!)) (save-and-next!))
(clear-buffer)) (clear-buffer))
#f)) #f))
(final (string->number main))) (final (string->number main)))
(if exponent (if exponent
(* final (expt 10 (string->number exponent))) (* final (expt 10 (string->number exponent)))
final))) final)))
(define (lex) (define (lex)
(parameterize ((current-input-port port) (parameterize ((current-input-port port)
@ -218,15 +223,16 @@ of an identifier"
;; Skip spaces ;; Skip spaces
((#\newline #\return #\space #\page #\tab #\vtab) (read-char) (loop)) ((#\newline #\return #\space #\page #\tab #\vtab) (read-char) (loop))
;; Either a minus (-), or a long comment, which is a - followed by a bracketed string ;; Either a minus (-), or a long comment, which is a -
;; followed by a bracketed string
((#\-) ((#\-)
(read-char) (read-char)
(if (eq? (peek-char) #\-) (if (eqv? (peek-char) #\-)
;; It's a comment ;; It's a comment
(begin (begin
(read-char) (read-char)
;; Long comment ;; Long comment
(if (eq? (peek-char) #\[) (if (eqv? (peek-char) #\[)
(let* ((nest (get-long-string-nesting-level))) (let* ((nest (get-long-string-nesting-level)))
(drop-buffer) (drop-buffer)
(if (not (negative? nest)) (if (not (negative? nest))
@ -243,37 +249,39 @@ of an identifier"
;; ~= ;; ~=
((#\~) ((#\~)
(read-char) (read-char)
(if (eq? (peek-char) #\=) (if (eqv? (peek-char) #\=)
(begin (read-char) #:~=) (begin (read-char) #:~=)
(syntax-error (get-source-info) "expected = after ~ but got ~c" c))) (syntax-error (get-source-info)
"expected = after ~ but got ~c" c)))
;; < or <= ;; < or <=
((#\<) ((#\<)
(read-char) (read-char)
(if (eq? (peek-char) #\=) (begin (read-char) #:<=) #\<)) (if (eqv? (peek-char) #\=) (begin (read-char) #:<=) #\<))
;; > or >= ;; > or >=
((#\>) ((#\>)
(read-char) (read-char)
(if (eq? (peek-char) #\=) (begin (read-char) #:>=) #\>)) (if (eqv? (peek-char) #\=) (begin (read-char) #:>=) #\>))
;; = or == ;; = or ==
((#\=) ((#\=)
(read-char) (read-char)
(if (eq? (peek-char) #\=) (if (eqv? (peek-char) #\=)
(begin (read-char) #:==) (begin (read-char) #:==)
#:=)) #:=))
;; . can mean one of: floating point number (.12345), table field access (plain .), ;; . can mean one of: floating point number (.12345), table
;; concatenation operator (..) or the variable argument indicator (...) ;; field access (plain .), concatenation operator (..) or the
;; variable argument indicator (...)
((#\.) ((#\.)
(read-char) (read-char)
(if (is-digit? (peek-char)) (if (is-digit? (peek-char))
(read-number ".") (read-number ".")
(if (eq? (peek-char) #\.) (if (eqv? (peek-char) #\.)
(begin (begin
(read-char) (read-char)
(if (eq? (peek-char) #\.) (if (eqv? (peek-char) #\.)
(begin (read-char) #:dots) (begin (read-char) #:dots)
#:concat)) #:concat))
#\.))) #\.)))
@ -288,11 +296,12 @@ of an identifier"
((#\[) ((#\[)
(save-source-info) (save-source-info)
(let* ((nest (get-long-string-nesting-level))) (let* ((nest (get-long-string-nesting-level)))
(if (eq? nest -1) (if (eqv? nest -1)
#\[ #\[
(read-long-string #t nest)))) (read-long-string #t nest))))
;; Characters that are allowed to fall through directly to the parser ;; Characters that are allowed to fall through directly to the
;; parser
((#\; #\( #\) #\, ((#\; #\( #\) #\,
#\+ #\/ #\* #\+ #\/ #\*
#\^ #\{ #\} #\] #\: #\#) (read-char)) #\^ #\{ #\} #\] #\: #\#) (read-char))
@ -312,24 +321,8 @@ of an identifier"
(while (is-name? (peek-char)) (while (is-name? (peek-char))
(save-and-next!)) (save-and-next!))
(possible-keyword (string->symbol (clear-buffer)))) (possible-keyword (string->symbol (clear-buffer))))
(else (syntax-error (get-source-info) "disallowed character ~c" c)))) (else
) ; case (syntax-error (get-source-info)
) ; loop "disallowed character ~c" c))))))))
) ; parameterize
) ; lex
(values get-source-info lex)) ; make-lexer
(define-syntax define-lua-lexer (values get-source-info lex))
(syntax-rules ()
((_ a b)
(begin
(define a #f)
(define b #f)))))
(define-syntax initialize-lua-lexer!
(syntax-rules ()
((_ port a b)
(receive (get-source-info lex)
(make-lexer port)
(set! a get-source-info)
(set! b lex)))))

View file

@ -197,8 +197,11 @@
;;;;; PARSER ;;;;; PARSER
(define (make-parser port) (define (make-parser port)
;; Variables that will be set to the results of MAKE-LEXER. (define lexer-pair
(define-lua-lexer get-source-info lexer) (call-with-values (lambda () (make-lexer port)) cons))
(define get-source-info (car lexer-pair))
(define lexer (cdr lexer-pair))
;; We need two tokens of lookahead ;; We need two tokens of lookahead
(define token2 #f) (define token2 #f)
@ -843,8 +846,6 @@
(receive (is-last node) (statement) (receive (is-last node) (statement)
(loop (or (end-of-chunk? token) is-last) (cons node tree))))))) (loop (or (end-of-chunk? token) is-last) (cons node tree)))))))
(initialize-lua-lexer! port get-source-info lexer)
;; toplevel local environment ;; toplevel local environment
(enter-environment!) (enter-environment!)
;; read first token ;; read first token

View file

@ -25,13 +25,10 @@
(with-test-prefix "lua-lexer" (with-test-prefix "lua-lexer"
(define (from-string string) (define (from-string string)
(define-lua-lexer get-source-info lex) (call-with-values (lambda ()
(call-with-input-string (call-with-input-string string make-lexer))
string (lambda (get-source-info lex)
(lambda (port) (lex))))
(initialize-lua-lexer! port get-source-info lex)
(lex))))
(let-syntax (let-syntax
((test ((test