1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-29 08:20:20 +02:00
guile/module/ice-9/peg.scm
Noah Lavine bbc5564c42 Separate PEG Concerns
* module/ice-9/peg/codegen.scm: peg-sexp-compile no longer knows about
   string PEGs
* module/ice-9/peg.scm: add a new function peg-extended-compile that
   calls peg-sexp-compile or peg-string-compile on its argument as
   appropriate
2013-01-16 10:11:44 +01:00

132 lines
5.4 KiB
Scheme

;;;; peg.scm --- Parsing Expression Grammar (PEG) parser generator
;;;;
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;;;
(define-module (ice-9 peg)
#:export (peg-parse
define-nonterm
; define-nonterm-f
peg-match)
; #:export-syntax (define-nonterm)
#:use-module (ice-9 peg codegen)
#:use-module (ice-9 peg string-peg)
#:use-module (ice-9 peg simplify-tree)
#:use-module (ice-9 peg match-record)
#:re-export (peg-sexp-compile
define-grammar
define-grammar-f
; define-nonterm
keyword-flatten
context-flatten
peg:start
peg:end
peg:string
peg:tree
peg:substring
peg-record?))
;;;
;;; Helper Macros
;;;
(define-syntax until
(syntax-rules ()
"Evaluate TEST. If it is true, return its value. Otherwise,
execute the STMTs and try again."
((_ test stmt stmt* ...)
(let lp ()
(or test
(begin stmt stmt* ... (lp)))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; FOR DEFINING AND USING NONTERMINALS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Parses STRING using NONTERM
(define (peg-parse nonterm string)
;; We copy the string before using it because it might have been modified
;; in-place since the last time it was parsed, which would invalidate the
;; cache. Guile uses copy-on-write for strings, so this is fast.
(let ((res (nonterm (string-copy string) (string-length string) 0)))
(if (not res)
#f
(make-prec 0 (car res) string (string-collapse (cadr res))))))
(define (peg-extended-compile pattern accum)
(syntax-case pattern (peg)
((peg str)
(string? (syntax->datum #'str))
(peg-string-compile #'str (if (eq? accum 'all) 'body accum)))
(else (peg-sexp-compile pattern accum))))
;; The results of parsing using a nonterminal are cached. Think of it like a
;; hash with no conflict resolution. Process for deciding on the cache size
;; wasn't very scientific; just ran the benchmarks and stopped a little after
;; the point of diminishing returns on my box.
(define *cache-size* 512)
;; Defines a new nonterminal symbol accumulating with ACCUM.
(define-syntax define-nonterm
(lambda (x)
(syntax-case x ()
((_ sym accum pat)
(let ((matchf (peg-extended-compile #'pat (syntax->datum #'accum)))
(accumsym (syntax->datum #'accum))
(c (datum->syntax x (gensym))));; the cache
;; CODE is the code to parse the string if the result isn't cached.
(let ((syn (wrap-parser-for-users x matchf accumsym #'sym)))
#`(begin
(define #,c (make-vector *cache-size* #f));; the cache
(define (sym str strlen at)
(let* ((vref (vector-ref #,c (modulo at *cache-size*))))
;; Check to see whether the value is cached.
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
(caddr vref);; If it is return it.
(let ((fres ;; Else calculate it and cache it.
(#,syn str strlen at)))
(vector-set! #,c (modulo at *cache-size*)
(list str at fres))
fres)))))))))))
;; Searches through STRING for something that parses to PEG-MATCHER. Think
;; regexp search.
(define-syntax peg-match
(lambda (x)
(syntax-case x ()
((_ pattern string-uncopied)
(let ((pmsym (syntax->datum #'pattern)))
(let ((matcher (if (string? (syntax->datum #'pattern))
(peg-string-compile #'pattern 'body)
(peg-sexp-compile #'pattern 'body))))
;; We copy the string before using it because it might have been
;; modified in-place since the last time it was parsed, which would
;; invalidate the cache. Guile uses copy-on-write for strings, so
;; this is fast.
#`(let ((string (string-copy string-uncopied))
(strlen (string-length string-uncopied))
(at 0))
(let ((ret (until (or (>= at strlen)
(#,matcher string strlen at))
(set! at (+ at 1)))))
(if (eq? ret #t) ;; (>= at strlen) succeeded
#f
(let ((end (car ret))
(match (cadr ret)))
(make-prec
at end string
(string-collapse match))))))))))))