diff --git a/scripts/read-text-outline b/scripts/read-text-outline index bbfbac5e1..1a88f205e 100755 --- a/scripts/read-text-outline +++ b/scripts/read-text-outline @@ -31,7 +31,7 @@ exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@" ;; Scan OUTLINE file and display a list of trees, the structure of ;; each reflecting the "levels" in OUTLINE. The recognized outline ;; format (used to indicate outline headings) is zero or more pairs of -;; leading spaces followed by "-" or "+". Something like: +;; leading spaces followed by "-". Something like: ;; ;; - a 0 ;; - b 1 @@ -40,22 +40,60 @@ exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@" ;; - e 0 ;; - f 1 ;; - g 2 -;; -h 1 +;; - h 1 ;; ;; In this example the levels are shown to the right. The output for ;; such a file would be the single line: ;; ;; (("a" ("b" "c") "d") ("e" ("f" "g") "h")) ;; +;; Basically, anything at the beginning of a list is a parent, and the +;; remaining elements of that list are its children. ;; -;; Usage from a Scheme program: These three procs are exported: +;; +;; Usage from a Scheme program: These two procs are exported: ;; ;; (read-text-outline . args) ; only first arg is used ;; (read-text-outline-silently port) -;; (display-outline-tree tree) +;; (make-text-outline-reader re specs) ;; -;; Don't forget to iterate (say, `display-outline-tree') over the list of -;; trees that `read-text-outline-silently' returns. +;; `make-text-outline-reader' returns a proc that reads from PORT and +;; returns a list of trees (similar to `read-text-outline-silently'). +;; +;; RE is a regular expression (string) that is used to identify a header +;; line of the outline (as opposed to a whitespace line or intervening +;; text). RE must begin w/ a sub-expression to match the "level prefix" +;; of the line. You can use `level-submatch-number' in SPECS (explained +;; below) to specify a number other than 1, the default. +;; +;; Normally, the level of the line is taken directly as the length of +;; its level prefix. This often results in adjacent levels not mapping +;; to adjacent numbers, which confuses the tree-building portion of the +;; program, which expects top-level to be 0, first sub-level to be 1, +;; etc. You can use `level-substring-divisor' or `compute-level' in +;; SPECS to specify a constant scaling factor or specify a completely +;; alternative procedure, respectively. +;; +;; SPECS is an alist which may contain the following key/value pairs: +;; +;; - level-submatch-number NUMBER +;; - level-substring-divisor NUMBER +;; - compute-level PROC +;; - body-submatch-number NUMBER +;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...) +;; +;; The PROC value associated with key `compute-level' should take a +;; Scheme match structure (as returned by `regexp-exec') and return a +;; number, the normalized level for that line. If this is specified, +;; it takes precedence over other level-computation methods. +;; +;; Use `body-submatch-number' if RE specifies the whole body, or if you +;; want to make use of the extra fields parsing. The `extra-fields' +;; value is a sub-alist, whose keys name additional fields that are to +;; be recognized. These fields along with `level' are set as object +;; properties of the final string ("body") that is consed into the tree. +;; If a field name ends in "?" the field value is set to be #t if there +;; is a match and the result is not an empty string, and #f otherwise. ;; ;; ;; Bugs and caveats: @@ -73,96 +111,143 @@ exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@" ;; ;; TODO: Determine what's the right thing to do for skips. ;; Handle TABs. -;; Handle follow-on lines. -;; Make line/display format customizable via longopts. +;; Make line format customizable via longopts. ;;; Code: (define-module (scripts read-text-outline) - :export (read-text-outline read-text-outline-silently display-outline-tree) + :export (read-text-outline + read-text-outline-silently + make-text-outline-reader) :use-module (ice-9 regex) - :use-module (ice-9 rdelim)) + :autoload (ice-9 rdelim) (read-line) + :autoload (ice-9 getopt-long) (getopt-long)) -;; todo: make customizable -(define *depth-cue-rx* (make-regexp "(([ ][ ])*)[-+] *")) -(define *subm-number* 1) -(define *level-divisor* 2) +(define (?? symbol) + (let ((name (symbol->string symbol))) + (string=? "?" (substring name (1- (string-length name)))))) -(define (>> level line) - (format #t "\t~A\t~A- ~A\n" level (make-string level #\space) line)) +(define (msub n) + (lambda (m) + (match:substring m n))) -(define (display-outline-tree level tree) - (cond ((list? tree) - (>> level (car tree)) - (for-each (lambda (kid) - (display-outline-tree (+ *level-divisor* level) kid)) - (cdr tree))) - (else (>> level tree)))) +(define (??-predicates pair) + (cons (car pair) + (if (?? (car pair)) + (lambda (m) + (not (string=? "" (match:substring m (cdr pair))))) + (msub (cdr pair))))) -(define (read-text-outline-silently port) - (let* ((all '(start)) - (pchain (list))) ; parents chain - (let loop ((line (read-line port)) - (prev-level -1) ; how this relates to the first input +(define (make-line-parser re specs) + (let* ((rx (let ((fc (substring re 0 1))) + (make-regexp (if (string=? "^" fc) + re + (string-append "^" re))))) + (check (lambda (key) + (assq-ref specs key))) + (level-substring (msub (or (check 'level-submatch-number) 1))) + (extract-level (cond ((check 'compute-level) + => (lambda (proc) + (lambda (m) + (proc m)))) + ((check 'level-substring-divisor) + => (lambda (n) + (lambda (m) + (/ (string-length (level-substring m)) + n)))) + (else + (lambda (m) + (string-length (level-substring m)))))) + (extract-body (cond ((check 'body-submatch-number) + => msub) + (else + (lambda (m) (match:suffix m))))) + (misc-props! (cond ((check 'extra-fields) + => (lambda (alist) + (let ((new (map ??-predicates alist))) + (lambda (obj m) + (for-each + (lambda (pair) + (set-object-property! + obj (car pair) + ((cdr pair) m))) + new))))) + (else + (lambda (obj m) #t))))) + ;; retval + (lambda (line) + (cond ((regexp-exec rx line) + => (lambda (m) + (let ((level (extract-level m)) + (body (extract-body m))) + (set-object-property! body 'level level) + (misc-props! body m) + body))) + (else #f))))) + +(define (make-text-outline-reader re specs) + (let ((parse-line (make-line-parser re specs))) + ;; retval + (lambda (port) + (let* ((all '(start)) + (pchain (list))) ; parents chain + (let loop ((line (read-line port)) + (prev-level -1) ; how this relates to the first input ; level determines whether or not we ; start in "sibling" or "child" mode. ; in the end, `start' is ignored and ; it's much easier to ignore parents ; than siblings (sometimes). this is ; not to encourage ignorance, however. - (tp all)) ; tail pointer - (or (eof-object? line) - (cond ((regexp-exec *depth-cue-rx* line) - => (lambda (m) - (let* ((words (list (match:suffix m))) - (level (/ (string-length - (or (match:substring m *subm-number*) - "")) - *level-divisor*)) - (diff (- level prev-level))) - (cond + (tp all)) ; tail pointer + (or (eof-object? line) + (cond ((parse-line line) + => (lambda (w) + (let* ((words (list w)) + (level (object-property w 'level)) + (diff (- level prev-level))) + (cond - ;; sibling - ((zero? diff) - ;; just extend the chain - (set-cdr! tp words)) + ;; sibling + ((zero? diff) + ;; just extend the chain + (set-cdr! tp words)) - ;; child - ((positive? diff) - (or (= 1 diff) - (error "unhandled diff not 1:" diff line)) - ;; parent may be contacted by uncle later (kids - ;; these days!) so save its level - (set-object-property! tp 'level prev-level) - (set! pchain (cons tp pchain)) - ;; "push down" car into hierarchy - (set-car! tp (cons (car tp) words))) + ;; child + ((positive? diff) + (or (= 1 diff) + (error "unhandled diff not 1:" diff line)) + ;; parent may be contacted by uncle later (kids + ;; these days!) so save its level + (set-object-property! tp 'level prev-level) + (set! pchain (cons tp pchain)) + ;; "push down" car into hierarchy + (set-car! tp (cons (car tp) words))) - ;; uncle - ((negative? diff) - ;; prune back to where levels match - (do ((p pchain (cdr p))) - ((= level (object-property (car p) 'level)) - (set! pchain p))) - ;; resume at this level - (set-cdr! (car pchain) words) - (set! pchain (cdr pchain)))) + ;; uncle + ((negative? diff) + ;; prune back to where levels match + (do ((p pchain (cdr p))) + ((= level (object-property (car p) 'level)) + (set! pchain p))) + ;; resume at this level + (set-cdr! (car pchain) words) + (set! pchain (cdr pchain)))) - (loop (read-line port) level words)))) - (else (loop (read-line port) prev-level tp))))) - (set! all (car all)) - (if (eq? 'start all) - '() ; wasteland - (cdr all)))) + (loop (read-line port) level words)))) + (else (loop (read-line port) prev-level tp))))) + (set! all (car all)) + (if (eq? 'start all) + '() ; wasteland + (cdr all)))))) + +(define read-text-outline-silently + (make-text-outline-reader "(([ ][ ])*)- *" + '((level-substring-divisor . 2)))) (define (read-text-outline . args) - (let ((trees (read-text-outline-silently (open-file (car args) "r")))) - ;; try this - ;; (for-each (lambda (tree) - ;; (display-outline-tree 0 tree)) - ;; trees)) - (write trees) - (newline)) + (write (read-text-outline-silently (open-file (car args) "r"))) + (newline) #t) ; exit val (define main read-text-outline)