1 ;; TREX: Tools for Regluar EXpressions
3 ;; Regular Expression Compiler
5 ;; Coded by S.Tomura <tomura@etl.go.jp>
7 ;; Copyright (C) 1992 Free Software Foundation, Inc.
9 ;; This file is part of XEmacs.
10 ;; This file contains Japanese characters
12 ;; XEmacs is free software; you can redistribute it and/or modify it
13 ;; under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; XEmacs is distributed in the hope that it will be useful, but
18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 ;; General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with XEmacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
27 (defvar TREX-version "0.41")
28 ;;; Last modified date: Thu Jun 15 13:07:39 1995
30 ;;; 95.6.15 modified by S.Tomura <tomura@etl.go.jp>
32 ;;;
\e$BFbB"$N
\e(Bre_compile_pattern
\e$B$HF1MM$K
\e(B case-fold-search
\e$B$K$h$C$F!"
\e(B
33 ;;; translate
\e$B$9$k$h$&$KJQ99$7$?!#
\e(B
35 ;;; 95.6.14 modified by S.Tomura <tomura@etl.go.jp>
36 ;;; print-translate
\e$B$rDI2C!#
\e(B<0.38>
37 ;;; print-fastmap
\e$B$rDI2C!#
\e(B
39 ;;; start_memory, end_memory
\e$B$NBh
\e(B2
\e$B0z?t$r@8@.$9$k$?$a$K!"
\e(B:mark
\e$B$NFbIt9=
\e(B
40 ;;;
\e$BB$$rJQ99$7$?!#
\e(B
42 ;;; re-compile-and-dump, regexp-compile-and-dump
\e$B$rDI2C!#
\e(B
45 ;;; regexp19.c
\e$B$KBP1~$7$F
\e(B start_memory, end_memory
\e$B$N
\e(B dump
\e$B%k!<%A%s$r=$@5
\e(B
47 ;;;
\e$B$9$Y$-$3$H!'
\e(B
50 ;;; (2)
\e$B;^$N=gHV
\e(B
51 ;;; (3)
\e$B0UL#$N$J$$%0%k!<%W;2>H$N8!=P
\e(B "\(a\\)\\2"
\e$B$J$I
\e(B
53 (defmacro TREX-inc (symbol &optional delta)
54 (list 'setq symbol (if delta (list '+ symbol delta)
57 (defmacro TREX-dec (symbol &optional delta)
58 (list 'setq symbol (if delta (list '- symbol delta)
62 (list 'num* (list 'quote sym)))
65 (TREX-read-hexa (substring (symbol-name sym) 2)))
67 (defun TREX-read-hexa (str)
68 (let ((result 0) (i 0) (max (length str)))
70 (let ((ch (aref str i)))
71 (cond((and (<= ?0 ch) (<= ch ?9))
72 (setq result (+ (* result 16) (- ch ?0))))
73 ((and (<= ?a ch) (<= ch ?f))
74 (setq result (+ (* result 16) (+ (- ch ?a) 10))))
75 ((and (<= ?A ch) (<= ch ?F))
76 (setq result (+ (* result 16) (+ (- ch ?A) 10)))))
80 ;;; 1 bytes : 0x00 <= C11 <= 0x7F
81 ;;; n bytes : 0x80 == LCCMP
82 ;;; 2 bytes 0xA0 <= LC <= 0xAF
83 ;;; 3 bytes 0xB0 <= LC <= 0xBB
84 ;;; 4 bytes 0xBC <= LC <= 0xBE
85 ;;; 2 bytes : 0x81 <= LC <= 0x8F
86 ;;; 3 bytes : 0x90 <= LC <= 0x9B
87 ;;; 4 bytes : 0x9C <= LC <= 0x9E
90 (defun TREX-char-octets (str index)
91 (let ((max (length str)))
92 (if (or (< index 0) (<= max index)) 0
93 (let ((ch (aref str index))
96 (cond ((<= ch (num 0x7f)) 1)
98 (let ((max (length str))
100 (while (and (< i max)
101 (<= (num 0xa0) (aref str i))
102 (<= (aref str i) (num 0xbe)))
103 (setq ch (aref str i))
104 (cond ((<= ch (num 0xaf)) (TREX-inc i 2))
105 ((<= ch (num 0xbb)) (TREX-inc i 3))
106 ((<= ch (num 0xbe)) (TREX-inc i 4))))
108 ((<= ch (num 0x8f)) 2)
109 ((<= ch (num 0x9b)) 3)
110 ((<= ch (num 0x9e)) 4)
112 (if (<= (+ index bytes) max) bytes 1)))))
114 (defun TREX-comp-charp (str index)
115 (= (aref str index) (num 0x80)))
117 ;;; 0x00 <= C11 <= 0x7F : 1 bytes
119 ;;; 0x80 == LCCMP : n bytes
120 ;;; Type N LCCMP LCN1 C11 ... LCN2 C21 ... LCNn Cn1 ...
121 ;;; 0xA0 <= LCN* <= 0xBE
123 ;;; LCN* = 0xA0 (ASCII)
124 ;;; 0x81 <= LC1 <= 0x8F : 2 bytes
125 ;;; Type 1-2 LC1 C11 :
126 ;;; 0xA0 <= C11 <= 0xFF
127 ;;; 0x90 <= LC2 <= 0x99 : 3 bytes
128 ;;; Type 2-3 LC2 C21 C22
129 ;;; 0xA0 <= C21 <= 0xFF
130 ;;; 0xA0 <= C22 <= 0xFF
131 ;;; 0x9A == LCPRV1 : 3 bytes
132 ;;; Type 1-3 LCPRV1 LC12 C11
133 ;;; 0xA0 <= LC12 <= 0xB7
134 ;;; 0xA0 <= C11 <= 0xFF
135 ;;; 0x9B == LCPRV1 : 3 bytes
136 ;;; Type 1-3 LCPRV1 LC12 C11
137 ;;; 0xB8 <= LC12 <= 0xBF
138 ;;; 0xA0 <= C11 <= 0xFF
139 ;;; 0x9C == LCPRV2 : 4 bytes
140 ;;; Type 2-4 LCPRV2 LC22 C21 C22
141 ;;; 0xC0 <= LC22 <= 0xC7
142 ;;; 0xA0 <= C21 <= 0xFF
143 ;;; 0xA0 <= C22 <= 0xFF
144 ;;; 0x9D == LCPRV2 : 4 bytes
145 ;;; Type 2-4 LCPRV2 LC22 C21 C22
146 ;;; 0xC8 <= LC22 <= 0xDF
147 ;;; 0xA0 <= C21 <= 0xFF
148 ;;; 0xA0 <= C22 <= 0xFF
149 ;;; 0x9E == LCPRV3 : 4 bytes
150 ;;; Type 3-4 LCPRV3 C31 C32 C33
151 ;;; 0xA0 <= C31 <= 0xBF
152 ;;; 0xA0 <= C32 <= 0xFF
153 ;;; 0xA0 <= C33 <= 0xFF
154 ;;; char = [0x00-0x7f]\|
156 ;;; \(0xa0[0xa0-0xff]\|
157 ;;; [0xa1-0xaf][0xa0-0xff]\|
158 ;;; [0xb0-0xb9][0xa0-0xff][0xa0-0xff]\|
159 ;;; 0xba[0xa0-0xb7][0xa0-0xff]\|
160 ;;; 0xbb[0xb8-0xbf][0xa0-0xff]\|
161 ;;; 0xbc[0xc0-0xc7][0xa0-0xff][0xa0-0xff]\|
162 ;;; 0xbd[0xc8-0xdf][0xa0-0xff][0xa0-0xff]\|
163 ;;; 0xbe[0xa0-0xbf][0xa0-0xff][0xa0-0xff]
165 ;;; [0x81-0x8f][0xa0-0xff]\|
166 ;;; [0x90-0x99][0xa0-0xff][0xa0-0xff]\|
167 ;;; 0x9a[0xa0-0xb7][0xa0-0xff]\|
168 ;;; 0x9b[0xb8-0xbf][0xa0-0xff]\|
169 ;;; 0x9c[0xc0-0xc7][0xa0-0xff][0xa0-0xff]\|
170 ;;; 0x9d[0xc8-0xdf][0xa0-0xff][0xa0-0xff]\|
171 ;;; 0x9e[0xa0-0xbf][0xa0-0xff][0xa0-0xff]
173 (defun regexp-make-or (&rest body)
176 (defun regexp-make-seq (&rest body)
179 (defun regexp-make-star (regexp)
180 (list ':star regexp))
182 (defun regexp-make-range (from to)
183 (list 'CHARSET (list ':range from to)))
186 (defvar regexp-allchar-regexp
188 (regexp-make-range 0 (num 0x7f))
195 (regexp-make-range (num 0xa0) (num 0xff)))
197 (regexp-make-range (num 0xa1) (num 0xaf))
198 (regexp-make-range (num 0xa0) (num 0xff)))
200 (regexp-make-range (num 0xb0) (num 0xb9))
201 (regexp-make-range (num 0xa0) (num 0xff))
202 (regexp-make-range (num 0xa0) (num 0xff)))
205 (regexp-make-range (num 0xa0) (num 0xb7))
206 (regexp-make-range (num 0xa0) (num 0xff)))
209 (regexp-make-range (num 0xb8) (num 0xbf))
210 (regexp-make-range (num 0xa0) (num 0xff)))
213 (regexp-make-range (num 0xc0) (num 0xc7))
214 (regexp-make-range (num 0xa0) (num 0xff))
215 (regexp-make-range (num 0xa0) (num 0xff)))
218 (regexp-make-range (num 0xc8) (num 0xdf))
219 (regexp-make-range (num 0xa0) (num 0xff))
220 (regexp-make-range (num 0xa0) (num 0xff)))
223 (regexp-make-range (num 0xa0) (num 0xbf))
224 (regexp-make-range (num 0xa0) (num 0xff))
225 (regexp-make-range (num 0xa0) (num 0xff))))))
227 (regexp-make-range (num 0x81) (num 0x8f))
228 (regexp-make-range (num 0xa0) (num 0xff)))
230 (regexp-make-range (num 0x90) (num 0x99))
231 (regexp-make-range (num 0xa0) (num 0xff))
232 (regexp-make-range (num 0xa0) (num 0xff)))
235 (regexp-make-range (num 0xa0) (num 0xb7))
236 (regexp-make-range (num 0xa0) (num 0xff)))
239 (regexp-make-range (num 0xb8) (num 0xbf))
240 (regexp-make-range (num 0xa0) (num 0xff)))
243 (regexp-make-range (num 0xc0) (num 0xc7))
244 (regexp-make-range (num 0xa0) (num 0xff))
245 (regexp-make-range (num 0xa0) (num 0xff)))
248 (regexp-make-range (num 0xc8) (num 0xdf))
249 (regexp-make-range (num 0xa0) (num 0xff))
250 (regexp-make-range (num 0xa0) (num 0xff)))
253 (regexp-make-range (num 0xa0) (num 0xbf))
254 (regexp-make-range (num 0xa0) (num 0xff))
255 (regexp-make-range (num 0xa0) (num 0xff)))))
261 (defun TREX-string-reverse (str)
262 (if (<= (length str) 1) str
263 (let ((result (make-string (length str) 0))
265 (j (1- (length str))))
267 (aset result i (aref str j))
272 (defun TREX-string-forward-anychar (str start)
273 (and (stringp str) (numberp start)
274 (let ((max (length str)))
277 (+ start (TREX-char-octets str start))))))
279 (defmacro TREX-init (symbol value)
280 (` (if (null (, symbol))
281 (setq (, symbol) (, value)))))
283 (defmacro TREX-push (val symbol)
284 (list 'setq symbol (list 'cons val symbol)))
286 (defun TREX-member (elm list pred)
287 (while (and list (not (funcall pred elm (car list))))
288 (setq list (cdr list)))
291 (defun TREX-memequal (elm list)
292 (while (and list (not (equal elm (car list))))
293 (setq list (cdr list)))
296 (defun TREX-find (elm list)
298 (while (and list (not (equal elm (car list))))
299 (setq list (cdr list))
304 (defun TREX-find-if (pred list)
306 (while (and list (not (funcall pred (car list))))
308 (setq list (cdr list)))
312 (defun TREX-firstn (list n)
313 (if (or (<= n 0) (null list)) nil
314 (cons (car list) (TREX-firstn (cdr list) (1- n)))))
316 (defun TREX-delete-duplicate (list)
319 (let ((elm (car list)))
320 (if (not (TREX-memequal elm result))
321 (TREX-push elm result)))
322 (setq list (cdr list)))
325 (defun TREX-delete (elm list)
328 (if (not (equal elm (car list)))
329 (TREX-push (car list) result))
330 (setq list (cdr list)))
333 (defun TREX-string-to-list (str)
338 (TREX-push (aref str i) result)
342 (defun TREX-sort (list lessp &optional key)
345 (sort list (function (lambda (x y) (funcall lessp (funcall key x) (funcall key y)))))))
347 (defun TREX-key-lessp (x y)
356 ((and (consp y) (eq (car y) ':range))
359 ((and (consp x) (eq (car x) ':range))
360 (cond ((and (consp y) (eq (car y) ':range))
361 (< (nth 2 x) (nth 1 y)))
367 (defun TREX-lessp-car (x y)
370 (TREX-key-lessp x y)))
372 (defmacro TREX-define-enum (&rest list)
373 (list 'TREX-define-enum* (list 'quote list)))
375 (defun TREX-define-enum* (list)
380 (setq list (cdr list)))))
387 ;;;
\e$B@55,I=8=
\e(B(regular expression)
389 ;;; . single character except a newline
390 ;;; REG* more than zero
391 ;;; REG+ at least once
392 ;;; REG? once or not at all
393 ;;; [...] character set
394 ;;; [^...] character not set
395 ;;; ^ beginning of line
399 ;;; \( ... \) group and mark
401 ;;; \` beginning of buffer
403 ;;; \b beginning of word or end of word
405 ;;; \< beginning of word
408 ;;; \w word-constituent character
410 ;;; \sCODE syntax CODE character
411 ;;; \SCODE not \sCODE
427 ;;; "[" "^" ... "]" |
433 ;;;
\e$B>H9g$O@55,I=8=$N:8$+$i1&$X9T$o$l$k!%
\e(B
435 (defvar *regexp-parse-translate* nil
436 "
\e$B@55,I=8=$rFI$_9~$_Cf$K;HMQ$9$k
\e(B translate table.\n
437 case-fold-search
\e$B$NCM$K$h$C$F
\e(B downcasetable
\e$B$r@_Dj$9$k!#
\e(B")
439 (defun regexp-parse-translate-char-string (str)
440 (if (and *regexp-parse-translate*
442 ;;;
\e$BK\Ev$O
\e(B destructive
\e$B$G$b
\e(B OK
443 (char-to-string (aref *regexp-parse-translate* (aref str 0)))
446 (defvar *regexp-word-definition* nil)
448 (defvar *regexp-parse-index* nil)
449 (defvar *regexp-parse-end* nil)
450 (defvar *regexp-parse-str* nil)
451 (defvar *regexp-parse-regno* 1)
453 (defun regexp-error (&optional reason)
454 (if (null reason) (setq reason "Bad regexp"))
455 (error "Regexp-parse::%s \"%s\" * \"%s\"" reason (substring *regexp-parse-str* 0 *regexp-parse-index*)
456 (substring *regexp-parse-str* *regexp-parse-index*)))
458 (defun word-parse (pattern)
459 (let ((*regexp-word-definition* t))
460 (regexp-parse pattern)))
462 (defun regexp-parse (pattern)
463 (let*((*regexp-parse-str* pattern)
464 (*regexp-parse-index* 0)
465 (*regexp-parse-end* (length pattern))
466 (*regexp-parse-regno* 1)
467 (result (regexp-parse-0)))
468 (if (<= *regexp-parse-end* *regexp-parse-index*)
472 (defun regexp-parse-0 ()
473 (let* ((result (regexp-parse-1)))
474 (cond((<= *regexp-parse-end* *regexp-parse-index*)
476 ((and (< (1+ *regexp-parse-index*) *regexp-parse-end*)
477 (= (aref *regexp-parse-str* *regexp-parse-index*) ?\\)
478 (= (aref *regexp-parse-str* (1+ *regexp-parse-index*)) ?|))
479 (TREX-inc *regexp-parse-index* 2)
480 (list ':or result (regexp-parse-0)))
483 (defun regexp-parse-1 ()
486 (while (setq result2 (regexp-parse-2))
487 (TREX-push result2 results))
490 (cons ':seq (nreverse results))
494 (defun regexp-parse-2 ()
495 (let ((result (regexp-parse-3)))
496 (while (and (< *regexp-parse-index* *regexp-parse-end*)
497 (TREX-memequal (aref *regexp-parse-str* *regexp-parse-index*)
499 (let ((ch (aref *regexp-parse-str* *regexp-parse-index*)))
500 (TREX-inc *regexp-parse-index*)
502 (cond((= ch ?*) (list ':star result))
503 ((= ch ?+) (list ':plus result))
504 ((= ch ??) (list ':optional result))))))
507 (defun regexp-parse-3 ()
508 (if (<= *regexp-parse-end* *regexp-parse-index*)
510 (let* ((start *regexp-parse-index*)
511 (i *regexp-parse-index*)
512 (end *regexp-parse-end*)
513 (ch (aref *regexp-parse-str* i)))
514 (TREX-inc *regexp-parse-index*)
515 (cond ((= ch ?.) '(ANYCHAR))
516 ((= ch ?^) '(BEGLINE))
517 ((= ch ?$) '(ENDLINE))
519 (regexp-parse-charset))
521 (setq *regexp-parse-index* start)
524 (setq *regexp-parse-index* start)
527 (setq *regexp-parse-index* start)
530 (setq *regexp-parse-index* start)
532 ((and (= ch ?\\) (< (1+ i) end))
533 (setq ch (aref *regexp-parse-str* (1+ i)))
535 (TREX-inc *regexp-parse-index*)
537 (setq *regexp-parse-index* start)
540 (if (< 9 *regexp-parse-regno*)
541 (regexp-error "Too many parenth"))
542 (let ((regexp-parse-regno *regexp-parse-regno*))
543 (TREX-inc *regexp-parse-regno*)
544 (let ((result (regexp-parse-0)))
546 (cond((and (< (1+ *regexp-parse-index*) *regexp-parse-end*)
547 (= (aref *regexp-parse-str* *regexp-parse-index*) ?\\ )
548 (= (aref *regexp-parse-str* (1+ *regexp-parse-index*)) ?\) ))
549 (TREX-inc *regexp-parse-index* 2)
550 (if *regexp-word-definition*
552 (list ':mark regexp-parse-regno
553 (- *regexp-parse-regno* regexp-parse-regno 1)
558 (setq *regexp-parse-index* start)
560 ((= ch ?` ) '(BEGBUF))
561 ((= ch ?' ) '(ENDBUF))
563 (if *regexp-word-definition* (regexp-error) '(WORDBOUND)))
565 (if *regexp-word-definition* (regexp-error) '(NOTWORDBOUND)))
567 (if *regexp-word-definition* (regexp-error) '(WORDBEG)))
569 (if *regexp-word-definition* (regexp-error) '(WORDEND)))
570 ((= ch ?w ) (list 'SYNTAXSPEC
571 (syntax-spec-code ?w))) ;;;WORDCHAR
572 ((= ch ?W ) (list 'NOTSYNTAXSPEC
573 (syntax-spec-code ?w))) ;;;NOTWORDCHAR
574 ;;; ((= ch ?=) 'AT_DOT)
577 (if *regexp-word-definition*
578 (regexp-error) (list 'DUPLICATE (- ch ?0))))
583 (TREX-inc *regexp-parse-index*)
584 (list 'SYNTAXSPEC (syntax-spec-code (aref *regexp-parse-str* (1+ i)))))
587 (TREX-inc *regexp-parse-index*)
588 (list 'NOTSYNTAXSPEC (syntax-spec-code (aref *regexp-parse-str* (1+ i)))))
591 (TREX-inc *regexp-parse-index*)
592 (list 'CATEGORYSPEC (aref *regexp-parse-str* (1+ i))))
595 (TREX-inc *regexp-parse-index*)
596 (list 'NOTCATEGORYSPEC (aref *regexp-parse-str* (1+ i))))
598 (regexp-parse-translate-char-string
599 (substring *regexp-parse-str* (1+ i) (+ i 2))))))
601 (let ((nextpos (TREX-string-forward-anychar *regexp-parse-str* i)))
603 (setq *regexp-parse-index* nextpos)
604 (regexp-parse-translate-char-string
605 (substring *regexp-parse-str* i nextpos)))
606 (t (regexp-error)))))))))
608 (defun regexp-parse-charset ()
609 (if (< *regexp-parse-index* *regexp-parse-end*)
610 (cond((eq (aref *regexp-parse-str* *regexp-parse-index*) ?^)
611 (TREX-inc *regexp-parse-index*)
612 (regexp-parse-charset0 'CHARSET_NOT nil))
613 (t (regexp-parse-charset0 'CHARSET ;; ':or
617 (defun regexp-parse-charset0 (op list)
618 (if (< *regexp-parse-index* *regexp-parse-end*)
619 (cond ((eq (aref *regexp-parse-str* *regexp-parse-index*) ?\])
620 (TREX-inc *regexp-parse-index*)
621 (regexp-parse-charset1 op '("\]")))
623 (regexp-parse-charset1 op nil)))
626 (defun regexp-parse-charset1 (op list)
627 (if (< *regexp-parse-index* *regexp-parse-end*)
628 (let* ((pos0 *regexp-parse-index*)
629 (pos1 (TREX-string-forward-anychar *regexp-parse-str* pos0))
630 (pos2 (TREX-string-forward-anychar *regexp-parse-str* pos1))
631 (pos3 (TREX-string-forward-anychar *regexp-parse-str* pos2)))
634 (cond((eq (aref *regexp-parse-str* pos0) ?\])
635 (setq *regexp-parse-index* pos1)
636 ;;; returns charset form
637 (cons op (sort (nreverse list) 'TREX-charset-lessp)))
640 (eq (aref *regexp-parse-str* pos1) ?-)
641 (not (eq (aref *regexp-parse-str* pos2) ?\])))
642 (let ((from (substring *regexp-parse-str* pos0 pos1))
643 (to (substring *regexp-parse-str* pos2 pos3)))
644 (if (and (= (length from) (length to))
645 (not (TREX-comp-charp from 0))
646 (not (TREX-comp-charp to 0))
647 (or (= (length from) 1)
648 (= (aref from 0) (aref to 0)))
649 (or (string-equal from to) ;;; by Enami 93.08.08
650 (string-lessp from to)))
651 (if (string-equal from to)
652 (TREX-push from list)
653 (TREX-push (list ':range from to) list))
655 (setq *regexp-parse-index* pos3)
656 (regexp-parse-charset1 op list))
657 ;;; [^]] - ] ;;; by Enami 93.08.08
659 (eq (aref *regexp-parse-str* pos1) ?-)
660 (eq (aref *regexp-parse-str* pos2) ?\]))
661 (TREX-push (substring *regexp-parse-str* pos0 pos1) list)
662 (TREX-push (substring *regexp-parse-str* pos1 pos2) list)
663 (setq *regexp-parse-index* pos2)
664 (regexp-parse-charset1 op list))
666 (TREX-push (substring *regexp-parse-str* pos0 pos1) list)
667 (setq *regexp-parse-index* pos1)
668 (regexp-parse-charset1 op list)))
672 (defun TREX-charset-lessp (ch1 ch2)
673 (cond((and (stringp ch1) (stringp ch2))
674 (string-lessp ch1 ch2))
675 ((and (consp ch1) (consp ch2))
676 (string-lessp (nth 2 ch1) (nth 1 ch2)))
678 (string-lessp (nth 2 ch1) ch2))
680 (string-lessp ch1 (nth 1 ch2)))))
686 (defmacro define-regexp (name &rest forms)
687 (` (define-regexp* '(, name) '(, forms))))
689 (defun define-regexp* (name forms)
690 (put name ':regexp-has-definition t)
691 (put name ':regexp-definition
692 (if (= (length forms) 1)
694 (` (:seq (,@ forms))))))
696 (defun regexp-get-definition (name)
697 (get name ':regexp-definition))
699 (defun regexp-define-specials (names)
700 (mapcar (function (lambda (name)
701 (put name ':regexp-special t)))
704 (defun regexp-has-definition (name)
705 (get name ':regexp-has-definition))
707 (defun regexp-specialp (name)
708 (get name ':regexp-special))
710 (defun regexp-expand-definition (regexp &optional callers)
713 (let ((op (car regexp)))
715 (` (:mark (, (nth 1 regexp))
717 (, (regexp-expand-definition (nth 3 regexp))))))
719 (` (:or (,@ (mapcar 'regexp-expand-definition (cdr regexp))))))
721 (` (:seq (,@ (mapcar 'regexp-expand-definition (cdr regexp))))))
723 (` (:optional (, (regexp-expand-definition (nth 1 regexp))))))
725 (` (:star (, (regexp-expand-definition (nth 1 regexp))))))
727 (` (:plus (, (regexp-expand-definition (nth 1 regexp))))))
731 ((regexp-specialp op)
734 (error "regexp defs(%s)" op))
735 ((regexp-has-definition op)
736 (regexp-expand-definition (regexp-get-definition op)
739 (error "undefined regexp(%s)" op)))))
749 ;;;
\e$B@55,7A<0$NA4=g=x$rDj5A$9$k!%
\e(B
752 ;;; nil < number < string < symbol < cons
754 (defun regexp-lessp (exp1 exp2)
755 (cond((equal exp1 exp2)
759 (cond((null exp2) nil)
764 (cond((or (null exp2)
771 (cond((or (null exp2)
779 (cond ((not (consp exp2))
781 ((< (length exp1) (length exp2))
783 ((= (length exp1) (length exp2))
784 (regexp-lessp-list exp1 exp2))
787 (defun regexp-lessp-list (exp1 exp2)
788 (cond((null exp1) nil)
789 ((regexp-lessp (car exp1) (car exp2))
791 ((equal (car exp1) (car exp2))
792 (regexp-lessp-list (cdr exp1) (cdr exp2)))
796 ;;; item = list of seq-body(== list of regexp)
800 (defun regexp-item-lessp (item1 item2)
801 (cond((equal item1 item2)
806 (cond ((regexp-key-lessp (car item1) (car item2))
808 ((equal (car item1) (car item2))
809 (regexp-item-lessp (cdr item1) (cdr item2)))
814 (defun regexp-key-lessp-list (sym1 sym2 list)
815 (< (TREX-find sym1 list) (TREX-find sym2 list)))
817 (defun regexp-key-lessp (key1 key2)
818 (cond ((regexp-key-class0 key1)
819 (cond((regexp-key-class0 key2)
820 (regexp-key-lessp-list (car key1) (car key2) *regexp-key-class0*))
822 ((regexp-key-class1 key1)
823 (cond((regexp-key-class1 key2)
824 (regexp-key-lessp-list key1 key2 *regexp-key-class1*))
825 ((or (regexp-key-class2 key2)
826 (regexp-key-class3 key2)
827 (regexp-key-class4 key2)
830 ((regexp-key-class2 key1)
831 (cond((regexp-key-class2 key2)
832 (regexp-key-lessp-list key1 key2 *regexp-key-class2*))
833 ((or (regexp-key-class3 key2)
834 (regexp-key-class4 key2)
837 ((regexp-key-class3 key1)
838 (cond((regexp-key-class3 key2)
839 (regexp-key-lessp-list (car key1) (car key2) *regexp-key-class3*))
840 ((or (regexp-key-class4 key2)
843 ((regexp-key-class4 key1)
845 (and (regexp-key-class4 key2) (< key1 key2))))
848 (defun regexp-alist-lessp (pair1 pair2)
849 (regexp-key-lessp (car pair1) (car pair2)))
855 (defvar *regexp-key-class0* '(START_MEMORY STOP_MEMORY))
857 (defvar *regexp-key-class1* '(BEGLINE ENDLINE
858 ;;; BEFORE_DOT AT_DOT AFTER_DOT
861 WORDBOUND NOTWORDBOUND))
863 (defvar *regexp-key-class2* '(ANYCHAR
866 ;;;WORDCHAR NOTWORDCHAR
869 (defvar *regexp-key-class3* '(DUPLICATE
870 SYNTAXSPEC NOTSYNTAXSPEC
871 CATEGORYSPEC NOTCATEGORYSPEC
874 (regexp-define-specials *regexp-key-class0*)
875 (regexp-define-specials *regexp-key-class1*)
876 (regexp-define-specials *regexp-key-class2*)
877 (regexp-define-specials *regexp-key-class3*)
879 (defun regexp-key-class0 (key)
880 (and (consp key) (TREX-memequal (car key) *regexp-key-class0*)))
882 (defun regexp-key-class1 (key)
884 (TREX-memequal (car key) *regexp-key-class1*)))
886 (defun regexp-key-class2 (key)
887 (and (consp key) (TREX-memequal (car key) *regexp-key-class2*)))
889 (defun regexp-key-class3 (key)
891 (TREX-memequal (car key) *regexp-key-class3*)))
893 (defun regexp-key-class4 (key)
894 (or (and (consp key) (eq (car key) ':range))
895 (numberp key) (symbolp key)))
897 (defun regexp-item-key-class0 (item)
898 (regexp-key-class0 (car item)))
900 (defun regexp-item-key-class1 (item)
901 (regexp-key-class1 (car item)))
903 (defun regexp-item-key-class2 (item)
904 (regexp-key-class2 (car item)))
906 (defun regexp-item-key-class3 (item)
907 (regexp-key-class3 (car item)))
909 (defun regexp-item-key-class4 (item)
910 (regexp-key-class4 (car item)))
914 ;;;
\e$B@55,I=8=$NI8=`7A<0$r5a$a$k$?$a$K@0Ns$r9T$&!%
\e(B
917 (defvar *regexp-sort-flag* t)
918 (defvar *regexp-debug* nil)
920 (defun regexp-sort (list pred)
921 (if *regexp-sort-flag*
923 (if *regexp-debug* (princ (format "(regexp-sort %s %s)\n" list pred)))
924 (let ((result (sort list pred)))
925 (if *regexp-debug* (princ (format "<== %s\n" result)))
933 (defun regexp-inverse (regexp)
935 (let ((op (car regexp)))
937 (list ':mark (nth 1 regexp) (nth 2 regexp)
938 (regexp-inverse (nth 3 regexp))))
942 (cons ':or (mapcar 'regexp-inverse (cdr regexp))))
944 (cons ':seq (nreverse (mapcar 'regexp-inverse (cdr regexp)))))
946 (list ':optional (regexp-inverse (nth 1 regexp))))
948 (list ':star (regexp-inverse (nth 1 regexp))))
950 (list ':plus (regexp-inverse (nth 1 regexp))))
953 (TREX-string-reverse regexp)
957 ;;; regexp-remove-infinite-loop
960 (defun regexp-remove-infinite-loop (regexp)
962 (let ((op (car regexp)))
990 (defvar *regexp-register-definitions* nil)
991 (defvar *regexp-registers* nil)
993 (defun regexp-reform-duplication (regexp)
994 (let* ((*regexp-register-definitions* nil)
995 (newregexp (regexp-reform-duplication-1 regexp)))
996 (let ((*regexp-registers* nil))
997 (regexp-reform-duplication-2 newregexp))))
999 (defun regexp-reform-duplication-1 (regexp)
1000 (if (not (consp regexp)) regexp
1001 (let ((mop (car regexp)))
1003 (cons ':or (mapcar 'regexp-reform-duplication-1
1006 (cons ':seq (mapcar 'regexp-reform-duplication-1
1008 ((TREX-memequal mop '(:star :plus :optional))
1009 (list mop (regexp-reform-duplication-1 (nth 1 regexp))))
1011 (TREX-push (cdr regexp)
1012 *regexp-register-definitions*)
1013 (list 'DUPLICATE (nth 1 regexp)))
1016 (defun regexp-reform-duplication-2 (regexp)
1017 (if (not (consp regexp)) regexp
1018 (let ((mop (car regexp)))
1020 (let ((registers *regexp-registers*)
1023 (or-body (cdr regexp)))
1025 (setq *regexp-registers* registers)
1026 (TREX-push (regexp-reform-duplication-2 (car or-body)) result)
1027 (setq newregisters (TREX-delete-duplicate (append *regexp-registers* newregisters)))
1028 (setq or-body (cdr or-body)))
1029 (setq *regexp-registers* newregisters)
1030 (cons ':or (nreverse result))))
1032 (cons ':seq (mapcar 'regexp-reform-duplication-2
1034 ((TREX-memequal mop '(:star :plus :optional))
1035 (list mop (regexp-reform-duplication-2 (nth 1 regexp))))
1036 ((eq mop 'DUPLICATE)
1037 (let ((regno (nth 1 regexp)))
1038 (if (TREX-memequal regno *regexp-registers*)
1040 (let ((def (assoc regno *regexp-register-definitions*)))
1041 (TREX-push regno *regexp-registers*)
1042 ;;;
\e$BBg>fIW!)
\e(B
1053 ;;; <ISLAND> ::= ( <ITEM> ...)
1054 ;;; <ITEM> ::= ( <SEQ-BODY> ... )
1057 (defun regexp-expand-regexp (regexp)
1060 (let ((mop (car regexp)))
1062 ;;;((eq mop 'CHARSET)
1063 ;;; (regexp-expand-charset t (cdr regexp)))
1064 ;;;((eq mop 'CHARSET_NOT)
1065 ;;; (regexp-expand-charset nil (cdr regexp)))
1067 (regexp-expand-or (cdr regexp)))
1069 (regexp-expand-seq (cdr regexp)))
1071 (let ((arg (nth 1 regexp)))
1073 (append (regexp-expand-seq (list arg regexp)) (list nil))
1076 (let ((arg (nth 1 regexp)))
1078 (regexp-expand-seq (list arg (list ':star arg)))
1080 ((eq mop ':optional)
1081 (append (regexp-expand-regexp (nth 1 regexp)) (list nil)))
1083 (let ((regno (nth 1 regexp))
1084 (groups (nth 2 regexp))
1085 (arg (nth 3 regexp)))
1087 (list (list (list 'START_MEMORY regno groups)
1089 (list 'STOP_MEMORY regno groups)))
1090 (list (list (list 'START_MEMORY regno groups)
1091 (list 'STOP_MEMORY regno groups))))))
1092 (t (list (list regexp)))))
1093 (cond((null regexp) (list nil))
1094 ((symbolp regexp) (list (list regexp)))
1095 ((numberp regexp) (list (list regexp)))
1098 (let ((i 0) (max (length regexp)))
1100 (TREX-push (aref regexp i) result)
1102 (list (nreverse result)))))
1103 (t (list (list regexp))))))
1106 ;;; (CHARSET "abc" ... ) == (:or (:seq "a" "b" "c") .... )
1108 ;;; (:range "abc" "ade") == (:seq "a" (:range "bc" "de"))
1109 ;;; (:range "bc" "de" ) == (:or (:seq "b" (:range "c" 0xFF))
1110 ;;; (:seq (:range "b"+1 "d"-1) (:range 0xA0 0xFF))
1111 ;;; (:seq "d" (:range 0xA0 "e")))
1116 (defun charset-member-elt (ch elt)
1118 (if (eq (nth 0 elt) ':range)
1119 (and (<= ch (nth 1 elt))
1120 (<= (nth 2 elt) ch))
1124 (defun charset-member-P (ch or-form)
1125 (let ((result) (l (cdr or-form)))
1126 (while (and l (null result))
1127 (if (charset-membership-elt ch (car l))
1132 (defun charset-member-N (ch nor-form)
1133 (not (charset-member+ ch nor-form)))
1135 (defun charset-norp (form)
1136 (and (consp form) (eq (car form) 'CHARSET_NOT)))
1138 (defun charset-and (form1 form2)
1139 (if (charset-norp form1)
1140 (if (charset-norp form2)
1141 (cons ':or (charset-or-PP (cdr form1) (cdr form2)))
1142 (charset-and-PN form2 form1))
1143 (if (charset-norp form2)
1144 (charset-and-pn form1 form2)
1145 (charset-and-PP form1 form2))))
1147 (defun charset-or-PP (or-body1 or-body2)
1148 (append or-body1 or-body2))
1153 (defun regexp-charset-to-regexp (charsets)
1154 (cons ':or (mapcar 'regexp-charset-to-regexp* charsets)))
1156 (defun regexp-charset-to-regexp* (elm)
1157 (cond((consp elm) (regexp-charset-range-to-regexp (nth 1 elm) (nth 2 elm)))
1158 ((stringp elm) (cons ':seq (TREX-string-to-list elm)))
1161 (defun regexp-charset-range-to-regexp (str1 str2)
1162 (let ((result (regexp-charset-range-to-regexp* (TREX-string-to-list str1)
1163 (TREX-string-to-list str2))))
1164 (if (= (length result) 1) (car result) (cons ':seq result))))
1167 (defun regexp-charset-range-to-regexp* (nums1 nums2)
1168 (let ((len (length (cdr nums1)))
1172 (if (= ch1 ch2) (list ch1)
1173 (list (regexp-charset-range-1 ch1 ch2)))
1175 (cons ch1 (regexp-charset-range-to-regexp* (cdr nums1) (cdr nums2)))
1176 (let ((part1 (cons ch1 (regexp-charset-range-to-regexp* (cdr nums1) (make-list (length (cdr nums1)) 255))))
1177 (part2 (if (<= (1+ ch1) (1- ch2))
1178 (cons (regexp-charset-range-1 (1+ ch1) (1- ch2))
1179 (regexp-charset-range-to-regexp* (make-list len 160) (make-list len 255)))
1181 (part3 (cons ch2 (regexp-charset-range-to-regexp* (make-list len 160) (cdr nums2)))))
1183 (list (list ':or (cons ':seq part1) (cons ':seq part2) (cons ':seq part3)))
1184 (list (list ':or (cons ':seq part1) (cons ':seq part3)))))))))
1186 (defun regexp-charset-range-1 (from to)
1189 (TREX-push to result)
1191 (cons ':or result)))
1193 (defun regexp-charset-range-1* (from to)
1194 (if (not (<= from to)) nil
1195 (cons from (regexp-charset-range-1* (1+ from) to))))
1197 (defvar *regexp-charset-vector* nil)
1199 (defun regexp-expand-charset (mode charsets)
1200 (TREX-init *regexp-charset-vector* (make-vector 256 nil))
1203 (aset *regexp-charset-vector* i nil)
1206 (cond((numberp (car charsets))
1207 (aset *regexp-charset-vector* (car charsets) t))
1208 ((stringp (car charsets))
1209 (if (= (length (car charsets)) 1)
1210 (aset *regexp-charset-vector* (aref (car charsets) 0) t)
1211 (let ((list (TREX-string-to-list (car charsets))))
1212 (aset *regexp-charset-vector* (car list)
1213 (regexp-expand-charset-set-mark (cdr list)
1214 (aref *regexp-charset-vector* (car list)))))))
1215 ((and (consp (car charsets))
1216 (eq (car (car charsets)) ':range))
1217 (let ((from (aref (nth 1 (car charsets)) 0))
1218 (to (aref (nth 2 (car charsets)) 0)))
1223 (aset *regexp-charset-vector* char t)
1225 (let ((from-list (TREX-string-to-list (nth 1 (car charsets))))
1226 (to-list (TREX-string-to-list (nth 2 (car charsets)))))
1227 ;;;
\e$B$I$&$9$s$N!*
\e(B
1229 (setq charsets (cdr charsets)))
1233 (if (eq (aref *regexp-charset-vector* i) mode)
1234 (TREX-push (list i) result))
1239 (defun regexp-expand-charset-set-mark (chars alist)
1241 (let ((place (assoc (car chars) alist)))
1245 (regexp-expand-charset-set-mark (cdr chars) nil))
1249 (regexp-expand-charset-set-mark (cdr chars) (cdr place)))
1252 (defun regexp-expand-or (regexps)
1254 (append (regexp-expand-regexp (car regexps))
1255 (regexp-expand-or (cdr regexps)))
1258 (defun regexp-expand-seq (regexps)
1261 (let ((result (regexp-expand-regexp (car regexps))))
1262 (if (TREX-memequal nil result)
1263 (let ((newresult (regexp-expand-seq (cdr regexps))))
1264 (setq result (TREX-delete nil result))
1266 (TREX-push (append (car result) (cdr regexps)) newresult)
1267 (setq result (cdr result)))
1269 (let ((newresult nil))
1271 (TREX-push (append (car result) (cdr regexps)) newresult)
1272 (setq result (cdr result)))
1275 (defun regexp-expand-items (items)
1277 (append (regexp-expand-seq (car items))
1278 (regexp-expand-items (cdr items)))
1285 (defun regexp-make-island (items)
1286 (let ((result (TREX-delete-duplicate (regexp-expand-items items))))
1289 (cond((null (car l))
1292 (t (setq l (cdr l))))))
1295 (defun regexp-make-island-parallel (items)
1296 (regexp-sort (TREX-delete-duplicate (regexp-expand-items items))
1297 'regexp-item-lessp))
1300 ;;; Finate state Automaton:
1302 ;;; FA : Non-deterministic FA
1303 ;;; EFFA : Epsilon Free FA
1304 ;;; DFA : Deterministic FA
1307 ;;; DFA-optimize <- DFA <- EFFA <- NDFA <- regexp
1312 ;;; <FA> ::= ( <START> . <TransTables> )
1313 ;;; <TransTables> ::= ( <Node> . <TransTable> ) ...
1314 ;;; <TransTable> ::= ( <Key> . <Next> ) ...
1315 ;;; <Key> ::= <Char> | <Condition> | :epsilon
1318 (defvar *regexp-node-to-transtable* nil)
1319 (defvar *regexp-island-to-node* nil)
1320 (defvar *regexp-counter* 0)
1322 (defun FA-make (regexp)
1323 (setq *regexp-island-to-node* nil)
1324 (let ((*regexp-node-to-transtable* nil)
1325 ;;; (*regexp-island-to-node* nil)
1326 (*regexp-counter* 0))
1327 (let ((island (regexp-make-island (regexp-expand-regexp regexp))))
1328 (cons (FA-make-closure island) (nreverse *regexp-node-to-transtable*)))))
1330 (defun FA-make-closure (island)
1331 (if *regexp-debug* (princ (format "FA-make-closure %s\n" island)))
1332 (if (null island) nil
1333 (let ((place (assoc island *regexp-island-to-node*))
1335 (cond(place (cdr place))
1336 ;;; START_MEMORY and STOP_MEMORY
\e$B!JL5>r7o!$:GM%@h$GA+0\$9$k$b$N!K
\e(B
1337 ((setq pos (TREX-find-if 'regexp-item-key-class0 island))
1338 (let ((pre (TREX-firstn island pos))
1339 (item (nth pos island))
1340 (post (nthcdr (1+ pos) island)))
1341 (let* ((number (TREX-inc *regexp-counter*))
1342 (pair (cons (car item) nil))
1344 (place (cons number alist)))
1345 (TREX-push (cons island number) *regexp-island-to-node*)
1346 (TREX-push place *regexp-node-to-transtable*)
1349 (regexp-make-island (append pre (list (cdr item)) post))))
1351 ;;; BEGLINE, ENDLINE, WORDBEG, ....
\e$B!JD9$5#0$N$b$N!K
\e(B
1353 ;;; KEY --> TRUE+FALSE
1354 ;;; :epsilon --> FALSE
\e$B$H$J$k!%
\e(B
1355 ((setq pos (TREX-find-if 'regexp-item-key-class1 island))
1356 (let((key (car (nth pos island)))
1361 (let ((item (car items)))
1362 (if (equal key (car item))
1363 (TREX-push (cdr item) result-true)
1365 (TREX-push item result-true)
1366 (TREX-push item result-false))))
1367 (setq items (cdr items)))
1368 (setq result-true (nreverse result-true)
1369 result-false (nreverse result-false))
1370 (if (null result-false)
1371 (let* ((number (TREX-inc *regexp-counter*))
1372 (pair-true (cons key nil))
1373 (alist (list pair-true))
1374 (place (cons number alist)))
1375 (TREX-push (cons island number) *regexp-island-to-node*)
1376 (TREX-push place *regexp-node-to-transtable*)
1377 (setcdr pair-true (FA-make-closure (regexp-make-island result-true)))
1379 (let* ((number (TREX-inc *regexp-counter*))
1380 (pair-true (cons key nil))
1381 (pair-false (cons ':epsilon nil))
1382 (alist (list pair-true pair-false))
1383 (place (cons number alist)))
1384 (TREX-push (cons island number) *regexp-island-to-node*)
1385 (TREX-push place *regexp-node-to-transtable*)
1386 (setcdr pair-true (FA-make-closure (regexp-make-island result-true)))
1387 (setcdr pair-false (FA-make-closure (regexp-make-island result-false)))
1390 (FA-make-closure* island (FA-make-pre-alist island)))))))
1393 ;;;
\e$B$3$3$G07$&$N$O
\e(B class2,3,4
\e$B$N$_
\e(B
1395 (defun FA-make-closure* (island pre-alist)
1396 (if *regexp-debug* (princ (format "\nregexp-make-clousre* %s" pre-alist)))
1397 (let* ((number (TREX-inc *regexp-counter*))
1398 (place (cons number pre-alist)))
1399 (TREX-push (cons island number) *regexp-island-to-node*)
1400 (TREX-push place *regexp-node-to-transtable*)
1402 (let ((pair (car pre-alist)))
1404 (FA-make-closure (regexp-make-island (cdr pair)))))
1405 (setq pre-alist (cdr pre-alist)))
1409 ;;; PRE-ALIST ::= ( (key . items) ... )
1412 (defun FA-make-pre-alist (items)
1413 (let ((pre-alist nil))
1415 (let ((item (car items)))
1416 (cond((or (regexp-key-class2 (car item))
1417 (regexp-key-class3 (car item)))
1418 (let ((key (car item))
1420 (while (and items (equal key (car (car items))))
1421 (TREX-push (cdr (car items)) newitems)
1422 (setq items (cdr items)))
1423 (setq newitems (nreverse newitems))
1424 (TREX-push (cons key newitems) pre-alist)))
1426 (TREX-push (list nil) pre-alist)
1427 (setq items (cdr items)))
1428 ((regexp-key-class4 (car item))
1430 (while (and items (regexp-key-class4 (car (car items))))
1431 (let* ((newitem (car items))
1432 (place (assoc (car newitem) alist)))
1435 (cons (cdr newitem) (cdr place)))
1436 (TREX-push (cons (car newitem) (list (cdr newitem))) alist)))
1437 (setq items (cdr items)))
1438 (setq alist (sort alist 'TREX-lessp-car))
1441 (setcdr (car list) (nreverse (cdr (car list))))
1442 (setq list (cdr list)))
1443 (setq pre-alist (append alist pre-alist))
1445 (t (error "undefined items(%s)" item)))))
1446 (nreverse pre-alist)))
1452 (defun FA-inverse (FA)
1463 (let ((n (car (car l))))
1464 (if (< n minnode) (setq minnode n))
1465 (if (< maxnode n) (setq maxnode n)))
1467 (setq newstart (1- minnode))
1468 (setq newfinal (1+ maxnode))
1469 (setq newtable (FA-link newfinal nil nil newtable))
1471 (let* ((Snode (car table))
1472 (Snumber (car Snode))
1473 (Salist (cdr Snode)))
1475 (let* ((pair (car Salist))
1477 (Tnumber (cdr pair)))
1479 (setq newtable (FA-link newstart ':epsilon Snumber newtable)))
1481 (setq newtable (FA-link Tnumber key Snumber newtable))))
1482 (setq Salist (cdr Salist)))))
1483 (setq table (cdr table)))
1484 (setq newtable (FA-link start ':epsilon newfinal newtable))
1485 ;;;; FA
\e$B$N
\e(B final
\e$B$X
\e(B invFA
\e$B$N
\e(B start
\e$B$+$i
\e(B :epsilon link
\e$B$rD%$k!%
\e(B
1488 (setcdr (car l) (reverse (cdr(car l))))
1490 (setq newtable (sort newtable 'TREX-lessp-car))
1491 (cons newstart newtable)))
1493 (defun FA-link (from key to table)
1494 (let ((place (assoc from table)))
1495 (cond ((null place )
1496 (setq place (cons from nil))
1497 (TREX-push place table)))
1498 (setcdr place (cons (cons key to) (cdr place)))
1505 (defun FA-dump (table)
1506 (let ((start (car table))
1508 (princ (format "\nstart = %d\n" start))
1510 (princ (format "%3d: " (car (car l))))
1511 (let ((alist (cdr (car l))))
1512 (cond ((numberp (car (car alist)))
1513 (princ (format "%c -> %s\n" (car (car alist)) (cdr (car alist)))))
1514 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
1515 (princ (format "(%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
1517 (princ (format "%s -> %s\n" (car (car alist)) (cdr (car alist))))))
1518 (setq alist (cdr alist))
1520 (cond ((numberp (car (car alist)))
1521 (princ (format " %c -> %s\n" (car (car alist)) (cdr (car alist)))))
1522 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
1523 (princ (format " (%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
1525 (princ (format " %s -> %s\n" (car (car alist)) (cdr (car alist))))))
1526 (setq alist (cdr alist))))
1530 ;;; EFFA: Epsilon Free Finate Automaton
1533 (defvar *FA-table* nil)
1534 (defvar *EFFA-table* nil)
1536 (defun EFFA-make (FA)
1537 (let* ((start (car FA))
1538 (*FA-table* (cdr FA))
1541 (cons newstart (reverse (EFFA-make* start)))))
1543 (defun EFFA-make* (node)
1544 (let ((place (assoc node *EFFA-table*)))
1546 (let ((place (cons node nil)))
1547 (TREX-push place *EFFA-table*)
1549 (reverse (EFFA-make-alist nil (cdr (assoc node *FA-table*))
1551 (let ((alist (cdr place)))
1553 (cond((car (car alist))
1554 (EFFA-make* (cdr (car alist)))))
1555 (setq alist (cdr alist))))))))
1558 (defun EFFA-make-alist (newalist alist set)
1560 (let ((node (cdr (car alist))))
1561 (cond((eq (car (car alist)) ':epsilon)
1562 (cond((not (TREX-memequal node set))
1563 (TREX-push node set)
1565 (EFFA-make-alist newalist (cdr (assoc node *FA-table*)) set)))))
1567 (TREX-push (car alist) newalist))))
1568 (setq alist (cdr alist)))
1572 ;;; DFA: Deterministic Finate Automata
1575 (defvar *DFA-node-counter* nil)
1577 (defvar *DFA-node-definitions* nil
1578 "List of FD-nodes to node number")
1580 (defvar *DFA-table* nil
1581 "node number to alist")
1583 (defun DFA-make (EFFA)
1584 (let ((start (car EFFA))
1585 (*EFFA-table* (cdr EFFA))
1586 (*DFA-node-counter* 0)
1587 (*DFA-node-definitions* nil )
1589 (DFA-make-1 (list start))
1590 (cons (cdr (assoc (list start) *DFA-node-definitions*)) *DFA-table*)))
1592 (defun DFA-make-1 (states)
1593 (let ((place (assoc states *DFA-node-definitions*)))
1595 (TREX-inc *DFA-node-counter*)
1596 (setq place (cons states *DFA-node-counter*))
1597 (TREX-push place *DFA-node-definitions*)
1598 (let ((pair (cons *DFA-node-counter* nil)))
1599 (TREX-push pair *DFA-table*)
1600 (setcdr pair (DFA-make-pre-alist (DFA-collect-alist states)))
1601 (let ((alist (cdr pair)))
1603 (let ((top (car alist)))
1606 (DFA-make-1 (cdr top)))))
1607 (setq alist (cdr alist))))
1611 (defun DFA-collect-alist (states)
1614 (setq result (append (cdr (assoc (car states) *EFFA-table*)) result))
1615 (setq states (cdr states)))
1618 (defun DFA-make-pre-alist (oldAlist)
1619 (let ((pre-alist nil))
1621 (let ((oldKey (car (car oldAlist))))
1622 (cond((or (regexp-key-class0 oldKey)
1623 (regexp-key-class1 oldKey)
1624 (regexp-key-class2 oldKey)
1625 (regexp-key-class3 oldKey))
1628 (while (and oldAlist (equal key (car (car oldAlist))))
1629 (TREX-push (cdr (car oldAlist)) newAlist)
1630 (setq oldAlist (cdr oldAlist)))
1631 (setq newAlist (nreverse newAlist))
1632 (TREX-push (cons key newAlist) pre-alist)))
1633 ((regexp-key-class4 oldKey)
1635 (while (and oldAlist (regexp-key-class4 (car (car oldAlist))))
1636 (let ((place (assoc (car (car oldAlist)) alist)))
1639 (cons (cdr (car oldAlist)) (cdr place)))
1640 (TREX-push (cons (car (car oldAlist)) (list(cdr (car oldAlist)))) alist)))
1641 (setq oldAlist (cdr oldAlist)))
1642 (setq alist (sort alist 'TREX-lessp-car))
1645 (setcdr (car list) (reverse (cdr (car list))))
1646 (setq list (cdr list)))
1647 (setq pre-alist (append alist pre-alist))
1650 (TREX-push (list nil) pre-alist)
1651 (setq oldAlist (cdr oldAlist)))
1653 (setq oldAlist (cdr oldAlist))))))
1654 (nreverse pre-alist)))
1658 ;;;
\e$B$3$3$G$N:GE,2=$O>H9g=g=x$rJ]B8$9$k!%
\e(B
1659 ;;; longer match
\e$B$J$I$r$9$k>l9g$OJQ99$9$kI,MW$,$"$k!%
\e(B
1661 (defvar *DFA-optimize-debug* nil)
1663 (defvar *DFA-optimize-groups* nil)
1664 (defvar *DFA-optimize-node* 1)
1666 (defun DFA-optimize (FA)
1667 (if *DFA-optimize-debug* (terpri))
1668 (let* ((start (car FA))
1670 (*DFA-optimize-node* 1)
1671 (*DFA-optimize-groups*
1672 (list (cons *DFA-optimize-node* (mapcar 'car table)))))
1674 (catch 'DFA-optimize-changed
1675 (let ((groups *DFA-optimize-groups*))
1677 (if *DFA-optimize-debug*
1678 (princ (format "\nGroups to be checked: %s\n" groups)))
1679 (let* ((Sgroup (car groups))
1680 (Sgroup-number (car Sgroup))
1681 (oldgroup (cdr Sgroup))
1683 (Smembers oldgroup))
1684 (if *DFA-optimize-debug*
1685 (princ (format " Sgroup-number: %s = %s\n" Sgroup-number Smembers)))
1687 (let* ((Snumber (car Smembers))
1688 (Salist (cdr (assoc Snumber table))))
1689 (if *DFA-optimize-debug*
1690 (princ (format " Snumber: %s\n" Snumber)))
1691 (let ((Tmembers (cdr Smembers)))
1693 (if (not (eq Snumber (car Tmembers)))
1694 (let* ((Tnumber (car Tmembers))
1695 (Talist (cdr (assoc Tnumber table)))
1697 (if *DFA-optimize-debug*
1698 (princ (format " Tnumber: %s\n" Tnumber)))
1699 (while (and Talist Salist
1700 (equal (car (car Talist))
1701 (car (car Salist))) ;;; key
1702 (equal (DFA-optimize-group-number
1704 (DFA-optimize-group-number
1705 (cdr (car Salist))) ;;; next group
1707 (if *DFA-optimize-debug*
1709 (princ (format " Skey: %s -> %s(%s)\n"
1712 (DFA-optimize-group-number (cdr (car Salist)))))
1713 (princ (format " Tkey: %s -> %s(%s)\n"
1716 (DFA-optimize-group-number (cdr (car Talist)))))))
1717 (setq Talist (cdr Talist)
1718 Salist (cdr Salist)))
1719 (cond((or Talist Salist)
1720 (setq newgroup (cons Tnumber newgroup)
1721 oldgroup (TREX-delete Tnumber oldgroup))
1722 (if *DFA-optimize-debug*
1723 (princ(format " oldGroup : %s\n newGroup : %s\n" oldgroup newgroup)))))
1725 (setq Tmembers (cdr Tmembers)))))
1727 (if *DFA-optimize-debug*
1728 (princ (format "Changed :%s --> " Sgroup)))
1729 (setcdr Sgroup oldgroup)
1730 (if *DFA-optimize-debug*
1731 (princ (format "%s" Sgroup)))
1732 (TREX-inc *DFA-optimize-node*)
1733 (if *DFA-optimize-debug*
1734 (princ (format "+%s\n" (cons *DFA-optimize-node* newgroup))))
1735 (TREX-push (cons *DFA-optimize-node* newgroup) *DFA-optimize-groups*)
1736 (throw 'DFA-optimize-changed t)))
1737 (setq Smembers (cdr Smembers))))
1738 (setq groups (cdr groups))))))
1741 (if *DFA-optimize-debug*
1742 (princ (format "table: %s\n" table)))
1743 (if *DFA-optimize-debug*
1744 (princ (format "groups: %s\n" *DFA-optimize-groups*)))
1745 (let ((newtable nil)
1747 (groups *DFA-optimize-groups*))
1749 ;;; start node
\e$B$rC5$9
\e(B
1750 (let ((l *DFA-optimize-groups*))
1752 (cond((TREX-memequal start (cdr (car l)))
1753 (setq newstart (car (car l)))
1756 (setq l (cdr l))))))
1758 ;;;
\e$B?7$7$$
\e(B transTable
\e$B$r:n$k!%
\e(B
1760 (let* ((group (car groups))
1761 (group-number (car group))
1762 (member-number (car (cdr group)))
1763 (member-alist (cdr (assoc member-number table))))
1764 (TREX-push (cons group-number
1765 (let ((group-alist nil))
1767 (let ((Mkey (car (car member-alist)))
1768 (Mnext (cdr (car member-alist))))
1769 (TREX-push (cons Mkey (DFA-optimize-group-number Mnext))
1771 (setq member-alist (cdr member-alist)))
1772 (nreverse group-alist)))
1774 (setq groups (cdr groups))))
1775 (cons newstart newtable))))
1777 (defun DFA-optimize-group-number (node)
1778 (let ((l *DFA-optimize-groups*) (result nil))
1780 (cond((TREX-memequal node (cdr (car l)))
1781 (setq result (car (car l))
1783 (t (setq l (cdr l)))))
1786 (defun DFA-optimize-parallel (FA)
1787 (if *DFA-optimize-debug* (terpri))
1788 (let* ((start (car FA))
1790 (*DFA-optimize-node* 1)
1791 (*DFA-optimize-groups*
1792 (list (cons *DFA-optimize-node* (mapcar 'car table)))))
1794 (catch 'DFA-optimize-changed
1795 (let ((groups *DFA-optimize-groups*))
1797 (if *DFA-optimize-debug*
1798 (princ (format "\nGroups to be checked: %s\n" groups)))
1799 (let* ((Sgroup (car groups))
1800 (Sgroup-number (car Sgroup))
1801 (oldgroup (cdr Sgroup))
1803 (Smembers oldgroup))
1804 (if *DFA-optimize-debug*
1805 (princ (format " Sgroup-number: %s = %s\n" Sgroup-number Smembers)))
1807 (let* ((Snumber (car Smembers))
1808 (Salist (cdr (assoc Snumber table))))
1809 (if *DFA-optimize-debug*
1810 (princ (format " Snumber: %s\n" Snumber)))
1812 (let* ((Spair (car Salist))
1815 (Snext-group (DFA-optimize-group-number Snext))
1816 (Tmembers oldgroup))
1817 (if *DFA-optimize-debug*
1818 (princ (format " Skey: %s -> %s(%s)\n" Skey Snext-group Snext)))
1820 (if (not (eq Snumber (car Tmembers)))
1821 (let* ((Tnumber (car Tmembers))
1822 ;;;
\e$BMW:F8!F$
\e(B
1823 (Tpair (assoc Skey (cdr (assoc Tnumber table))))
1825 (Tnext-group (DFA-optimize-group-number (cdr Tpair))))
1826 (if *DFA-optimize-debug*
1827 (princ (format " Tnumber: %s : %s -> %s(%s)\n" Tnumber (car Tpair)
1828 (DFA-optimize-group-number (cdr Tpair))(cdr Tpair))))
1829 (cond((and (equal Spair '(nil))
1830 (equal Tpair '(nil))))
1831 ((and Skey (equal Snext-group Tnext-group)))
1833 (TREX-push Tnumber newgroup)
1834 (setq oldgroup (TREX-delete Tnumber oldgroup))
1835 (if *DFA-optimize-debug*
1836 (princ(format (format " oldGroup : %s\n newGroup : %s\n" oldgroup newgroup))))
1838 (setq Tmembers (cdr Tmembers)))
1840 (if *DFA-optimize-debug*
1841 (princ (format "Changed :%s --> " Sgroup)))
1842 (setcdr Sgroup oldgroup)
1843 (if *DFA-optimize-debug*
1844 (princ (format "%s" Sgroup)))
1845 (TREX-inc *DFA-optimize-node*)
1846 (if *DFA-optimize-debug*
1847 (princ (format "+%s\n" (cons *DFA-optimize-node* newgroup))))
1848 (TREX-push (cons *DFA-optimize-node* newgroup) *DFA-optimize-groups*)
1849 (throw 'DFA-optimize-changed t))))
1850 (setq Salist (cdr Salist))))
1851 (setq Smembers (cdr Smembers))))
1852 (setq groups (cdr groups))))))
1855 (if *DFA-optimize-debug*
1856 (princ (format "table: %s\n" table)))
1857 (if *DFA-optimize-debug*
1858 (princ (format "groups: %s\n" *DFA-optimize-groups*)))
1859 (let ((newtable nil)
1861 (groups *DFA-optimize-groups*))
1863 ;;; start node
\e$B$rC5$9
\e(B
1864 (let ((l *DFA-optimize-groups*))
1866 (cond((TREX-memequal start (cdr (car l)))
1867 (setq newstart (car (car l)))
1870 (setq l (cdr l))))))
1872 ;;;
\e$B?7$7$$
\e(B transTable
\e$B$r:n$k!%
\e(B
1874 (let* ((group (car groups))
1875 (group-number (car group))
1876 (member-number (car (cdr group)))
1877 (member-alist (cdr (assoc member-number table))))
1878 (TREX-push (cons group-number
1879 (let ((group-alist nil))
1881 (let ((Mkey (car (car member-alist)))
1882 (Mnext (cdr (car member-alist))))
1883 (TREX-push (cons Mkey
1885 (cons (DFA-optimize-group-number (car Mnext))
1886 (DFA-optimize-group-number (cdr Mnext)))
1887 (DFA-optimize-group-number Mnext)))
1889 (setq member-alist (cdr member-alist)))
1892 (setq groups (cdr groups))))
1893 (cons newstart newtable))))
1898 ;;; Non Empty Finite Automata
1901 (defun NEFA-make (EFFA)
1902 (let* ((start (car EFFA))
1904 (Salist (cdr (assoc start table))))
1905 (cond((equal Salist '((nil)))
1907 ((and (assoc nil Salist)
1909 (while (and Salist (not (equal start (cdr (car Salist)))))
1910 (setq Salist (cdr Salist)))
1916 (if (< (car (car l)) min)
1917 (setq min (car (car l))))
1918 (if (< max (car (car l)))
1919 (setq max (car (car l))))
1921 (let* ((newstart (1- min))
1922 (newtable (copy-alist table))
1923 (oldSalist (cdr (assoc start table)))
1924 (newSalist (TREX-delete '(nil) (copy-alist oldSalist))))
1926 (cons (cons newstart newSalist) newtable)))))
1934 (defvar *FA-simplify-table* nil)
1936 (defun FA-simplify (FA)
1937 (let ((start (car FA))
1940 (*FA-simplify-table* nil))
1941 (FA-simplify-mark start table)
1942 (while *FA-simplify-table*
1943 (TREX-push (assoc (car *FA-simplify-table*) table) newtable)
1944 (setq *FA-simplify-table* (cdr *FA-simplify-table*)))
1945 (cons start newtable)))
1947 (defun FA-simplify-mark (node table)
1948 (cond ((not (TREX-memequal node *FA-simplify-table*))
1949 (TREX-push node *FA-simplify-table*)
1950 (let ((alist (cdr (assoc node table))))
1952 (cond((car (car alist))
1953 (FA-simplify-mark (cdr (car alist)) table)))
1954 (setq alist (cdr alist)))))))
1957 ;;; Shortest match DFA
1960 (defun DFA-shortest-match (DFA)
1961 (let ((start (car DFA))
1965 (cond ((assoc nil (cdr (car table)))
1966 (TREX-push (cons (car (car table)) '((nil))) newtable))
1968 (TREX-push (car table) newtable)))
1969 (setq table (cdr table)))
1970 (cons start newtable)))
1973 ;;; Fastmap computation
1976 (defvar *DFA-fastmap-chars* nil)
1977 (defvar *DFA-fastmap-syntax* nil)
1978 (defvar *DFA-fastmap-category* nil)
1979 (defvar *DFA-fastmap-init* 0 )
1980 (defvar *DFA-fastmap-pos* 1 ) ;;; SYNTAXSPEC or CATEGORYSPEC
1981 (defvar *DFA-fastmap-neg* 2 ) ;;; NOTSYNTAXSPEC or NOTCATEGORYSPEC
1983 ;;;;
\e$B$9$Y$F$N
\e(B char
\e$B$OB~0l$D$N
\e(B syntaxspec
\e$B$KB0$9$k
\e(B
1984 ;;;; ==> syntaxspec(ch) and notsyntaxspec(ch) --> all char
1985 ;;;; ==> notsyntaxspec(ch1) and notsyntaxspec(ch2) --> all char
1986 ;;;; ==> notsyntaxspec(ch1) and syntaxspec(ch2) == notsyntaxspec(ch1)
1987 ;;;;
\e$B$D$^$j
\e(B notsyntaxspec
\e$B$O9b!9#1$D$7$+$J$$!%
\e(B
1989 ;;; Returns [ CODE FASTMAP SYNTAX-FASTMAP CATEGOY-FASTMAP ]
1991 (defun DFA-code-with-fastmap (DFA)
1992 (TREX-init *DFA-fastmap-chars* (make-vector 256 nil))
1993 (TREX-init *DFA-fastmap-syntax* (make-vector 256 nil))
1994 (TREX-init *DFA-fastmap-category* (make-vector 256 nil))
1995 (let ((code (regexp-code-gen DFA))
1997 (*DFA-fastmap-table* (cdr DFA))
1998 (*DFA-fastmap-mark* nil)
1999 (*DFA-fastmap-special* nil))
2002 (aset *DFA-fastmap-chars* i nil)
2003 (aset *DFA-fastmap-syntax* i nil)
2004 (aset *DFA-fastmap-category* i nil)
2006 (DFA-fastmap-collect start)
2007 (let ((fastmap (if *DFA-fastmap-special*
2008 nil ;;;(make-string 256 1)
2009 (make-string 256 0)))
2011 (syntax (if *DFA-fastmap-special*
2013 (make-string 256 0)))
2015 (notsyntax-entries 0)
2016 (category (if *DFA-fastmap-special*
2018 (make-string 256 0)))
2019 (category-entries 0))
2020 (let ((result (make-vector 4 nil)))
2021 (aset result 0 code)
2022 (if *DFA-fastmap-special*
2024 (aset result 1 fastmap)
2025 (aset result 2 syntax)
2026 (aset result 3 category))
2030 (if (aref *DFA-fastmap-chars* i)
2032 (TREX-inc fastmap-entries)
2033 (aset fastmap i 1)))
2035 (cond((null (aref *DFA-fastmap-syntax* i))
2037 ((eq (aref *DFA-fastmap-syntax* i) 'SYNTAXSPEC)
2038 (TREX-inc syntax-entries)
2040 ((eq (aref *DFA-fastmap-syntax* i) 'NOTSYNTAXSPEC)
2041 (TREX-inc notsyntax-entries)
2042 (TREX-inc syntax-entries)
2043 *DFA-fastmap-neg*)))
2045 (cond((null (aref *DFA-fastmap-category* i))
2047 ((eq (aref *DFA-fastmap-category* i) 'CATEGORYSPEC)
2048 (TREX-inc category-entries)
2050 ((eq (aref *DFA-fastmap-category* i) 'NOTCATEGORYSPEC)
2051 (TREX-inc category-entries)
2052 *DFA-fastmap-neg*)))
2055 (cond((<= 2 notsyntax-entries)
2056 (setq fastmap (make-string 256 1)
2059 ((= 1 notsyntax-entries)
2062 (if (= (aref syntax ch) *DFA-fastmap-neg*)
2063 (aset syntax ch *DFA-fastmap-init*)
2064 (aset syntax ch *DFA-fastmap-pos*))
2066 (aset result 1 fastmap)
2067 (aset result 2 syntax)
2068 (aset result 3 category)))
2071 (defun DFA-fastmap-collect (node)
2072 (if (TREX-memequal node *DFA-fastmap-mark*) nil
2073 (let ((alist (cdr (assoc node *DFA-fastmap-table*))))
2074 (TREX-push node *DFA-fastmap-mark*)
2076 (let ((key (car (car alist))))
2078 (aset *DFA-fastmap-chars* key t))
2079 ((symbolp key);;; can be null
2080 (setq *DFA-fastmap-special* t))
2082 (let ((op (car key)))
2084 ((TREX-memequal op '(START_MEMORY STOP_MEMORY))
2085 (DFA-fastmap-collect (cdr (car alist))))
2086 ((TREX-memequal op '(SYNTAXSPEC NOTSYNTAXSPEC))
2087 (let ((specch (syntax-code-spec (nth 1 key))))
2088 (cond((null (aref *DFA-fastmap-syntax* (nth 1 key)))
2089 (aset *DFA-fastmap-syntax* specch op))
2090 ((not (eq (aref *DFA-fastmap-syntax* specch) op))
2091 (setq *DFA-fastmap-special* t)))))
2092 ((TREX-memequal op '(CATEGORYSPEC NOTCATEGORYSPEC))
2093 (let ((specch (nth 1 key)))
2094 (cond((null (aref *DFA-fastmap-category* specch))
2095 (aset *DFA-fastmap-category* specch op))
2096 ((not (eq (aref *DFA-fastmap-category* specch) op))
2097 (setq *DFA-fastmap-special* t)))))
2098 ((TREX-memequal op '(CHARSET CHARSET_NOT))
2099 (let ((list (cdr key)))
2101 (let ((from nil) (to nil))
2102 (cond((stringp (car list))
2103 (setq from (aref (car list) 0)
2104 to (aref (car list) 0)))
2106 (setq from (aref (nth 1 (car list)) 0)
2107 to (aref (nth 2 (car list)) 0))))
2109 (cond((null (aref *DFA-fastmap-chars* from))
2110 (aset *DFA-fastmap-chars* from
2111 (if (eq op 'CHARSET_NOT) 'CHARSET_NOT
2114 (setq list (cdr list))))
2115 (if (eq op 'CHARSET_NOT)
2118 (cond((null (aref *DFA-fastmap-chars* i))
2119 (aset *DFA-fastmap-chars* i t))
2120 ((eq (aref *DFA-fastmap-chars* i) 'CHARSET_NOT)
2121 (aset *DFA-fastmap-chars* i nil)))
2124 (setq *DFA-fastmap-special* t)))))))
2125 (setq alist (cdr alist))))))
2128 ;;;
\e$B@55,I=8=%3!<%I$NL?NaI=
\e(B
2131 (if (= regexp-version 19)
2138 START_MEMORY ;;; 18*
2146 JUMP_PAST_ALT ;;; 19
2147 ON_FAILURE_JUMP ;;; 18
2148 ON_FAILURE_KEEP_STRING_JUMP ;;; 19
2150 ;;;; maybe_finalize_jump
2151 POP_FAILURE_JUMP ;;; 19
2152 MAYBE_POP_JUMP ;;; 19
2153 DUMMY_FAILURE_JUMP ;;; 18
2154 PUSH_DUMMY_FAILURE ;;; 19
2157 SET_NUMBER_AT ;;; 19
2168 NOTSYNTAXSPEC ;;; 18
2186 ;; else regexp-version == 18.
2203 BEFORE_DOT ;;; not used
2205 AFTER_DOT ;;; not used
2208 WORDCHAR ;;; not used
2209 NOTWORDCHAR ;;; not used
2217 ;;; extended instructions
2225 SUCCESS_SHORT ;;; == ON_FAILURE_SUCCESS
2234 (defvar ON_FAILURE_SUCCESS SUCCESS_SHORT)
2237 ;;; ANYCHAR = EXCEPT1 \n
2238 ;;; ALLCHAR = EXCEPT0
2242 ;;;
\e$B@55,I=8=>H9g4o$NL?NaBN7O
\e(B
2245 ;;; EXACTN n ch1 ch2 ... chn
2249 ;;; +JUMP_PAST_ALT disp[2]
2250 ;;; ON_FAILURE_JUMP disp[2]
2251 ;;; +ON_FAILURE_KEEP_STRING_JUMP disp[2]
2252 ;;; -FINALIZE_JUMP disp[2]
2253 ;;; -MAYBE_FINALIZE_JUMP disp[2]
2254 ;;; +POP_FAILURE_JUMP disp[2]
2255 ;;; +MAYBE_POP_JUMP disp[2]
2256 ;;; DUMMY_FAILURE_JUMP disp[2]
2257 ;;; +PUSH_DUMMY_FAILURE
2258 ;;; +SUCCEED_N disp[2] n[2]
2259 ;;; +JUMP_N disp[2] n[2]
2260 ;;; +SET_NUMBER_AT disp[2] n[2]
2262 ;;; CHARSET n b1 b2 ... bn
2263 ;;;**CHARSET 0xff l1 l2 cf1 ct1 cf2 ct2 ... cfn ctn
2264 ;;; CHARSET_NOT n b1 b2 ... bn
2265 ;;;**CHARSET_NOT 0xff l1 l2 cf1 ct1 cf2 ct2 ... cfn ctn
2266 ;;;
\e$B0J2<$O$($J$_;a$NDs0F$K$h$k?7$?$J%;%^%s%F%#%C%/%9
\e(B
2268 ;;; CHARSET n b1 b2 ... bn (n < 0x80)
2269 ;;; CHARSET n+0x80 b1 b2 ... bn
2270 ;;; |<-- n bytes -->|
2271 ;;; lh lo CHARF1 CHART1 .... CHARFm CHARTm
2272 ;;; |<- lh << 8 + lo bytes ->|
2273 ;; CHARSET n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
2274 ;; |<- bitmap ->| |<- range table ->|
2275 ;; CHARSET_NOT n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
2276 ;; CHARSETM m n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
2277 ;; CHARSETM_NOT m n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
2279 ;; o cfx, ctx
\e$B0J30$O$9$Y$F
\e(B 1byte. cfx, ctx
\e$B$O
\e(B multi byte
2282 ;; o CHARSET(_NOT)
\e$B$H
\e(B CHARSETM(_NOT)
\e$B$H$N0c$$$O
\e(B, CHARSETM(_NOT)
2283 ;;
\e$B$N>l9g$K$O
\e(B bitmap
\e$B$N@hF,$N
\e(B m bytes
\e$B$,>J$+$l$F$$$kE@
\e(B.
2285 ;; o b1 ... bn (
\e$B$D$^$j
\e(B bitmap
\e$B$ND9$5
\e(B)
\e$B$O
\e(B, (n & 0x7f) bytes. n
\e$B$N
\e(B
2286 ;;
\e$BJ,
\e(B 1byte
\e$B$O4^$^$J$$
\e(B.
2288 ;; o lh
\e$B0J2<$O
\e(B n & 0x80
\e$B$,
\e(B 0
\e$B$J$iB8:_$7$J$$
\e(B.
2290 ;; o lh
\e$B$+$i
\e(B ctn
\e$B$^$G$ND9$5
\e(B(
\e$B$D$^$j
\e(B range table
\e$B$ND9$5
\e(B)
\e$B$O
\e(B ((lh
2291 ;; << 8) + lo) byte. lh
\e$B$H
\e(B lo
\e$B$N
\e(B 2byte
\e$B$r4^$`
\e(B. (
\e$B>e$N
\e(B n
\e$B$N>l
\e(B
2292 ;;
\e$B9g$H0c$$$^$9$,
\e(B,
\e$BE}0l$7$?$[$&$,$$$$$+$J
\e(B?).
2294 ;; o cfx
\e$B$O
\e(B multi byte character
\e$B$G
\e(B, cfx
\e$B$H
\e(B ctx
\e$B$N
\e(B leading char
2295 ;;
\e$B$OF1$8$G$J$$$H$$$1$J$$
\e(B.
\e$B$^$?
\e(B, cfx
\e$B$N
\e(B leading char
\e$B$O
\e(B 0
\e$B$G
\e(B
2296 ;;
\e$B$"$C$F$O$$$1$J$$
\e(B(range table
\e$B$K
\e(B leading char
\e$B$,
\e(B 0 (ASCII
\e$B$H
\e(B
2297 ;;
\e$B$+
\e(B)
\e$B$NJ8;z$,$"$C$F$b
\e(B,
\e$B8=:_$O
\e(B fastmap
\e$B$KH?1G$5$l$J$$$+$i
\e(B).
2299 ;;; START_MEMORY regno
2300 ;;; STOP_MEMORY regno
2301 ;;; o emacs 19
\e$B$N
\e(B regex.c
\e$B$G$O
\e(B,
2302 ;;; START_MEMORY regno groupno
2303 ;;; STOP_MEMORY regno groupno
2304 ;;; groupno
\e$B$O<+J,$h$j2<$N%l%Y%k$N%0%k!<%W$N?t
\e(B
2307 ;;; BEFORE_DOT ;;; not used
2308 ;;; AT_DOT ;;; not used
2309 ;;; AFTER_DOT ;;; not used
2312 ;;; WORDCHAR ;;; not used
2313 ;;; NOTWORDCHAR ;;; not used
2319 ;;; NOTSYNTAXSPEC ch
2322 ;;;
\e$B3HD%L?Na!J
\e(BTREX
\e$B$G;HMQ$9$k$b$N!K
\e(B
2326 ;;; EXACT3 ch1 ch2 ch3
2327 ;;; CHARSETM m n b1 b2 .. bn
2328 ;;; charset
\e$B$N
\e(B bitmaps
\e$B$N$&$A@hF,$N
\e(B m bytes
\e$B$r>J$$$?$b$N
\e(B
2329 ;;; CHARSETM_NOT m n b1 b2 .. bn
2330 ;;; charset_not
\e$B$N
\e(B bitmaps
\e$B$N$&$A@hF,$N
\e(B m bytes
\e$B$r>J$$$?$b$N
\e(B
2331 ;;; CASEN n disp[1] disp[2] ... disp[n] l u ind[l] ... ind[u]
2332 ;;;
\e$B:G=i$K
\e(B n
\e$B8D$N
\e(B jump relative address(2bytes)
\e$B$,B3$-!$
\e(B
2333 ;;;
\e$B<!$K
\e(Bcharacter code l
\e$B$+$i
\e(B m
\e$B$^$G$NJ,$N
\e(Bindex(1byte)
\e$B$,B3$/!%
\e(B
2334 ;;; ON_FAILURE_SUCCESS
2335 ;;; alternative stack
\e$B$r6u$K$7!$
\e(Bpend
\e$B$r
\e(B push
\e$B$9$k!%
\e(B
2337 ;;; pend
\e$B$X%8%c%s%W$9$k!%
\e(B
2339 ;;; alternative stack
\e$B$r
\e(B pop
\e$B$9$k!%
\e(B
2342 ;;; RANGE_A == RANGE 0xA0 0xFF
2345 ;;; [^
\e$B&A
\e(B]
\e$B&B
\e(B\|
\e$B&C
\e(B
\e$B$N0UL#!'
\e(B
2346 ;;; on_failure_jump L1
2347 ;;; on_failure_jump L2
2360 (defvar *regexp-code-buffer* (get-buffer-create " *regexp-code-buffer*"))
2362 (defun regexp-code-gen (FA)
2363 (let ((start (car FA))
2370 (while (and list (null *final*))
2371 (if (equal '((nil)) (cdr (car list)))
2372 (setq *final* (car (car list))))
2373 (setq list (cdr list))))
2374 (cond((null *final*)
2375 (setq *final* (1+ (length table)))
2376 (setq *counter* (1+ *final*)))
2378 (setq *counter* (1+ (length table)))))
2380 (set-buffer *regexp-code-buffer*)
2381 (let ((kanji-flag nil)
2384 (regexp-code-gen* start)
2385 (buffer-substring (point-min) (point-max)))
2388 (defun regexp-code-gen* (node)
2389 (cond((= node *final*)
2390 (if (null (assoc node *labels*))
2391 (TREX-push (cons node (point)) *labels*))
2393 ((null (assoc node *labels*))
2394 (TREX-push (cons node (point)) *labels*)
2395 (let ((alist (cdr (assoc node *table*))))
2396 (cond((equal '((nil)) alist)
2398 (t (regexp-code-gen-alist alist)))))
2400 (let ((disp (- (cdr (assoc node *labels*)) (+ (point) 3))))
2403 (/ (logand disp (* 255 256)) 256))))))
2405 (defvar *regexp-charset-table* nil)
2406 (defvar *regexp-case-table* nil)
2408 (defun regexp-code-gen-alist (alist)
2409 (TREX-init *regexp-charset-table* (make-vector 256 nil))
2410 (TREX-init *regexp-case-table* (make-vector 256 nil))
2411 (if (eq (car (car alist)) nil)
2413 (let ((nextalist alist)
2416 (min 256) (max -1) (nexts nil) (nodealist nil))
2417 (cond((numberp (car (car alist)))
2421 (aset *regexp-case-table* i nil)
2424 (while (and nextalist
2425 (numberp (car (car nextalist))))
2426 (let ((ch (car (car nextalist)))
2427 (next (cdr (car nextalist))))
2428 (let ((place (assoc next nodealist)))
2431 (cons ch (cdr place)))
2432 (TREX-push (cons ch (list next)) nodealist)))
2433 (aset *regexp-case-table* ch next)
2434 (if (< ch min) (setq min ch))
2435 (if (< max ch) (setq max ch))
2436 (if (not (TREX-memequal next nexts))
2437 (TREX-push next nexts)))
2438 (setq nextalist (cdr nextalist))))
2439 (t (setq nextalist (cdr alist))))
2442 (cond((eq (car (car nextalist)) nil)
2443 (insert ON_FAILURE_SUCCESS )) ;;; SUCCESS_SHORT
2445 (insert ON_FAILURE_JUMP 0 0)
2446 (setq point (point)))))
2451 (regexp-code-gen-exact (list min) (car nexts)))
2453 ((= (length nexts) 1)
2454 ;;; charset or charset_not
2455 (if (= (length alist) 256)
2456 (insert EXCEPT0) ;92.10.26 by T.Saneto
2462 (cond((null (aref *regexp-case-table* ch))
2463 (if (< ch not_min) (setq not_min ch))
2464 (if (< not_max ch) (setq not_max ch))))
2466 (if (<= (- not_max not_min) (- max min))
2470 (let ((minb (/ min 8))
2471 (maxb (1+ (/ max 8))))
2472 (insert (if mode CHARSET_M CHARSET_M_NOT) minb (- maxb minb))
2475 (let ((i 7) (bits 0))
2477 (if (eq (aref *regexp-case-table* (+ (* 8 b) i))
2479 ;;;; bits table
\e$B$N=g=x$O<!$NDL$j
\e(B
2480 (TREX-inc bits (aref [1 2 4 8 16 32 64 128] i)))
2484 (regexp-code-gen* (car nexts)))
2489 (insert (length nexts))
2490 (setq point (point))
2494 (setq list (cdr list))))
2498 (if (aref *regexp-case-table* ch)
2499 (insert (1+ (TREX-find (aref *regexp-case-table* ch) nexts)))
2504 (if (null (assoc (car list) *labels*))
2505 (regexp-code-gen* (car list)))
2506 (setq list (cdr list))))
2512 (let ((disp (- (cdr (assoc (car list) *labels*)) (+ (point) 2))))
2513 (insert (logand disp 255)
2514 (/ (logand disp (* 255 256)) 256)))
2515 (setq list (cdr list)))))
2517 ((eq (car (car alist)) ':epsilon)
2518 (regexp-code-gen* (cdr (car alist))))
2520 (let ((key (car (car alist)))
2521 (next (cdr (car alist))))
2522 (cond ((symbolp key)
2523 (insert (eval key)))
2524 ((TREX-memequal (car key) '(CHARSET CHARSET_NOT))
2525 (let ((charset (cdr key))
2531 (aset *regexp-charset-table* i nil)
2534 (cond((stringp (car charset))
2535 (cond((eq (length (car charset)) 1)
2536 (aset *regexp-charset-table* (aref (car charset) 0) t)
2537 (if (< (aref (car charset) 0) min)
2538 (setq min (aref (car charset) 0)))
2539 (if (< max (aref (car charset) 0))
2540 (setq max (aref (car charset) 0)))
2543 (TREX-inc mcbytes (* 2 (length (car charset))))
2544 (if (null mcchars) (setq mcchars charset))
2546 ((consp (car charset)) ;;; range
2547 (cond ((eq (length (nth 1 (car charset))) 1)
2548 (let ((from (aref (nth 1 (car charset)) 0))
2549 (to (aref (nth 2 (car charset)) 0)))
2550 (if (< from min) (setq min from))
2551 (if (< max to) (setq max to))
2553 (aset *regexp-charset-table* from t)
2558 (+ (length (nth 1 (car charset))) (length (nth 2 (car charset)))))
2559 (if (null mcchars) (setq mcchars charset))))))
2560 (setq charset (cdr charset)))
2562 (insert (if (eq (car key) 'CHARSET) CHARSET CHARSET_NOT)
2563 (if (< 0 mcbytes) 128 0)))
2565 (let ((minb (/ min 8))
2566 (maxb (1+ (/ max 8))))
2567 (insert (if (eq (car key) 'CHARSET) CHARSET_M CHARSET_M_NOT)
2568 minb (+ (if (< 0 mcbytes) 128 0) (- maxb minb)))
2571 (let ((i 7) (bits 0))
2573 (if (aref *regexp-charset-table* (+ (* 8 b) i))
2574 ;;;; bits table
\e$B$N=g=x$O<!$NDL$j
\e(B
2575 (TREX-inc bits (aref [1 2 4 8 16 32 64 128] i)))
2580 (cond( (< 0 mcbytes)
2581 (TREX-inc mcbytes 2)
2582 (insert (/ mcbytes 256) (mod mcbytes 256))
2584 (cond((stringp (car mcchars))
2585 (insert (car mcchars) (car mcchars)))
2586 ((consp (car mcchars))
2587 (insert (nth 1 (car mcchars)) (nth 2 (car mcchars)))))
2588 (setq mcchars (cdr mcchars)))))
2591 (insert (eval (car key))))
2593 (insert (eval (car key)) (nth 1 key)))
2595 (insert (eval (car key)) (nth 1 key) (nth 2 key)))
2598 (regexp-code-gen* next))))
2600 (let ((disp (- (point) point)))
2604 (insert (logand disp 255)
2605 (/ (logand disp (* 255 256)) 256)))
2606 (regexp-code-gen-alist nextalist))))))
2608 (defun regexp-code-gen-exact (chars node)
2609 (let ((alist (cdr (assoc node *table*))))
2610 (cond((and (null (assoc node *labels*))
2611 (= (length alist) 1)
2612 (numberp (car (car alist))))
2613 (regexp-code-gen-exact (cons (car (car alist)) chars)
2616 (regexp-code-gen-exact* (reverse chars))
2617 (regexp-code-gen* node)))))
2619 (defun regexp-code-gen-exact* (chars)
2620 (cond((= (length chars) 1)
2621 (insert EXACT1 (car chars)))
2622 ((= (length chars) 2)
2623 (insert EXACT2 (car chars) (nth 1 chars)))
2624 ((= (length chars) 3)
2625 (insert EXACT3 (car chars) (nth 1 chars) (nth 2 chars)))
2627 (insert EXACTN (length chars))
2631 (setq list (cdr list)))))))
2634 ;;; regexp-code-dump
2635 ;;;
\e$B@55,I=8=$N%3!<%I$rI=<($9$k!%
\e(B
2638 (defvar *regexp-code-dump* nil)
2639 (defvar *regexp-code-index* nil)
2641 (defun regexp-code-dump (*regexp-code-dump*)
2643 (let ((*regexp-code-index* 0)
2644 (max (length *regexp-code-dump*)))
2645 (while (< *regexp-code-index* max)
2646 (princ (format "%4d:" *regexp-code-index*))
2647 (let((op (aref *regexp-code-dump* *regexp-code-index*)))
2648 (cond((= op UNUSED) (regexp-code-dump-0 "unused"))
2650 (princ (format "exactn(%d) " (aref *regexp-code-dump* (1+ *regexp-code-index*))))
2651 (let ((j (+ *regexp-code-index* 2))
2652 (max (+ *regexp-code-index* 2 (aref *regexp-code-dump* (1+ *regexp-code-index*)))))
2654 (princ (format "%c" (aref *regexp-code-dump* j)))
2656 (setq *regexp-code-index* j))
2659 ((= op BEGLINE) (regexp-code-dump-0 "begline"))
2660 ((= op ENDLINE) (regexp-code-dump-0 "endline"))
2661 ((= op JUMP) (regexp-code-dump-jump "jump"))
2662 ((and (= regexp-version 19)
2663 (= op JUMP_PAST_ALT))
2664 (regexp-code-dump-jump "jump_past_alt"))
2665 ((= op ON_FAILURE_JUMP ) (regexp-code-dump-jump "on_failure_jump"))
2666 ((and (= regexp-version 19)
2667 (= op ON_FAILURE_KEEP_STRING_JUMP))
2668 (regexp-code-dump-jump "on_failure_keep_string_jump"))
2669 ((and (= regexp-version 18)
2670 (= op FINALIZE_JUMP))
2671 (regexp-code-dump-jump "finalize_jump"))
2672 ((and (= regexp-version 18)
2673 (= op MAYBE_FINALIZE_JUMP))
2674 (regexp-code-dump-jump "maybe_finalize_jump"))
2675 ((and (= regexp-version 19)
2676 (= op POP_FAILURE_JUMP))
2677 (regexp-code-dump-jump "pop_failure_jump"))
2678 ((and (= regexp-version 19)
2679 (= op MAYBE_POP_JUMP))
2680 (regexp-code-dump-jump "maybe_pop_jump"))
2681 ((= op DUMMY_FAILURE_JUMP) (regexp-code-dump-jump "dummy_failure_jump"))
2682 ((and (= regexp-version 19)
2683 (= op PUSH_DUMMY_FAILURE))
2684 (regexp-code-dump-0 "push_dummy_failure"))
2685 ((and (= regexp-version 19)
2687 (regexp-code-dump-jump-2 "succeed_n"))
2688 ((and (= regexp-version 19)
2690 (regexp-code-dump-jump-2 "jump_n"))
2691 ((and (= regexp-version 19)
2692 (= op SET_NUMBER_AT))
2693 (regexp-code-dump-jump-2 "SET_NUMBER_AT"))
2694 ((= op ANYCHAR) (regexp-code-dump-0 "anychar"))
2695 ((= op CHARSET) (regexp-code-dump-charset "charset"))
2696 ((= op CHARSET_NOT) (regexp-code-dump-charset "charset_not"))
2697 ((= op START_MEMORY)
2698 (if (= regexp-version 19)
2699 (regexp-code-dump-2 "start_memory")
2700 (regexp-code-dump-1 "start_memory")))
2702 (if (= regexp-version 19)
2703 (regexp-code-dump-2 "stop_memory")
2704 (regexp-code-dump-1 "stop_memory")))
2705 ((= op DUPLICATE) (regexp-code-dump-1 "duplicate"))
2706 ((= op BEFORE_DOT) (regexp-code-dump-0 "before_dot"))
2707 ((= op AT_DOT) (regexp-code-dump-0 "at_dot"))
2708 ((= op AFTER_DOT) (regexp-code-dump-0 "after_dot"))
2709 ((= op BEGBUF) (regexp-code-dump-0 "begbuf"))
2710 ((= op ENDBUF) (regexp-code-dump-0 "endbuf"))
2711 ((= op WORDCHAR) (regexp-code-dump-0 "wordchar"))
2712 ((= op NOTWORDCHAR) (regexp-code-dump-0 "notwordchar"))
2713 ((= op WORDBEG) (regexp-code-dump-0 "wordbeg"))
2714 ((= op WORDEND) (regexp-code-dump-0 "wordend"))
2715 ((= op WORDBOUND) (regexp-code-dump-0 "wordbound"))
2716 ((= op NOTWORDBOUND) (regexp-code-dump-0 "notwordbound"))
2717 ((= op SYNTAXSPEC) (regexp-code-dump-syntax "syntaxspec"))
2718 ((= op NOTSYNTAXSPEC) (regexp-code-dump-syntax "notsyntaxspec"))
2719 ((= op EXACT1) (regexp-code-dump-1ch "EXACT1"))
2721 (princ (format "EXACT2 %c%c\n" (aref *regexp-code-dump* (1+ *regexp-code-index*))
2722 (aref *regexp-code-dump* (+ *regexp-code-index* 2))))
2723 (TREX-inc *regexp-code-index* 3))
2725 (princ (format "EXACT3 %c%c%c\n"
2726 (aref *regexp-code-dump* (1+ *regexp-code-index*))
2727 (aref *regexp-code-dump* (+ *regexp-code-index* 2))
2728 (aref *regexp-code-dump* (+ *regexp-code-index* 3))))
2729 (TREX-inc *regexp-code-index* 4))
2730 ((= op CHARSET_M) (regexp-code-dump-charset-m "CHARSET_M"))
2731 ((= op CHARSET_M_NOT) (regexp-code-dump-charset-m "CHARSET_M_NOT"))
2733 (princ (format "CASEN %d\n" (aref *regexp-code-dump* (1+ *regexp-code-index*))))
2734 (let ((j (+ *regexp-code-index* 2))
2735 (max (+ *regexp-code-index* 2 (* 2 (aref *regexp-code-dump* (1+ *regexp-code-index*))))))
2737 (princ (format "[%d]::%d\n" (1+ (/ (- j (+ *regexp-code-index* 2)) 2))
2738 (regexp-get-absolute-address
2739 (+ j 2) (aref *regexp-code-dump* j)
2740 (aref *regexp-code-dump* (1+ j)))))
2742 (let ((ch (aref *regexp-code-dump* j)) (chmax (aref *regexp-code-dump* (1+ j))))
2743 (princ (format "%c::%c\n" ch chmax))
2745 (while (<= ch chmax)
2746 (princ (format "%c=>[%d]\n" ch (aref *regexp-code-dump* j)))
2749 (setq *regexp-code-index* j)))
2750 ((= op ON_FAILURE_SUCCESS) (regexp-code-dump-0 "ON_FAILURE_SUCCESS"))
2751 ((= op SUCCESS) (regexp-code-dump-0 "SUCCESS"))
2752 ((= op POP) (regexp-code-dump-0 "POP"))
2753 ((= op EXCEPT0) (regexp-code-dump-0 "EXCEPT0"))
2754 ((= op EXCEPT1) (regexp-code-dump-1ch "EXCEPT1"))
2755 ((= op CATEGORYSPEC) (regexp-code-dump-1ch "CATEGORYSPEC"))
2756 ((= op NOTCATEGORYSPEC) (regexp-code-dump-1ch "NOTCATEGORYSPEC"))
2757 (t (princ (format "unknown op=%d\n" op))
2758 (TREX-inc *regexp-code-index*)))))
2759 (princ (format "%4d:\n" *regexp-code-index*)))
2763 (defun regexp-code-dump-0 (op)
2765 (TREX-inc *regexp-code-index*))
2767 (defun regexp-code-dump-1 (op)
2768 (princ (format "%s %d\n" op (aref *regexp-code-dump* (1+ *regexp-code-index*))))
2769 (TREX-inc *regexp-code-index* 2))
2771 (defun regexp-code-dump-2 (op)
2772 (princ (format "%s %d %d\n"
2774 (aref *regexp-code-dump* (1+ *regexp-code-index*))
2775 (aref *regexp-code-dump* (+ *regexp-code-index* 2))
2777 (TREX-inc *regexp-code-index* 3))
2779 (defun regexp-code-dump-syntax (op)
2780 (princ (format "%s %c\n" op (syntax-code-spec (aref *regexp-code-dump* (1+ *regexp-code-index*)))))
2781 (TREX-inc *regexp-code-index* 2))
2783 (defun regexp-code-dump-1ch (op)
2784 (princ (format "%s %c\n" op (aref *regexp-code-dump* (1+ *regexp-code-index*))))
2785 (TREX-inc *regexp-code-index* 2))
2787 (defun regexp-get-absolute-address (point b1 b2)
2789 (+ point (+ (* 256 b2) b1)))
2791 (+ point (logior (logxor -1 (+ (* 255 256) 255)) (* 256 b2) b1)))))
2793 (defun regexp-code-dump-jump (op)
2794 (let* ((b1 (aref *regexp-code-dump* (1+ *regexp-code-index*)))
2795 (b2 (aref *regexp-code-dump* (+ *regexp-code-index* 2)))
2796 (p (regexp-get-absolute-address (+ *regexp-code-index* 3) b1 b2)))
2797 (princ (format "%s %d\n" op p)))
2798 (TREX-inc *regexp-code-index* 3))
2800 (defun regexp-code-dump-jump-2 (op)
2801 (let* ((b1 (aref *regexp-code-dump* (1+ *regexp-code-index*)))
2802 (b2 (aref *regexp-code-dump* (+ *regexp-code-index* 2)))
2803 (p (regexp-get-absolute-address (+ *regexp-code-index* 3) b1 b2)))
2804 (princ (format "%s %d %d\n" op p
2806 (* 256 (aref *regexp-code-dump* (+ *regexp-code-index* 3)))
2807 (aref *regexp-code-dump* (+ *regexp-code-index* 4))))))
2808 (TREX-inc *regexp-code-index* 5))
2810 (defun regexp-code-dump-charset (op)
2811 (let ((n (aref *regexp-code-dump* (1+ *regexp-code-index*))))
2812 (princ (format "%s %d " op n))
2813 (let ((j (+ *regexp-code-index* 2))
2814 (max (+ *regexp-code-index* 2 (if (<= 128 n) (- n 128) n))))
2816 (princ (format "0x%2x " (aref *regexp-code-dump* j)))
2819 (let* ((len (+ (* 256 (aref *regexp-code-dump* j))
2820 (aref *regexp-code-dump* (1+ j))))
2822 (princ (format "\n range list[%d-2 bytes]" len))
2825 (let ((ch (sref *regexp-code-dump* j)))
2826 (princ (format " %c" ch))
2827 (TREX-inc j (char-octets ch))
2828 (setq ch (sref *regexp-code-dump* j))
2829 (princ (format "-%c" ch))
2830 (TREX-inc j (char-octets ch))))
2832 (setq *regexp-code-index* j)
2836 (defun regexp-code-dump-charset-m (op)
2837 (let ((m (aref *regexp-code-dump* (1+ *regexp-code-index*)))
2838 (n (aref *regexp-code-dump* (+ *regexp-code-index* 2))))
2839 (princ (format "%s %d %d " op m n))
2840 (let ((j (+ *regexp-code-index* 3))
2841 (max (+ *regexp-code-index* 3 (if (<= 128 n) (- n 128) n))))
2843 (princ (format "0x%02x " (aref *regexp-code-dump* j)))
2846 (let* ((len (+ (* 256 (aref *regexp-code-dump* j))
2847 (aref *regexp-code-dump* (1+ j))))
2849 (princ (format "\n range list[%d-2 bytes]" len))
2852 (let ((ch (sref *regexp-code-dump* j)))
2853 (princ (format " %c" ch))
2854 (TREX-inc j (char-octets ch))
2855 (setq ch (sref *regexp-code-dump* j))
2856 (princ (format "-%c" ch))
2857 (TREX-inc j (char-octets ch))))
2859 (setq *regexp-code-index* j)
2864 ;;; Compile functions
2867 (defun TREX-simple-test1 ()
2868 (regexp-word-compile
2869 "\\cA+\\cH*\\|\\cK+\\cH*\\|\\cC+\\cH*\\|\\cH+\\|\\sw+"))
2871 (defun TREX-test1 (pattern)
2872 (let* ((regexp (regexp-parse pattern))
2873 (fFA (EFFA-make (FA-make regexp)))
2874 (bFA (EFFA-make (FA-inverse fFA)))
2877 (TREX-push (cons (DFA-optimize (DFA-make fFA))
2878 (DFA-optimize (DFA-make bFA)))
2881 (let* ((forward (NEFA-make (EFFA-make (cons (car (car l)) (cdr fFA)))))
2882 (backward (NEFA-make (EFFA-make (cons (car (car l)) (cdr bFA))))))
2883 (cond((and forward backward)
2884 (TREX-push (cons (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make forward))))
2885 (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make backward)))))
2888 (setq result (reverse result))
2891 (princ (format "\nForward[%2d]:" count)) (FA-dump (car (car result)))
2892 (princ (format "\nBackward[%2d]:" count)) (FA-dump (cdr (car result)))
2894 (setq result (cdr result))))))
2896 (defun TREX-test2 (pattern)
2897 (let* ((regexp (regexp-parse pattern))
2898 (fFA (EFFA-make (FA-make regexp)))
2901 (regexp-code-dump (setq result (regexp-code-gen (DFA-optimize (DFA-make fFA)))))
2905 (defun regexp-compile (pattern)
2906 (regexp-compile-internal pattern nil))
2909 (defun regexp-word-compile (pattern)
2910 (regexp-compile-internal pattern t))
2913 ;;; Returns a list of pair of forward-code and backward-code
2917 (defun regexp-compile-internal (pattern &optional word)
2918 (let* ((*regexp-word-definition* word)
2919 (*regexp-parse-translate*
2920 (if case-fold-search
2921 ;;; DOWNCASE or CANONICAL?
2922 (nth 2 (current-case-table))
2924 (regexp (regexp-parse pattern))
2925 (fFA (EFFA-make (FA-make (regexp-reform-duplication regexp))))
2926 (bFA (EFFA-make (FA-make (regexp-reform-duplication (regexp-inverse regexp)))))
2928 (let ((ofFA (DFA-optimize (DFA-make fFA)))
2929 (obFA (DFA-optimize (DFA-make bFA))))
2930 (TREX-push (cons (DFA-code-with-fastmap ofFA)
2931 (let* ((START_MEMORY STOP_MEMORY)
2932 (STOP_MEMORY START_MEMORY))
2933 (DFA-code-with-fastmap obFA)))
2937 (bFA (EFFA-make (FA-inverse fFA))))
2939 (let* ((forward (NEFA-make (EFFA-make (cons (car (car l)) (cdr fFA)))))
2940 (backward (NEFA-make (EFFA-make (cons (car (car l)) (cdr bFA))))))
2941 (cond((and forward backward)
2942 (let ((fFA (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make forward)))))
2943 (bFA (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make backward))))))
2944 (TREX-push (cons (DFA-code-with-fastmap fFA)
2945 (DFA-code-with-fastmap bFA))
2948 (setq result (nreverse result))))
2951 (defun regexp-compiled-pattern-dump (pattern)
2952 ;;; PATTERN is a vector of [ code fastmap fastmap-syntax fastmap-categoy]
2953 (regexp-code-dump (aref pattern 0))
2954 (print-fastmap (aref pattern 1) " fastmap[char]")
2955 (print-fastmap (aref pattern 2) " fastmap[synt]")
2956 (print-fastmap (aref pattern 3) " fastmap[cate]")
2959 (defun regexp-compile-dump (code)
2960 (let ((Fcode (aref (car (car code)) 0))
2961 (Bcode (aref (cdr (car code)) 0))
2963 (princ (format "\nRegular Expression Compiler Dump:\n"))
2964 (princ (format "Forward Search:"))
2965 (regexp-compiled-pattern-dump (car (car code)))
2966 (princ (format "Backward Search:"))
2967 (if Bcode (regexp-compiled-pattern-dump (cdr (car code)))
2968 (princ (format "\n Use the interpreter\n")))
2971 (princ (format "In word conditions:\n"))
2973 (princ (format "Forward[%d]" i))
2974 (regexp-compiled-pattern-dump (car (car words)))
2975 (princ (format "Backward[%d]" i))
2976 (regexp-compiled-pattern-dump (cdr (car words)))
2978 (setq words (cdr words)))))))
2980 (defun regexp-compile-and-dump (regexp)
2981 (regexp-compile-dump (regexp-compile regexp)))
2985 (defmacro define-word-regexp (name regexp)
2986 (` (defconst (, name) '(, (regexp-word-compile regexp)))))
2988 (put 'define-word-regexp 'byte-hunk-handler ;93.7.16 by S.Tomura
2995 (defmacro define-coding-systems (&rest rest)
2996 (` (define-coding-systems* '(, rest))))
2998 (defun define-coding-systems* (names)
3000 (` (:or (,@ (mapcar (function (lambda (name) (` (:seq (, (regexp-get-definition name))
3005 (defun oct (str) (aref str 0))
3007 (defvar *TREX-range-from* nil)
3008 (defvar *TREX-range-to* nil)
3010 (defun TREX-range-make-jisjoint (regexp)
3011 (TREX-init *TREX-range-from* (make-vector 256 nil))
3012 (TREX-init *TREX-range-to* (make-vector 256 nil))
3015 (aset *TREX-range-from* i nil)
3016 (aset *TREX-range-to* i nil)
3018 (aset *TREX-range-from* 0 t)
3019 (aset *TREX-range-to* 255 t)
3020 (TREX-range-mark regexp)
3021 (TREX-range-replace regexp))
3023 (defun TREX-range-mark (regexp)
3026 (let ((op (car regexp)))
3027 (cond((eq op ':mark)
3028 (TREX-range-mark (nth 3 regexp)))
3030 (mapcar 'TREX-range-mark (cdr regexp)))
3032 (mapcar 'TREX-range-mark (cdr regexp)))
3034 (TREX-range-mark (nth 1 regexp)))
3036 (TREX-range-mark (nth 1 regexp)))
3038 (TREX-range-mark (nth 1 regexp)))
3040 (TREX-range-mark2 (nth 1 regexp) (nth 2 regexp))))))
3042 (TREX-range-mark2 regexp regexp))
3044 (TREX-range-mark2 regexp regexp))))
3046 (defun TREX-range-mark2 (from to)
3047 (if (stringp from) (setq from (aref from 0)))
3048 (if (stringp to) (setq to (aref to 0)))
3049 (if (< 0 from) (aset *TREX-range-to* (1- from) t))
3050 (if (< to 255) (aset *TREX-range-from* (1+ to) t))
3051 (aset *TREX-range-from* from t)
3052 (aset *TREX-range-to* to t))
3054 (defun TREX-range-replace (regexp)
3057 (let ((op (car regexp)))
3058 (cond((eq op ':mark)
3059 (` (:mark (, (nth 1 regexp))
3061 (, (TREX-range-replace (nth 3 regexp))))))
3063 (` (:or (,@ (mapcar 'TREX-range-replace (cdr regexp))))))
3065 (` (:seq (,@ (mapcar 'TREX-range-replace (cdr regexp))))))
3067 (` (:optional (,(TREX-range-replace (nth 1 regexp))))))
3069 (` (:star (,(TREX-range-replace (nth 1 regexp))))))
3071 (` (:plus (,(TREX-range-replace (nth 1 regexp))))))
3073 (let ((from (nth 1 regexp))
3077 (if (stringp from) (setq from (aref from 0)))
3078 (if (stringp to ) (setq to (aref to 0)))
3082 (while (not (aref *TREX-range-to* j))
3084 (if (not (= i j)) (TREX-push (` (:range (, i) (, j))) result)
3085 (TREX-push i result))
3088 (if (= (length result) 1) (car result)
3089 (` (:or (,@ (nreverse result))))))))))
3091 (if (= (length regexp) 1)
3099 (let ((start (car FA))
3101 (setq alist (sort alist 'TREX-lessp-car))
3103 (setcdr (car alist) (sort (cdr (car alist)) 'TREX-lessp-car))
3104 (setcdr (car alist ) (TREX-sort (cdr (car alist)) 'TREX-key-lessp 'cdr))
3105 (setq alist (cdr alist)))
3109 ;;; CHARSET functions:
3111 ;;; CHARSET ::= RANGE |
3114 ;;; RANGE+ ::= CHAR |
3115 ;;; (:range CHAR CHAR)
3118 (defun CHARSET-rangep (charset)
3119 (or (numberp charset)
3120 (and (consp charset) (eq (car charset) ':range))))
3122 (defun CHARSET-orp (charset)
3123 (and (consp charset) (eq (car charset) ':or)))
3125 (defun CHARSET-range-from (range)
3126 (if (numberp range) range
3129 (defun CHARSET-range-to (range)
3130 (if (numberp range) range
3133 (defun CHARSET-range-make (from to)
3134 (if (= from to) from
3135 (list ':range from to)))
3137 (defun CHARSET-membership (range charset)
3138 (let ((from (CHARSET-range-from range))
3139 (to (CHARSET-range-to range))
3141 (while (and charset flag1)
3142 (if (< from (CHARSET-range-from (car charset)))
3143 (setq charset (cdr charset))
3145 (and flag1 (<= to (CHARSET-range-to (car charset))))))
3147 (defun CHARSET-not (charset)
3148 (cond((CHARSET-rangep charset)
3149 (list ':nor charset))
3150 ((CHARSET-orp charset)
3151 (cons ':nor (cdr charset)))
3153 (cons ':or (cdr charset)))))
3155 (defun CHARSET-union (charset1 charset2)
3156 (cond((CHARSET-rangep charset1)
3157 (cond ((CHARSET-rangep charset2)
3158 (CHARSET-union-range-range charset1 charset2))
3159 ((CHARSET-orp charset2)
3160 (CHARSET-union-range-or charset1 charset2))
3162 (CHARSET-union-range-nor charset1 charset2))))
3163 ((CHARSET-orp charset1)
3164 (cond ((CHARSET-rangep charset2)
3165 (CHARSET-union-range-or charset2 charset1))
3166 ((CHARSET-orp charset2)
3167 (CHARSET-union-or-or charset1 charset2))
3169 (CHARSET-union-or-nor charset1 charset2))))
3170 (t ;;; (CHARSET-norp charset1)
3171 (cond((CHARSET-rangep charset2)
3172 (CHARSET-union-range-nor charset2 charset1))
3173 ((CHARSET-orp charset2)
3174 (CHARSET-union-or-nor charset2 charset1))
3176 (CHARSET-union-nor-nor charset1 charset2))))))
3178 (defun CHARSET-union-range-range (range1 range2)
3179 (let ((from1 (CHARSET-range-from range1))
3180 (to1 (CHARSET-range-to range1))
3181 (from2 (CHARSET-range-from range2))
3182 (to2 (CHARSET-range-to range2)))
3184 (list ':or range1 range2))
3185 (t ;;; (<= from2 (1+ to1))
3186 (cond((<= to1 to2) ;;; (<= from2 to1 to2)
3187 (CHARSET-range-make (min from1 from2) to2))
3188 ((<= from1 to2) ;;; (<= from1 to2 to1)
3189 (CHARSET-range-make (min from1 from2) to1))
3190 (t ;;; (<= to2 from1 to1)
3191 (list ':or range2 range1)))))))
3193 (defun CHARSET-union-range-or (range or)
3194 (cons ':or (CHARSET-union-range-or* range (cdr or))))
3196 (defun CHARSET-union-range-or* (range or-body)
3197 (let ((from (CHARSET-range-from range))
3198 (to (CHARSET-range-to range))
3201 (while (and or-body (null flag))
3202 (let ((next (car or-body)))
3203 (if (< (CHARSET-range-from next) from)
3205 (if (< (CHARSET-range-to next) from)
3207 (setq part1 (cons next part1)
3208 or-body (cdr or-body))
3209 ;;; from[i] < from <= to[i]
3210 (setq from (CHARSET-range-from next)
3213 ;;; to[i-1] < from <= from[i]
3215 ;;; part1 < from <= from[i]
3216 (if (and part1 (<= (1+ (CHARSET-range-to (car part1))) from))
3217 (setq from (CHARSET-range-from (car part1))
3219 ;;; part1 << from <= from[i]
3221 (while (and or-body (null flag))
3222 (let ((next (car or-body)))
3223 (if (< (CHARSET-range-from next) to)
3225 (if (< (CHARSET-range-to next) to)
3227 (setq or-body (cdr or-body))
3228 ;;; from[j] < to <= to[j]
3229 (setq to (CHARSET-range-to next)
3232 ;;; to[j-1] < to <= from[j]
3234 ;;; part2 < to <= from[j]
3235 (if (and or-body (<= (CHARSET-range-from (car or-body)) (1+ to)))
3236 (setq to (CHARSET-range-to (car or-body))
3237 or-body (cdr or-body)))
3238 ;;; part2 <= to << from[j]
3239 (nconc (reverse part1)
3240 (cons (CHARSET-range-make from to)
3244 (defun CHARSET-union-range-nor (range nor)
3245 (let ((from (CHARSET-range-from range))
3246 (to (CHARSET-range-to range))
3247 (nor-body (cdr nor)))
3251 (defun CHARSET-union-or-or (or1 or2)
3252 (cons ':or (CHARSET-union-or*-or* (cdr or1) (cdr or2))))
3254 (defun CHARSET-union-or*-or* (or1-body or2-body)
3255 (let ((result-body or2-body))
3258 (CHARSET-union-range-or* (car or1-body) result-body))
3259 (setq or1-body (cdr or1-body)))
3262 (defun CHARSET-union-or-nor (or nor)
3265 (defun CHARSET-union-nor-nor (nor1 nor2)
3266 (cons ':nor (CHARSET-intersection-or*-or* (cdr nor1) (cdr nor2))))
3268 (defun CHARSET-intersection (charset1 charset2)
3269 (cond((CHARSET-rangep charset1)
3270 (cond ((CHARSET-rangep charset2)
3271 (CHARSET-intersection-range-range charset1 charset2))
3272 ((CHARSET-orp charset2)
3273 (CHARSET-intersection-range-or charset1 charset2))
3275 (CHARSET-intersection-range-nor charset1 charset2))))
3276 ((CHARSET-orp charset1)
3277 (cond ((CHARSET-rangep charset2)
3278 (CHARSET-intersection-range-or charset2 charset1))
3279 ((CHARSET-orp charset2)
3280 (CHARSET-intersection-or-or charset1 charset2))
3282 (CHARSET-intersection-or-nor charset1 charset2))))
3283 (t ;;; (CHARSET-norp charset1)
3284 (cond((CHARSET-rangep charset2)
3285 (CHARSET-intersection-range-nor charset2 charset1))
3286 ((CHARSET-orp charset2)
3287 (CHARSET-intersection-or-nor charset2 charset1))
3289 (CHARSET-intersection-nor-nor charset1 charset2))))))
3291 (defun CHARSET-intersection-range-or (range or)
3292 (CHARSET-intersection-range-or* range (cdr or)))
3294 (defun CHARSET-intersection-range-or* (range or-body)
3295 (let ((from (CHARSET-range-from range))
3296 (to (CHARSET-range-to range))
3299 (while (and or-body (null flag))
3300 (let ((next (car or-body)))
3301 (if (< (CHARSET-range-from next) from)
3303 (if (< (CHARSET-range-to next) from)
3305 (setq or-body (cdr or-body))
3306 ;;; from[i] < from <= to[i]
3309 ;;; to[i-1] < from <= from[i]
3311 ;;; from[i] < from <= to[i]
3313 ;;; to[i-1] < from <= from[i]
3315 (while (and or-body (null flag))
3316 (let ((next (car or-body)))
3317 (if (<= (CHARSET-range-from next) to)
3319 (if (<= (CHARSET-range-to next) to)
3321 (setq part2 (cons next part2)
3322 or-body (cdr or-body))
3323 ;;; from[j] <= to < to[j]
3324 (setq part2 (cons next part2)
3325 or-body (cdr or-body)
3328 ;;; to[j-1] <= to < from[j]
3330 ;;; from[j] <= to < to[j]
3332 ;;; to[j-1] <= to < from[j]
3333 (cond ((null part2) nil)
3334 ((= (length part2) 1)
3335 (list (CHARSET-range-make (max from (CHARSET-range-from (car part2)))
3336 (min to (CHARSET-range-to (car part2))))))
3338 (setcar part2 (CHARSET-range-make (CHARSET-range-from (car part2))
3339 (min to (CHARSET-range-to (car part2)))))
3340 (setq part2 (nreverse part2))
3341 (setcar part2 (CHARSET-range-make (max from (CHARSET-range-from (car part2)))
3342 (CHARSET-range-to (car part2))))
3345 (defun CHARSET-intersection-range-nor (range nor)
3346 (CHARSET-intersection-range-nor* range (cdr nor)))
3348 (defun CHARSET-intersecion-range-nor* (range nor-body)
3349 (let ((from (CHARSET-range-from range))
3350 (to (CHARSET-range-to range)))
3353 ;;; (and (or a b) c) == (or (and a c) (and b c))
3355 (defun CHARSET-intersection-or-or (or1 or2)
3357 (or1-body (cdr or1))
3358 (or2-body (cdr or2)))
3360 (setq result (CHARSET-union-or*-or*
3361 (CHARSET-intersection-range-or* (car or1-body) or2-body)
3363 (setq or1-body (cdr or1-body)))
3364 (if (= (length result) 1) (car result)
3365 (cons ':or result))))
3367 (defun CHARSET-intersection-or-nor (or nor)
3370 ;;; (and (not or1) (not or2)) == (not (or or1 or2))
3372 (defun CHARSET-intersection-nor-nor (nor1 nor2)
3373 (cons ':nor (CHARSET-union-or*-or* (cdr nor1) (cdr nor2))))
3375 (defun FA-compaction (FA)
3376 (let ((start (car FA))
3378 (setq alist (TREX-sort alist 'TREX-key-lessp 'car))
3380 (let ((table (cdr (car alist)))
3382 (keys nil) (next nil))
3383 (setq table (TREX-sort table '< 'car))
3385 (setq next (cdr (car table)))
3386 (TREX-push (car (car table)) keys)
3387 (setq table (cdr table))
3388 (while (and table (eq next (cdr (car table))))
3389 (TREX-push (car (car table)) keys)
3390 (setq table (cdr table)))
3391 (setq keys (reverse (sort keys 'TREX-key-lessp)))
3392 (let ((newkeys nil))
3393 (setq newkeys (car keys)
3396 (cond((numberp (car keys))
3397 (cond((numberp (car newkeys))
3398 (if (= (1+ (car keys)) (car newkeys))
3399 (setcar newkeys (list ':range (car keys) (car newkeys)))
3400 (TREX-push (car keys) newkeys)))
3401 ((and (consp (car newkeys)) (eq (car (car newkeys)) ':range)))))))))))))
3405 (defun FA-dump2 (table)
3406 (let ((start (car table))
3408 (princ (format "\nstart = %d\n" start))
3410 (princ (format "%3d: " (car (car l))))
3411 (let ((alist (cdr (car l))))
3412 (cond ((numberp (car (car alist)))
3413 (princ (format "\\%03o(%c) -> %s\n" (car (car alist))(car (car alist)) (cdr (car alist)))))
3414 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
3415 (princ (format "(%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
3416 ((and (consp (car (car alist))) (eq (car (car (car alist))) ':range))
3417 (princ (format "(:range \\%03o \\%03o) -> %s\n"
3418 (nth 1 (car (car alist))) (nth 2 (car (car alist)))
3419 (cdr (car alist)))))
3421 (princ (format "%s -> %s\n" (car (car alist)) (cdr (car alist))))))
3422 (setq alist (cdr alist))
3424 (cond ((numberp (car (car alist)))
3425 (princ (format " \\%03o(%c) -> %s\n" (car (car alist))(car (car alist)) (cdr (car alist)))))
3426 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
3427 (princ (format " (%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
3428 ((and (consp (car (car alist))) (eq (car (car (car alist))) ':range))
3429 (princ (format " (:range \\%03o \\%03o) -> %s\n"
3430 (nth 1 (car (car alist))) (nth 2 (car (car alist)))
3431 (cdr (car alist)))))
3433 (princ (format " %s -> %s\n" (car (car alist)) (cdr (car alist))))))
3434 (setq alist (cdr alist))))
3437 ;;;function re-compile REGEXP
3438 ;;;Compile REGEXP by GNU Emacs original regexp compiler,
3439 ;;;and return information of the compiled code by a vector of length 11:
3440 ;;; [ COMPILED-PATTERN (string)
3441 ;;; RE-NSUB REGS-ALLOCATED CAN-BE-NULL NEWLINE-ANCHOR (integers)
3442 ;;; NO-SUB NOT-BOL NOT-EOL SYNTAX (integers)
3443 ;;; FASTMAP TRANSLATE (string) ].
3446 (defun print-compiled-pattern (compiled-code)
3447 (let ((compiled-pattern (aref compiled-code 0))
3448 (re-nsub (aref compiled-code 1))
3449 (regs-allocated (aref compiled-code 2))
3450 (can-be-null (aref compiled-code 3))
3451 (newline-anchor (aref compiled-code 4))
3452 (no-sub (aref compiled-code 5))
3453 (not-bol (aref compiled-code 6))
3454 (not-eol (aref compiled-code 7))
3455 (syntax (aref compiled-code 8))
3456 (fastmap (aref compiled-code 9))
3457 (translate (aref compiled-code 10)))
3458 (regexp-code-dump compiled-pattern)
3460 (if fastmap (print-fastmap fastmap "fastmap"))
3461 (princ (format "re_nsub: %d\n" re-nsub))
3462 (princ (format "regs-alloc: %d\n" regs-allocated))
3463 (princ (format "can-be-null: %d\n" can-be-null))
3464 (princ (format "newline-anchor: %d\n" newline-anchor))
3465 (princ (format "no-sub: %d\n" no-sub))
3466 (princ (format "not-bol: %d\n" not-bol))
3467 (princ (format "not-eol: %d\n" not-eol))
3468 (princ (format "syntax: %d\n" syntax))
3469 (if translate (print-translate translate))
3474 (defun print-fastmap (fastmap name)
3477 (princ (format "%s:[" name))
3478 (let ((max (length fastmap))
3481 (if (not (= (aref fastmap i) 0))
3482 (princ (format "%c" i)))
3486 (defun print-translate (trans)
3489 (princ "translate:\n")
3490 (let ((max (length trans))
3493 (if (not (= (aref trans i) i))
3494 (princ (format " %c --> %c" i (aref trans i))))
3498 (defun re-compile-and-dump (regexp)
3499 (print-compiled-pattern (re-compile regexp)))