1 /* String search routines for SXEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of SXEmacs
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* Synched up with: FSF 19.29, except for region-cache stuff. */
23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
25 /* This file has been Mule-ized except for the TRT stuff. */
31 #include "ui/insdel.h"
33 #ifdef REGION_CACHE_NEEDS_WORK
34 #include "region-cache.h"
38 #include <sys/types.h>
45 #define TRANSLATE(table, pos) \
46 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
49 #define REGEXP_CACHE_SIZE 0x80
50 #define REGEXP_CACHE_HASH_MASK (REGEXP_CACHE_SIZE-1)
51 #define REGEXP_FASTMAP_SIZE 0400
53 #define __REGEXP_DEBUG__(args...) fprintf(stderr, "REGEXP " args)
54 #ifndef REGEXP_DEBUG_FLAG
55 #define REGEXP_DEBUG(args...)
57 #define REGEXP_DEBUG(args...) __REGEXP_DEBUG__(args)
59 #define REGEXP_DEBUG_COMPRE(args...) REGEXP_DEBUG("[compre]: " args)
60 #define REGEXP_DEBUG_COMPRE_C(args...) REGEXP_DEBUG("[compre/cache]: " args)
61 #define REGEXP_DEBUG_COMPRE_H(args...) REGEXP_DEBUG("[compre/hash]: " args)
62 #define REGEXP_CRITICAL(args...) __REGEXP_DEBUG__("CRITICAL: " args)
65 /* If the regexp is non-nil, then the buffer contains the compiled form
66 of that regexp, suitable for searching. */
68 struct regexp_cache *next;
70 struct re_pattern_buffer buf;
71 char fastmap[REGEXP_FASTMAP_SIZE];
72 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
76 /* The instances of that struct. */
77 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
79 /* The head of the linked list; points to the most recently used buffer. */
80 static struct regexp_cache *searchbuf_head;
82 /* Every call to re_match, etc., must pass &search_regs as the regs
83 argument unless you can show it is unnecessary (i.e., if re_match
84 is certainly going to be called again before region-around-match
87 Since the registers are now dynamically allocated, we need to make
88 sure not to refer to the Nth register before checking that it has
89 been allocated by checking search_regs.num_regs.
91 The regex code keeps track of whether it has allocated the search
92 buffer using bits in the re_pattern_buffer. This means that whenever
93 you compile a new pattern, it completely forgets whether it has
94 allocated any registers, and will allocate new registers the next
95 time you call a searching or matching function. Therefore, we need
96 to call re_set_registers after compiling a new pattern or after
97 setting the match registers, so that the regex functions will be
98 able to free or re-allocate it properly. */
100 /* Note: things get trickier under Mule because the values returned from
101 the regexp routines are in Bytinds but we need them to be in Bufpos's.
102 We take the easy way out for the moment and just convert them immediately.
103 We could be more clever by not converting them until necessary, but
104 that gets real ugly real fast since the buffer might have changed and
105 the positions might be out of sync or out of range.
107 static struct re_registers search_regs;
109 /* The buffer in which the last search was performed, or
110 Qt if the last search was done in a string;
111 Qnil if no searching has been done yet. */
112 static Lisp_Object last_thing_searched;
114 /* error condition signalled when regexp compile_pattern fails */
116 Lisp_Object Qinvalid_regexp;
118 /* Regular expressions used in forward/backward-word */
119 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
121 /* range table for use with skip_chars. Only needed for Mule. */
122 Lisp_Object Vskip_chars_range_table;
124 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len);
125 static void clear_unused_search_regs(struct re_registers *regp, int no_sub);
126 static void save_search_regs(void);
127 static Bufpos simple_search(struct buffer *buf, Bufbyte * base_pat,
128 Bytecount len, Bytind pos, Bytind lim,
129 EMACS_INT n, Lisp_Object trt);
130 static Bufpos boyer_moore(struct buffer *buf, Bufbyte * base_pat,
131 Bytecount len, Bytind pos, Bytind lim,
132 EMACS_INT n, Lisp_Object trt,
133 Lisp_Object inverse_trt, int charset_base);
134 static Bufpos search_buffer(struct buffer *buf, Lisp_Object str,
135 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
136 Lisp_Object trt, Lisp_Object inverse_trt,
139 static void matcher_overflow(void)
141 error("Stack overflow in regexp matcher");
148 #define COMPRE_T struct re_pattern_buffer
149 #define COMPREP(obj) \
150 (DYNACATP(obj) && EQ(XDYNACAT_TYPE(obj), Qcompre))
151 #define COMPRE_GET(obj) (COMPRE_T*)XDYNACAT(obj)->ptr
152 #define COMPRE_PUT(obj, rec) do { \
153 obj = make_dynacat((void*)rec); \
154 set_dynacat_printer(obj, compre_prfun); \
155 set_dynacat_finaliser(obj, compre_finfun); \
156 XDYNACAT_TYPE(obj) = Qcompre; \
158 #define XCOMPRE_GET(obj) XSTRING(obj)->compre
159 #define XCOMPRE_PUT(obj, b) XSTRING(obj)->compre = b
160 #define XCOMPRE_REM(obj) XSTRING(obj)->compre = Qnil
162 /* idenitifier for hard-cached rexp */
163 Lisp_Object Qcompre, Qcomprep;
164 static Lisp_Object cache_regexp(Lisp_Object, struct re_pattern_buffer*);
165 static COMPRE_T* make_compre(void)
166 #if defined(__GNUC__)
167 __attribute__((unused))
170 static COMPRE_T* clone_compre(COMPRE_T*);
171 static inline void free_compre(COMPRE_T*);
174 /* Compile a regexp and signal a Lisp error if anything goes wrong.
175 PATTERN is the pattern to compile.
176 CP is the place to put the result.
177 TRANSLATE is a translation table for ignoring case, or NULL for none.
178 REGP is the structure that says where to store the "register"
179 values that will result from matching this pattern.
180 If it is 0, we should compile the pattern not to record any
181 subexpression bounds.
182 POSIX is nonzero if we want full backtracking (POSIX style)
183 for this pattern. 0 means backtrack only enough to get a valid match. */
186 compile_pattern_1(struct regexp_cache *cp, Lisp_Object pattern,
187 Lisp_Object translate, struct re_registers *regp, int posix,
194 cp->buf.translate = translate;
196 old = re_set_syntax(RE_SYNTAX_EMACS
197 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
199 re_compile_pattern((char *)XSTRING_DATA(pattern),
200 XSTRING_LENGTH(pattern), &cp->buf);
203 maybe_signal_error(Qinvalid_regexp, list1(build_string(val)),
208 cp->regexp = Fcopy_sequence(pattern);
213 /* extremely simple hash table, no collision management is performed */
214 static struct re_pattern_buffer *
215 try_pattern_from_cache(Lisp_Object pattern,
216 struct re_registers *regp,
217 Lisp_Object translate,
221 struct regexp_cache *cp;
225 pathash = internal_hash(pattern, 0);
226 place = (int)((pathash>>4) & REGEXP_CACHE_HASH_MASK);
227 cp = &(searchbufs[place]);
228 REGEXP_DEBUG_COMPRE_H("trying hash place: %d.\n", place);
230 if (!NILP(Fstring_equal(cp->regexp, pattern))
231 && EQ(cp->buf.translate, translate) && cp->posix == posix) {
233 REGEXP_DEBUG_COMPRE_H("match: %d.\n", place);
236 /* collision or empty slot, dont care just put the
237 * new regexp there */
238 if (!compile_pattern_1(
239 cp, pattern, translate, regp, posix, errb))
241 REGEXP_DEBUG_COMPRE_H("collision: %d.\n", place);
246 #else /* !EF_USE_COMPRE */
247 static struct re_pattern_buffer *
248 try_pattern_from_cache(Lisp_Object pattern,
249 struct re_registers *regp,
250 Lisp_Object translate,
254 struct regexp_cache *cp, **cpp;
256 for (cpp = &searchbuf_head;; cpp = &cp->next) {
258 if (!NILP(Fstring_equal(cp->regexp, pattern))
259 && EQ(cp->buf.translate, translate)
260 && cp->posix == posix)
263 /* If we're at the end of the cache, compile
264 * into the last cell. */
266 if (!compile_pattern_1(
267 cp, pattern, translate, regp, posix, errb))
273 /* When we get here, cp (aka *cpp) contains the compiled pattern,
274 either because we found it in the cache or because we just compiled it.
275 Move it to the front of the queue to mark it as most recently used. */
277 cp->next = searchbuf_head;
282 #endif /* EF_USE_COMPRE */
284 /* Compile a regexp if necessary, but first check to see if there's one in
286 PATTERN is the pattern to compile.
287 TRANSLATE is a translation table for ignoring case, or NULL for none.
288 REGP is the structure that says where to store the "register"
289 values that will result from matching this pattern.
290 If it is 0, we should compile the pattern not to record any
291 subexpression bounds.
292 POSIX is nonzero if we want full backtracking (POSIX style)
293 for this pattern. 0 means backtrack only enough to get a valid match. */
295 struct re_pattern_buffer *
296 compile_pattern(Lisp_Object pattern,
297 struct re_registers *regp,
298 Lisp_Object translate,
302 struct re_pattern_buffer *result = NULL;
305 Lisp_Object rc = XCOMPRE_GET(pattern);
307 if (!NILP(rc) && COMPREP(rc) &&
308 (result = COMPRE_GET(rc)) &&
309 result->re_ngroups >= 0 && result->re_ngroups < 256) {
310 REGEXP_DEBUG_COMPRE_C("using cache: 0x%x.\n",
311 (unsigned int)result);
315 result = try_pattern_from_cache(
316 pattern, regp, translate, posix, errb);
318 cache_regexp(pattern, result);
323 /* Advise the searching functions about the space we have allocated
324 for register data. */
326 re_set_registers(result, regp, regp->num_regs,
327 regp->start, regp->end);
334 compre_prfun(Lisp_Object obj, Lisp_Object pcfun, int escflag)
337 write_c_string("#<compiled regexp ", pcfun);
338 snprintf(buf, 17, "%lx>",
339 (long unsigned int)((COMPRE_GET(obj))->buffer));
340 write_c_string(buf, pcfun);
346 compre_finfun(Lisp_Object obj, int unused)
348 REGEXP_DEBUG_COMPRE_C("0x%lx@0x%lx will pass away\n",
349 (long unsigned int)COMPRE_GET(obj),
350 (long unsigned int)obj);
351 free_compre(COMPRE_GET(obj));
358 COMPRE_T *result = xnew_and_zero(COMPRE_T);
360 result->fastmap = xmalloc_atomic(REGEXP_FASTMAP_SIZE);
367 clone_compre(COMPRE_T *src)
369 COMPRE_T *result = xnew_and_zero(COMPRE_T);
371 /* alloc and clone fastmap */
372 result->fastmap = (char*)xmalloc_atomic(REGEXP_FASTMAP_SIZE);
373 memcpy(result->fastmap, src->fastmap, REGEXP_FASTMAP_SIZE);
375 /* alloc and clone buffer */
376 result->buffer = (unsigned char *)xmalloc_atomic(src->allocated);
377 memcpy(result->buffer, src->buffer, src->allocated);
378 result->allocated = src->allocated;
379 result->used = src->used;
381 result->syntax = src->syntax;
382 result->translate = src->translate;
383 result->re_nsub = src->re_nsub;
384 result->re_ngroups = src->re_ngroups;
386 result->can_be_null = src->can_be_null;
387 result->regs_allocated = src->regs_allocated;
388 result->fastmap_accurate = src->fastmap_accurate;
390 result->no_sub = src->no_sub;
391 result->not_bol = src->not_bol;
392 result->not_eol = src->not_eol;
393 result->newline_anchor = src->newline_anchor;
395 /* alloc and clone ext_to_int_register */
396 result->external_to_internal_register =
397 (int*)xmalloc_atomic(
398 sizeof(int) * src->external_to_internal_register_size);
399 memcpy(result->external_to_internal_register,
400 src->external_to_internal_register,
401 sizeof(int)*src->external_to_internal_register_size);
402 result->external_to_internal_register_size =
403 src->external_to_internal_register_size;
409 free_compre(COMPRE_T *buf)
419 if (buf->external_to_internal_register) {
420 xfree(buf->external_to_internal_register);
421 buf->external_to_internal_register = NULL;
427 cache_regexp(Lisp_Object regexp, COMPRE_T *buf)
435 resbuf = clone_compre(buf);
436 COMPRE_PUT(rc, resbuf);
437 XCOMPRE_PUT(regexp, rc);
439 REGEXP_DEBUG_COMPRE_C("caching 0x%08x into 0x%08x\n",
440 (unsigned int)resbuf, (unsigned int)rc);
444 DEFUN("compile-regexp", Fcompile_regexp, 1, 1, 0, /*
445 Forcibly compile REGEXP and store the result in object-plist.
449 CHECK_STRING(regexp);
452 compile_pattern(regexp, &search_regs, Qnil, 0, ERROR_ME);
457 DEFUN("defregexp", Fdefregexp, 2, UNEVALLED, 0, /*
458 \(defregexp SYMBOL REGEXP DOCSTRING\)
459 Like `defconst' but for forcing compiled regexps.
461 The same restrictions that apply to `defconst' apply here in regard
462 to user variables. You shouldn't use this for regular expressions
463 that a user might want to customise. Instead, use `defcustom' with
468 /* This function can GC */
469 Lisp_Object sym = XCAR(args);
470 Lisp_Object pat = Feval(XCAR(args = XCDR(args)));
476 pat = Fcompile_regexp(pat);
477 Fset_default(sym, pat);
480 if (!NILP(args = XCDR(args))) {
481 Lisp_Object doc = XCAR(args);
482 Fput(sym, Qvariable_documentation, doc);
483 if (!NILP(args = XCDR(args)))
484 error("too many arguments");
487 if (!NILP(Vfile_domain))
488 Fput(sym, Qvariable_domain, Vfile_domain);
491 LOADHIST_ATTACH(sym);
494 #endif /* EF_USE_COMPRE */
496 /* Error condition used for failing searches */
497 Lisp_Object Qsearch_failed;
499 static Lisp_Object signal_failure(Lisp_Object arg)
502 Fsignal(Qsearch_failed, list1(arg));
503 return Qnil; /* Not reached. */
506 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
507 done after each regexp match that uses the search regs.
509 We could get a potential speedup by not converting the search registers
510 until it's really necessary, e.g. when match-data or replace-match is
511 called. However, this complexifies the code a lot (e.g. the buffer
512 could have changed and the Bytinds stored might be invalid) and is
513 probably not a great time-saver. */
515 static void fixup_search_regs_for_buffer(struct buffer *buf)
518 int num_regs = search_regs.num_regs;
520 for (i = 0; i < num_regs; i++) {
521 if (search_regs.start[i] >= 0)
522 search_regs.start[i] =
523 bytind_to_bufpos(buf, search_regs.start[i]);
524 if (search_regs.end[i] >= 0)
526 bytind_to_bufpos(buf, search_regs.end[i]);
530 /* Similar but for strings. */
531 static void fixup_search_regs_for_string(Lisp_Object string)
534 int num_regs = search_regs.num_regs;
536 /* #### bytecount_to_charcount() is not that efficient. This function
537 could be faster if it did its own conversion (using INC_CHARPTR()
538 and such), because the register ends are likely to be somewhat ordered.
539 (Even if not, you could sort them.)
541 Think about this if this function is a time hog, which it's probably
543 for (i = 0; i < num_regs; i++) {
544 if (search_regs.start[i] > 0) {
545 search_regs.start[i] =
546 bytecount_to_charcount(XSTRING_DATA(string),
547 search_regs.start[i]);
549 if (search_regs.end[i] > 0) {
551 bytecount_to_charcount(XSTRING_DATA(string),
558 looking_at_1(Lisp_Object string, struct buffer *buf, int posix)
560 /* This function has been Mule-ized, except for the trt table handling. */
565 struct re_pattern_buffer *bufp;
567 if (running_asynch_code)
570 CHECK_STRING(string);
571 bufp = compile_pattern(string, &search_regs,
572 (!NILP(buf->case_fold_search)
573 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
578 /* Get pointers and sizes of the two strings
579 that make up the visible portion of the buffer. */
581 p1 = BI_BUF_BEGV(buf);
582 p2 = BI_BUF_CEILING_OF(buf, p1);
584 s2 = BI_BUF_ZV(buf) - p2;
586 regex_match_object = Qnil;
587 regex_emacs_buffer = buf;
588 i = re_match_2(bufp, (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
589 s1, (char *)BI_BUF_BYTE_ADDRESS(buf, p2), s2,
590 BI_BUF_PT(buf) - BI_BUF_BEGV(buf), &search_regs,
591 BI_BUF_ZV(buf) - BI_BUF_BEGV(buf));
596 val = (0 <= i ? Qt : Qnil);
600 int num_regs = search_regs.num_regs;
601 for (i = 0; i < num_regs; i++)
602 if (search_regs.start[i] >= 0) {
603 search_regs.start[i] += BI_BUF_BEGV(buf);
604 search_regs.end[i] += BI_BUF_BEGV(buf);
607 XSETBUFFER(last_thing_searched, buf);
608 fixup_search_regs_for_buffer(buf);
612 DEFUN("looking-at", Flooking_at, 1, 2, 0, /*
613 Return t if text after point matches regular expression REGEXP.
614 This function modifies the match data that `match-beginning',
615 `match-end' and `match-data' access; save and restore the match
616 data if you want to preserve them.
618 Optional argument BUFFER defaults to the current buffer.
622 return looking_at_1(regexp, decode_buffer(buffer, 0), 0);
625 DEFUN("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
626 Return t if text after point matches regular expression REGEXP.
627 Find the longest match, in accord with Posix regular expression rules.
628 This function modifies the match data that `match-beginning',
629 `match-end' and `match-data' access; save and restore the match
630 data if you want to preserve them.
632 Optional argument BUFFER defaults to the current buffer.
636 return looking_at_1(regexp, decode_buffer(buffer, 0), 1);
640 string_match_1(Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
641 struct buffer *buf, int posix)
643 /* This function has been Mule-ized, except for the trt table handling. */
646 struct re_pattern_buffer *bufp;
648 if (running_asynch_code)
651 CHECK_STRING(regexp);
652 CHECK_STRING(string);
657 Charcount len = XSTRING_CHAR_LENGTH(string);
661 if (s < 0 && -s <= len)
663 else if (0 > s || s > len)
664 args_out_of_range(string, start);
667 bufp = compile_pattern(regexp, &search_regs,
668 (!NILP(buf->case_fold_search)
669 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
673 Bytecount bis = charcount_to_bytecount(XSTRING_DATA(string), s);
674 regex_match_object = string;
675 regex_emacs_buffer = buf;
676 val = re_search(bufp, (char *)XSTRING_DATA(string),
677 XSTRING_LENGTH(string), bis,
678 XSTRING_LENGTH(string) - bis, &search_regs);
684 last_thing_searched = Qt;
685 fixup_search_regs_for_string(string);
686 return make_int(bytecount_to_charcount(XSTRING_DATA(string), val));
689 DEFUN("string-match", Fstring_match, 2, 4, 0, /*
690 Return index of start of first match for REGEXP in STRING, or nil.
691 If third arg START is non-nil, start search at that index in STRING.
692 For index of first char beyond the match, do (match-end 0).
693 `match-end' and `match-beginning' also give indices of substrings
694 matched by parenthesis constructs in the pattern.
696 Optional arg BUFFER controls how case folding is done (according to
697 the value of `case-fold-search' in that buffer and that buffer's case
698 tables) and defaults to the current buffer.
700 (regexp, string, start, buffer))
702 return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
706 DEFUN("posix-string-match", Fposix_string_match, 2, 4, 0, /*
707 Return index of start of first match for REGEXP in STRING, or nil.
708 Find the longest match, in accord with Posix regular expression rules.
709 If third arg START is non-nil, start search at that index in STRING.
710 For index of first char beyond the match, do (match-end 0).
711 `match-end' and `match-beginning' also give indices of substrings
712 matched by parenthesis constructs in the pattern.
714 Optional arg BUFFER controls how case folding is done (according to
715 the value of `case-fold-search' in that buffer and that buffer's case
716 tables) and defaults to the current buffer.
718 (regexp, string, start, buffer))
720 return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
724 /* Match REGEXP against STRING, searching all of STRING,
725 and return the index of the match, or negative on failure.
726 This does not clobber the match data. */
729 fast_string_match(Lisp_Object regexp, const Bufbyte * nonreloc,
730 Lisp_Object reloc, Bytecount offset,
731 Bytecount length, int case_fold_search,
732 Error_behavior errb, int no_quit)
734 /* This function has been Mule-ized, except for the trt table handling. */
736 const Bufbyte *newnonreloc = (const Bufbyte*)nonreloc;
737 struct re_pattern_buffer *bufp;
739 bufp = compile_pattern(regexp, 0,
741 ? XCASE_TABLE_DOWNCASE(current_buffer->
745 return -1; /* will only do this when errb != ERROR_ME */
749 no_quit_in_re_search = 1;
751 fixup_internal_substring(nonreloc, reloc, offset, &length);
755 newnonreloc = XSTRING_DATA(reloc);
757 /* QUIT could relocate RELOC. Therefore we must
758 alloca() and copy. No way around this except some
759 serious rewriting of re_search(). */
760 /* yeah, let's rewrite this bugger, the warning
761 hereafter is inevitable too */
762 newnonreloc = alloca(length);
763 memcpy((void*)newnonreloc, (void*)XSTRING_DATA(reloc), length);
767 /* #### evil current-buffer dependency */
768 regex_match_object = reloc;
769 regex_emacs_buffer = current_buffer;
770 val = re_search(bufp, (const char*)newnonreloc + offset, length, 0,
773 no_quit_in_re_search = 0;
777 Bytecount fast_lisp_string_match(Lisp_Object regex, Lisp_Object string)
779 return fast_string_match(regex, 0, string, 0, -1, 0, ERROR_ME, 0);
782 #ifdef REGION_CACHE_NEEDS_WORK
783 /* The newline cache: remembering which sections of text have no newlines. */
785 /* If the user has requested newline caching, make sure it's on.
786 Otherwise, make sure it's off.
787 This is our cheezy way of associating an action with the change of
788 state of a buffer-local variable. */
789 static void newline_cache_on_off(struct buffer *buf)
791 if (NILP(buf->cache_long_line_scans)) {
792 /* It should be off. */
793 if (buf->newline_cache) {
794 free_region_cache(buf->newline_cache);
795 buf->newline_cache = 0;
798 /* It should be on. */
799 if (buf->newline_cache == 0)
800 buf->newline_cache = new_region_cache();
805 /* Search in BUF for COUNT instances of the character TARGET between
808 If COUNT is positive, search forwards; END must be >= START.
809 If COUNT is negative, search backwards for the -COUNTth instance;
810 END must be <= START.
811 If COUNT is zero, do anything you please; run rogue, for all I care.
813 If END is zero, use BEGV or ZV instead, as appropriate for the
814 direction indicated by COUNT.
816 If we find COUNT instances, set *SHORTAGE to zero, and return the
817 position after the COUNTth match. Note that for reverse motion
818 this is not the same as the usual convention for Emacs motion commands.
820 If we don't find COUNT instances before reaching END, set *SHORTAGE
821 to the number of TARGETs left unfound, and return END.
823 If ALLOW_QUIT is non-zero, call QUIT periodically. */
826 bi_scan_buffer(struct buffer *buf, Emchar target, Bytind st, Bytind en,
827 EMACS_INT count, EMACS_INT * shortage, int allow_quit)
829 /* This function has been Mule-ized. */
830 Bytind lim = en > 0 ? en :
831 ((count > 0) ? BI_BUF_ZV(buf) : BI_BUF_BEGV(buf));
833 /* #### newline cache stuff in this function not yet ported */
842 /* Due to the Mule representation of characters in a buffer,
843 we can simply search for characters in the range 0 - 127
844 directly. For other characters, we do it the "hard" way.
845 Note that this way works for all characters but the other
847 if (target >= 0200) {
848 while (st < lim && count > 0) {
849 if (BI_BUF_FETCH_CHAR(buf, st) == target)
856 while (st < lim && count > 0) {
860 _ceil_ = BI_BUF_CEILING_OF(buf, st);
861 _ceil_ = min(lim, _ceil_);
864 memchr(BI_BUF_BYTE_ADDRESS(buf, st),
865 (int)target, _ceil_ - st);
868 st = BI_BUF_PTR_BYTE_POS(buf,
882 if (target >= 0200) {
883 while (st > lim && count < 0) {
885 if (BI_BUF_FETCH_CHAR(buf, st) == target)
891 while (st > lim && count < 0) {
896 _floor_ = BI_BUF_FLOOR_OF(buf, st);
897 _floor_ = max(lim, _floor_);
898 /* No memrchr() ... */
899 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE(buf, st);
900 floorptr = BI_BUF_BYTE_ADDRESS(buf, _floor_);
901 while (bufptr >= floorptr) {
903 /* At this point, both ST and BUFPTR
904 refer to the same character. When
905 the loop terminates, ST will always
906 point to the last character we
908 if (*(unsigned char *)bufptr ==
909 (unsigned char)target) {
925 /* We found the character we were looking for; we have to return
926 the position *after* it due to the strange way that the return
935 scan_buffer(struct buffer * buf, Emchar target, Bufpos start, Bufpos end,
936 EMACS_INT count, EMACS_INT * shortage, int allow_quit)
939 Bytind bi_start, bi_end;
941 bi_start = bufpos_to_bytind(buf, start);
943 bi_end = bufpos_to_bytind(buf, end);
946 bi_retval = bi_scan_buffer(buf, target, bi_start, bi_end, count,
947 shortage, allow_quit);
948 return bytind_to_bufpos(buf, bi_retval);
951 Bytind bi_find_next_newline_no_quit(struct buffer * buf, Bytind from, int count)
953 return bi_scan_buffer(buf, '\n', from, 0, count, 0, 0);
956 Bufpos find_next_newline_no_quit(struct buffer * buf, Bufpos from, int count)
958 return scan_buffer(buf, '\n', from, 0, count, 0, 0);
961 Bufpos find_next_newline(struct buffer * buf, Bufpos from, int count)
963 return scan_buffer(buf, '\n', from, 0, count, 0, 1);
967 bi_find_next_emchar_in_string(Lisp_String * str, Emchar target, Bytind st,
970 /* This function has been Mule-ized. */
971 Bytind lim = string_length(str) - 1;
972 Bufbyte *s = string_data(str);
977 /* Due to the Mule representation of characters in a buffer,
978 we can simply search for characters in the range 0 - 127
979 directly. For other characters, we do it the "hard" way.
980 Note that this way works for all characters but the other
982 if (target >= 0200) {
983 while (st < lim && count > 0) {
984 if (string_char(str, st) == target)
986 INC_CHARBYTIND(s, st);
991 while (st < lim && count > 0) {
993 (Bufbyte *) memchr(charptr_n_addr(s, st),
994 (int)target, lim - st);
997 st = (Bytind) (bufptr - s) + 1;
1005 /* Like find_next_newline, but returns position before the newline,
1006 not after, and only search up to TO. This isn't just
1007 find_next_newline (...)-1, because you might hit TO. */
1009 find_before_next_newline(struct buffer * buf, Bufpos from, Bufpos to, int count)
1012 Bufpos pos = scan_buffer(buf, '\n', from, to, count, &shortage, 1);
1020 /* This function synched with FSF 21.1 */
1022 skip_chars(struct buffer *buf, int forwardp, int syntaxp,
1023 Lisp_Object string, Lisp_Object lim)
1025 /* This function has been Mule-ized. */
1026 REGISTER Bufbyte *p, *pend;
1028 /* We store the first 256 chars in an array here and the rest in
1030 unsigned char fastmap[REGEXP_FASTMAP_SIZE];
1034 Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
1039 limit = forwardp ? BUF_ZV(buf) : BUF_BEGV(buf);
1041 CHECK_INT_COERCE_MARKER(lim);
1044 /* In any case, don't allow scan outside bounds of buffer. */
1045 if (limit > BUF_ZV(buf))
1046 limit = BUF_ZV(buf);
1047 if (limit < BUF_BEGV(buf))
1048 limit = BUF_BEGV(buf);
1051 CHECK_STRING(string);
1052 p = XSTRING_DATA(string);
1053 pend = p + XSTRING_LENGTH(string);
1054 memset(fastmap, 0, sizeof(fastmap));
1056 Fclear_range_table(Vskip_chars_range_table);
1058 if (p != pend && *p == '^') {
1063 /* Find the characters specified and set their elements of fastmap.
1064 If syntaxp, each character counts as itself.
1065 Otherwise, handle backslashes and ranges specially */
1068 c = charptr_emchar(p);
1071 if (c < REGEXP_FASTMAP_SIZE
1072 && syntax_spec_code[c] < (unsigned char)Smax)
1075 signal_simple_error("Invalid syntax designator",
1081 c = charptr_emchar(p);
1084 if (p != pend && *p == '-') {
1087 /* Skip over the dash. */
1091 cend = charptr_emchar(p);
1092 while (c <= cend && c < REGEXP_FASTMAP_SIZE) {
1097 Fput_range_table(make_int(c),
1099 Vskip_chars_range_table);
1102 if (c < REGEXP_FASTMAP_SIZE)
1105 Fput_range_table(make_int(c),
1107 Vskip_chars_range_table);
1112 /* #### Not in FSF 21.1 */
1113 if (syntaxp && fastmap['-'] != 0)
1116 /* If ^ was the first character, complement the fastmap.
1117 We don't complement the range table, however; we just use negate
1118 in the comparisons below. */
1121 for (i = 0; i < (int)(sizeof fastmap); i++)
1125 Bufpos start_point = BUF_PT(buf);
1126 Bufpos pos = start_point;
1127 Bytind pos_byte = BI_BUF_PT(buf);
1130 SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos,
1132 /* All syntax designators are normal chars so nothing strange
1136 while (fastmap[(unsigned char)
1138 [(int)SYNTAX_FROM_CACHE
1140 BI_BUF_FETCH_CHAR(buf,
1144 INC_BYTIND(buf, pos_byte);
1147 UPDATE_SYNTAX_CACHE_FORWARD
1151 while (pos > limit) {
1152 Bufpos savepos = pos_byte;
1154 DEC_BYTIND(buf, pos_byte);
1155 UPDATE_SYNTAX_CACHE_BACKWARD(pos);
1156 if (!fastmap[(unsigned char)
1158 [(int)SYNTAX_FROM_CACHE
1160 BI_BUF_FETCH_CHAR(buf,
1171 while (pos < limit) {
1173 BI_BUF_FETCH_CHAR(buf, pos_byte);
1175 REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1179 Vskip_chars_range_table, Qnil))
1182 INC_BYTIND(buf, pos_byte);
1187 while (pos > limit) {
1188 Bufpos prev_pos_byte = pos_byte;
1191 DEC_BYTIND(buf, prev_pos_byte);
1192 ch = BI_BUF_FETCH_CHAR(buf,
1195 REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1199 Vskip_chars_range_table, Qnil))
1202 pos_byte = prev_pos_byte;
1209 BOTH_BUF_SET_PT(buf, pos, pos_byte);
1210 return make_int(BUF_PT(buf) - start_point);
1214 DEFUN("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
1215 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
1216 STRING is like the inside of a `[...]' in a regular expression
1217 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
1218 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1219 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1220 Returns the distance traveled, either zero or positive.
1222 Optional argument BUFFER defaults to the current buffer.
1224 (string, limit, buffer))
1226 return skip_chars(decode_buffer(buffer, 0), 1, 0, string, limit);
1229 DEFUN("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
1230 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1231 See `skip-chars-forward' for details.
1232 Returns the distance traveled, either zero or negative.
1234 Optional argument BUFFER defaults to the current buffer.
1236 (string, limit, buffer))
1238 return skip_chars(decode_buffer(buffer, 0), 0, 0, string, limit);
1241 DEFUN("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1242 Move point forward across chars in specified syntax classes.
1243 SYNTAX is a string of syntax code characters.
1244 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1245 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1246 This function returns the distance traveled, either zero or positive.
1248 Optional argument BUFFER defaults to the current buffer.
1250 (syntax, limit, buffer))
1252 return skip_chars(decode_buffer(buffer, 0), 1, 1, syntax, limit);
1255 DEFUN("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1256 Move point backward across chars in specified syntax classes.
1257 SYNTAX is a string of syntax code characters.
1258 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1259 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1260 This function returns the distance traveled, either zero or negative.
1262 Optional argument BUFFER defaults to the current buffer.
1264 (syntax, limit, buffer))
1266 return skip_chars(decode_buffer(buffer, 0), 0, 1, syntax, limit);
1269 /* Subroutines of Lisp buffer search functions. */
1272 search_command(Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1273 Lisp_Object count, Lisp_Object buffer, int direction,
1276 /* This function has been Mule-ized, except for the trt table handling. */
1279 EMACS_INT n = direction;
1287 buf = decode_buffer(buffer, 0);
1288 CHECK_STRING(string);
1290 lim = n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
1292 CHECK_INT_COERCE_MARKER(limit);
1294 if (n > 0 ? lim < BUF_PT(buf) : lim > BUF_PT(buf))
1295 error("Invalid search limit (wrong side of point)");
1296 if (lim > BUF_ZV(buf))
1298 if (lim < BUF_BEGV(buf))
1299 lim = BUF_BEGV(buf);
1302 np = search_buffer(buf, string, BUF_PT(buf), lim, n, RE,
1303 (!NILP(buf->case_fold_search)
1304 ? XCASE_TABLE_CANON(buf->case_table)
1305 : Qnil), (!NILP(buf->case_fold_search)
1306 ? XCASE_TABLE_EQV(buf->case_table)
1311 return signal_failure(string);
1312 if (!EQ(noerror, Qt)) {
1313 if (lim < BUF_BEGV(buf) || lim > BUF_ZV(buf))
1315 BUF_SET_PT(buf, lim);
1317 #if 0 /* This would be clean, but maybe programs depend on
1318 a value of nil here. */
1325 if (np < BUF_BEGV(buf) || np > BUF_ZV(buf))
1328 BUF_SET_PT(buf, np);
1330 return make_int(np);
1333 static int trivial_regexp_p(Lisp_Object regexp)
1335 /* This function has been Mule-ized. */
1336 Bytecount len = XSTRING_LENGTH(regexp);
1337 Bufbyte *s = XSTRING_DATA(regexp);
1338 while (--len >= 0) {
1340 /* ']' doesn't appear here because it's only special after ] */
1370 /* 97/2/25 jhod Added for category matches */
1394 /* Search for the n'th occurrence of STRING in BUF,
1395 starting at position BUFPOS and stopping at position BUFLIM,
1396 treating PAT as a literal string if RE is false or as
1397 a regular expression if RE is true.
1399 If N is positive, searching is forward and BUFLIM must be greater
1401 If N is negative, searching is backward and BUFLIM must be less
1404 Returns -x if only N-x occurrences found (x > 0),
1405 or else the position at the beginning of the Nth occurrence
1406 (if searching backward) or the end (if searching forward).
1408 POSIX is nonzero if we want full backtracking (POSIX style)
1409 for this pattern. 0 means backtrack only enough to get a valid match. */
1411 search_buffer(struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1412 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1413 Lisp_Object inverse_trt, int posix)
1415 /* This function has been Mule-ized, except for the trt table handling. */
1416 Bytecount len = XSTRING_LENGTH(string);
1417 Bufbyte *base_pat = XSTRING_DATA(string);
1418 REGISTER EMACS_INT i, j;
1423 if (running_asynch_code)
1426 /* Null string is found at starting position. */
1428 set_search_regs(buf, bufpos, 0);
1429 clear_unused_search_regs(&search_regs, 0);
1433 /* Searching 0 times means noop---don't move, don't touch registers. */
1437 pos = bufpos_to_bytind(buf, bufpos);
1438 lim = bufpos_to_bytind(buf, buflim);
1439 if (RE && !trivial_regexp_p(string)) {
1440 struct re_pattern_buffer *bufp;
1442 bufp = compile_pattern(string, &search_regs, trt, posix,
1445 /* Get pointers and sizes of the two strings
1446 that make up the visible portion of the buffer. */
1448 p1 = BI_BUF_BEGV(buf);
1449 p2 = BI_BUF_CEILING_OF(buf, p1);
1451 s2 = BI_BUF_ZV(buf) - p2;
1452 regex_match_object = Qnil;
1457 regex_emacs_buffer = buf;
1458 val = re_search_2(bufp,
1459 (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1460 s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1462 s2, pos - BI_BUF_BEGV(buf), lim - pos,
1463 &search_regs, pos - BI_BUF_BEGV(buf));
1469 int num_regs = search_regs.num_regs;
1470 j = BI_BUF_BEGV(buf);
1471 for (i = 0; i < num_regs; i++)
1472 if (search_regs.start[i] >= 0) {
1473 search_regs.start[i] += j;
1474 search_regs.end[i] += j;
1476 /* re_match (called from re_search et al) does this for us */
1477 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1478 XSETBUFFER(last_thing_searched, buf);
1479 /* Set pos to the new position. */
1480 pos = search_regs.start[0];
1481 fixup_search_regs_for_buffer(buf);
1482 /* And bufpos too. */
1483 bufpos = search_regs.start[0];
1492 regex_emacs_buffer = buf;
1493 val = re_search_2(bufp,
1494 (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1495 s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1497 s2, pos - BI_BUF_BEGV(buf), lim - pos,
1498 &search_regs, lim - BI_BUF_BEGV(buf));
1503 int num_regs = search_regs.num_regs;
1504 j = BI_BUF_BEGV(buf);
1505 for (i = 0; i < num_regs; i++)
1506 if (search_regs.start[i] >= 0) {
1507 search_regs.start[i] += j;
1508 search_regs.end[i] += j;
1510 /* re_match (called from re_search et al) does this for us */
1511 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1512 XSETBUFFER(last_thing_searched, buf);
1513 /* Set pos to the new position. */
1514 pos = search_regs.end[0];
1515 fixup_search_regs_for_buffer(buf);
1516 /* And bufpos too. */
1517 bufpos = search_regs.end[0];
1524 } else { /* non-RE case */
1526 int charset_base = -1;
1527 int boyer_moore_ok = 1;
1529 Bufbyte *patbuf = alloca_array(Bufbyte, len * MAX_EMCHAR_LEN);
1533 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1534 Emchar c, translated, inverse;
1535 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1537 /* If we got here and the RE flag is set, it's because
1538 we're dealing with a regexp known to be trivial, so the
1539 backslash just quotes the next character. */
1540 if (RE && *base_pat == '\\') {
1544 c = charptr_emchar(base_pat);
1545 translated = TRANSLATE(trt, c);
1546 inverse = TRANSLATE(inverse_trt, c);
1548 orig_bytelen = charcount_to_bytecount(base_pat, 1);
1549 inv_bytelen = set_charptr_emchar(tmp_str, inverse);
1550 new_bytelen = set_charptr_emchar(tmp_str, translated);
1552 if (new_bytelen != orig_bytelen
1553 || inv_bytelen != orig_bytelen)
1555 if (translated != c || inverse != c) {
1556 /* Keep track of which character set row
1557 contains the characters that need translation. */
1558 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1559 if (charset_base == -1)
1560 charset_base = charset_base_code;
1561 else if (charset_base != charset_base_code)
1562 /* If two different rows appear, needing translation,
1563 then we cannot use boyer_moore search. */
1566 memcpy(pat, tmp_str, new_bytelen);
1568 base_pat += orig_bytelen;
1569 len -= orig_bytelen;
1571 #else /* not MULE */
1572 while (--len >= 0) {
1573 /* If we got here and the RE flag is set, it's because
1574 we're dealing with a regexp known to be trivial, so the
1575 backslash just quotes the next character. */
1576 if (RE && *base_pat == '\\') {
1580 *pat++ = TRANSLATE(trt, *base_pat++);
1584 pat = base_pat = patbuf;
1586 return boyer_moore(buf, base_pat, len, pos, lim, n,
1587 trt, inverse_trt, charset_base);
1589 return simple_search(buf, base_pat, len, pos, lim, n,
1594 /* Do a simple string search N times for the string PAT,
1595 whose length is LEN/LEN_BYTE,
1596 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1597 TRT is the translation table.
1599 Return the character position where the match is found.
1600 Otherwise, if M matches remained to be found, return -M.
1602 This kind of search works regardless of what is in PAT and
1603 regardless of what is in TRT. It is used in cases where
1604 boyer_moore cannot work. */
1607 simple_search(struct buffer *buf, Bufbyte * base_pat, Bytecount len_byte,
1608 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1610 int forward = n > 0;
1611 Bytecount buf_len = 0; /* Shut up compiler. */
1616 Bytecount this_len = len_byte;
1617 Bytind this_idx = idx;
1618 const Bufbyte *p = base_pat;
1622 while (this_len > 0) {
1623 Emchar pat_ch, buf_ch;
1626 pat_ch = charptr_emchar(p);
1628 BI_BUF_FETCH_CHAR(buf, this_idx);
1630 buf_ch = TRANSLATE(trt, buf_ch);
1632 if (buf_ch != pat_ch)
1635 pat_len = charcount_to_bytecount(p, 1);
1637 this_len -= pat_len;
1638 INC_BYTIND(buf, this_idx);
1640 if (this_len == 0) {
1641 buf_len = this_idx - idx;
1645 INC_BYTIND(buf, idx);
1652 Bytecount this_len = len_byte;
1653 Bytind this_idx = idx;
1654 const Bufbyte *p = base_pat + len_byte;
1660 while (this_len > 0) {
1661 Emchar pat_ch, buf_ch;
1664 DEC_BYTIND(buf, this_idx);
1665 pat_ch = charptr_emchar(p);
1667 BI_BUF_FETCH_CHAR(buf, this_idx);
1669 buf_ch = TRANSLATE(trt, buf_ch);
1671 if (buf_ch != pat_ch)
1675 charcount_to_bytecount(p, 1);
1677 if (this_len == 0) {
1678 buf_len = idx - this_idx;
1682 DEC_BYTIND(buf, idx);
1689 Bufpos beg, end, retval;
1691 beg = bytind_to_bufpos(buf, idx - buf_len);
1692 retval = end = bytind_to_bufpos(buf, idx);
1694 retval = beg = bytind_to_bufpos(buf, idx);
1695 end = bytind_to_bufpos(buf, idx + buf_len);
1697 set_search_regs(buf, beg, end - beg);
1698 clear_unused_search_regs(&search_regs, 0);
1708 /* Do Boyer-Moore search N times for the string PAT,
1709 whose length is LEN/LEN_BYTE,
1710 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1711 DIRECTION says which direction we search in.
1712 TRT and INVERSE_TRT are translation tables.
1714 This kind of search works if all the characters in PAT that have
1715 nontrivial translation are the same aside from the last byte. This
1716 makes it possible to translate just the last byte of a character,
1717 and do so after just a simple test of the context.
1719 If that criterion is not satisfied, do not call this function. */
1722 boyer_moore(struct buffer *buf, Bufbyte * base_pat, Bytecount len,
1723 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1724 Lisp_Object inverse_trt, int charset_base)
1726 /* #### Someone really really really needs to comment the workings
1727 of this junk somewhat better.
1729 BTW "BM" stands for Boyer-Moore, which is one of the standard
1730 string-searching algorithms. It's the best string-searching
1731 algorithm out there, provided that:
1733 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1734 uses hashing, is much much easier to code but not as fast.)
1735 b) You can freely move backwards in the string that you're
1738 As the comment below tries to explain (but garbles in typical
1739 programmer-ese), the idea is that you don't have to do a
1740 string match at every successive position in the text. For
1741 example, let's say the pattern is "a very long string". We
1742 compare the last character in the string (`g') with the
1743 corresponding character in the text. If it mismatches, and
1744 it is, say, `z', then we can skip forward by the entire
1745 length of the pattern because `z' does not occur anywhere
1746 in the pattern. If the mismatching character does occur
1747 in the pattern, we can usually still skip forward by more
1748 than one: e.g. if it is `l', then we can skip forward
1749 by the length of the substring "ong string" -- i.e. the
1750 largest end section of the pattern that does not contain
1751 the mismatched character. So what we do is compute, for
1752 each possible character, the distance we can skip forward
1753 (the "stride") and use it in the string matching. This
1754 is what the BM_tab holds. */
1755 REGISTER EMACS_INT *BM_tab;
1756 EMACS_INT *BM_tab_base;
1757 REGISTER Bytecount dirlen;
1760 Bytecount stride_for_teases = 0;
1761 REGISTER EMACS_INT i, j;
1762 Bufbyte *pat, *pat_end;
1763 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1764 Bufbyte simple_translate[REGEXP_FASTMAP_SIZE];
1765 REGISTER int direction = ((n > 0) ? 1 : -1);
1767 Bufbyte translate_prev_byte = 0;
1768 Bufbyte translate_anteprev_byte = 0;
1771 EMACS_INT BM_tab_space[REGEXP_FASTMAP_SIZE];
1772 BM_tab = &BM_tab_space[0];
1774 BM_tab = alloca_array(EMACS_INT, 256);
1777 /* The general approach is that we are going to maintain that we
1778 know the first (closest to the present position, in whatever
1779 direction we're searching) character that could possibly be
1780 the last (furthest from present position) character of a
1781 valid match. We advance the state of our knowledge by
1782 looking at that character and seeing whether it indeed
1783 matches the last character of the pattern. If it does, we
1784 take a closer look. If it does not, we move our pointer (to
1785 putative last characters) as far as is logically possible.
1786 This amount of movement, which I call a stride, will be the
1787 length of the pattern if the actual character appears nowhere
1788 in the pattern, otherwise it will be the distance from the
1789 last occurrence of that character to the end of the pattern.
1790 As a coding trick, an enormous stride is coded into the table
1791 for characters that match the last character. This allows
1792 use of only a single test, a test for having gone past the
1793 end of the permissible match region, to test for both
1794 possible matches (when the stride goes past the end
1795 immediately) and failure to match (where you get nudged past
1796 the end one stride at a time).
1798 Here we make a "mickey mouse" BM table. The stride of the
1799 search is determined only by the last character of the
1800 putative match. If that character does not match, we will
1801 stride the proper distance to propose a match that
1802 superimposes it on the last instance of a character that
1803 matches it (per trt), or misses it entirely if there is
1806 dirlen = len * direction;
1807 infinity = dirlen - (lim + pos + len + len) * direction;
1808 /* Record position after the end of the pattern. */
1809 pat_end = base_pat + len;
1811 base_pat = pat_end - 1;
1812 BM_tab_base = BM_tab;
1813 BM_tab += REGEXP_FASTMAP_SIZE;
1814 j = dirlen; /* to get it in a register */
1815 /* A character that does not appear in the pattern induces a
1816 stride equal to the pattern length. */
1817 while (BM_tab_base != BM_tab) {
1823 /* We use this for translation, instead of TRT itself. We
1824 fill this in to handle the characters that actually occur
1825 in the pattern. Others don't matter anyway! */
1826 xzero(simple_translate);
1827 for (i = 0; i < REGEXP_FASTMAP_SIZE; i++)
1828 simple_translate[i] = (Bufbyte) i;
1830 while (i != infinity) {
1831 Bufbyte *ptr = base_pat + i;
1837 Emchar ch, untranslated;
1838 int this_translated = 1;
1840 /* Is *PTR the last byte of a character? */
1841 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P(ptr[1])) {
1842 Bufbyte *charstart = ptr;
1843 while (!BUFBYTE_FIRST_BYTE_P(*charstart))
1845 untranslated = charptr_emchar(charstart);
1847 (untranslated & ~CHAR_FIELD3_MASK)) {
1848 ch = TRANSLATE(trt, untranslated);
1849 if (!BUFBYTE_FIRST_BYTE_P(*ptr)) {
1850 translate_prev_byte = ptr[-1];
1851 if (!BUFBYTE_FIRST_BYTE_P
1852 (translate_prev_byte))
1853 translate_anteprev_byte
1857 this_translated = 0;
1862 this_translated = 0;
1864 if (ch > REGEXP_FASTMAP_SIZE)
1865 j = ((unsigned char)ch | 0200);
1867 j = (unsigned char)ch;
1870 stride_for_teases = BM_tab[j];
1871 BM_tab[j] = dirlen - i;
1872 /* A translation table is accompanied by its inverse --
1873 see comment following downcase_table for details */
1874 if (this_translated) {
1875 Emchar starting_ch = ch;
1876 EMACS_INT starting_j = j;
1878 ch = TRANSLATE(inverse_trt, ch);
1879 if (ch > REGEXP_FASTMAP_SIZE)
1880 j = ((unsigned char)ch | 0200);
1882 j = (unsigned char)ch;
1884 /* For all the characters that map into CH,
1885 set up simple_translate to map the last byte
1887 simple_translate[j] = starting_j;
1888 if (ch == starting_ch)
1890 BM_tab[j] = dirlen - i;
1896 k = (j = TRANSLATE(trt, j));
1898 stride_for_teases = BM_tab[j];
1899 BM_tab[j] = dirlen - i;
1900 /* A translation table is accompanied by its inverse --
1901 see comment following downcase_table for details */
1903 while ((j = TRANSLATE(inverse_trt, j)) != k) {
1904 simple_translate[j] = (Bufbyte) k;
1905 BM_tab[j] = dirlen - i;
1912 stride_for_teases = BM_tab[j];
1913 BM_tab[j] = dirlen - i;
1915 /* stride_for_teases tells how much to stride if we get a
1916 match on the far character but are subsequently
1917 disappointed, by recording what the stride would have been
1918 for that character if the last character had been
1921 infinity = dirlen - infinity;
1922 pos += dirlen - ((direction > 0) ? direction : 0);
1923 /* loop invariant - pos points at where last char (first char if
1924 reverse) of pattern would align in a possible match. */
1927 Bufbyte *tail_end_ptr;
1928 /* It's been reported that some (broken) compiler thinks
1929 that Boolean expressions in an arithmetic context are
1930 unsigned. Using an explicit ?1:0 prevents this. */
1931 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1932 return n * (0 - direction);
1933 /* First we do the part we can by pointers (maybe
1937 limit = pos - dirlen + direction;
1938 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1939 have changed. See buffer.h. */
1940 limit = ((direction > 0)
1941 ? BI_BUF_CEILING_OF(buf, limit) - 1
1942 : BI_BUF_FLOOR_OF(buf, limit + 1));
1943 /* LIMIT is now the last (not beyond-last!) value POS can
1944 take on without hitting edge of buffer or the gap. */
1945 limit = ((direction > 0)
1946 ? min(lim - 1, min(limit, pos + 20000))
1947 : max(lim, max(limit, pos - 20000)));
1948 tail_end = BI_BUF_CEILING_OF(buf, pos);
1949 tail_end_ptr = BI_BUF_BYTE_ADDRESS(buf, tail_end);
1951 if ((limit - pos) * direction > 20) {
1952 p_limit = BI_BUF_BYTE_ADDRESS(buf, limit);
1953 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS(buf, pos));
1954 /* In this loop, pos + cursor - ptr2 is the surrogate
1956 while (1) { /* use one cursor setting as long as i can */
1957 if (direction > 0) { /* worth duplicating */
1958 /* Use signed comparison if appropriate to make
1959 cursor+infinity sure to be > p_limit.
1960 Assuming that the buffer lies in a range of
1961 addresses that are all "positive" (as ints)
1962 or all "negative", either kind of comparison
1963 will work as long as we don't step by
1964 infinity. So pick the kind that works when
1965 we do step by infinity. */
1966 if ((EMACS_INT) (p_limit + infinity) >
1967 (EMACS_INT) p_limit)
1968 while ((EMACS_INT) cursor <=
1969 (EMACS_INT) p_limit)
1973 while ((EMACS_UINT) cursor <=
1974 (EMACS_UINT) p_limit)
1978 if ((EMACS_INT) (p_limit + infinity) <
1979 (EMACS_INT) p_limit)
1980 while ((EMACS_INT) cursor >=
1981 (EMACS_INT) p_limit)
1985 while ((EMACS_UINT) cursor >=
1986 (EMACS_UINT) p_limit)
1990 /* If you are here, cursor is beyond the end of the
1991 searched region. This can happen if you match on
1992 the far character of the pattern, because the
1993 "stride" of that character is infinity, a number
1994 able to throw you well beyond the end of the
1995 search. It can also happen if you fail to match
1996 within the permitted region and would otherwise
1997 try a character beyond that region */
1998 if ((cursor - p_limit) * direction <= len)
1999 break; /* a small overrun is genuine */
2000 cursor -= infinity; /* large overrun = hit */
2001 i = dirlen - direction;
2004 direction) + direction != 0) {
2007 cursor -= direction;
2008 /* Translate only the last byte of a character. */
2009 if ((cursor == tail_end_ptr
2011 BUFBYTE_FIRST_BYTE_P(cursor
2014 (BUFBYTE_FIRST_BYTE_P
2016 || (translate_prev_byte ==
2019 (BUFBYTE_FIRST_BYTE_P
2020 (translate_prev_byte)
2022 translate_anteprev_byte
2024 ch = simple_translate
2040 direction) + direction != 0)
2042 *(cursor -= direction))
2045 cursor += dirlen - i - direction; /* fix cursor */
2046 if (i + direction == 0) {
2047 cursor -= direction;
2051 (pos + cursor - ptr2 +
2055 bytind_to_bufpos(buf,
2058 bytind_to_bufpos(buf,
2062 set_search_regs(buf, bufstart,
2065 clear_unused_search_regs
2069 if ((n -= direction) != 0)
2070 cursor += dirlen; /* to resume search */
2072 return ((direction > 0)
2074 end[0] : search_regs.
2077 cursor += stride_for_teases; /* <sigh> we lose - */
2079 pos += cursor - ptr2;
2081 /* Now we'll pick up a clump that has to be done the hard
2082 way because it covers a discontinuity */
2084 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
2085 have changed. See buffer.h. */
2086 limit = ((direction > 0)
2087 ? BI_BUF_CEILING_OF(buf, pos - dirlen + 1) - 1
2088 : BI_BUF_FLOOR_OF(buf, pos - dirlen));
2089 limit = ((direction > 0)
2090 ? min(limit + len, lim - 1)
2091 : max(limit - len, lim));
2092 /* LIMIT is now the last value POS can have
2093 and still be valid for a possible match. */
2095 /* This loop can be coded for space rather than
2096 speed because it will usually run only once.
2097 (the reach is at most len + 21, and typically
2098 does not exceed len) */
2099 while ((limit - pos) * direction >= 0)
2100 /* *not* BI_BUF_FETCH_CHAR. We are working here
2101 with bytes, not characters. */
2103 BM_tab[*BI_BUF_BYTE_ADDRESS
2105 /* now run the same tests to distinguish going off
2106 the end, a match or a phony match. */
2107 if ((pos - limit) * direction <= len)
2108 break; /* ran off the end */
2109 /* Found what might be a match.
2110 Set POS back to last (first if reverse) char pos. */
2112 i = dirlen - direction;
2113 while ((i -= direction) + direction != 0) {
2120 ptr = BI_BUF_BYTE_ADDRESS(buf, pos);
2121 if ((ptr == tail_end_ptr
2122 || BUFBYTE_FIRST_BYTE_P(ptr[1]))
2123 && (BUFBYTE_FIRST_BYTE_P(ptr[0])
2124 || (translate_prev_byte ==
2127 (BUFBYTE_FIRST_BYTE_P
2128 (translate_prev_byte)
2129 || translate_anteprev_byte
2131 ch = simple_translate[*ptr];
2138 if (pat[i] != TRANSLATE(trt,
2139 *BI_BUF_BYTE_ADDRESS
2144 /* Above loop has moved POS part or all the way back
2145 to the first char pos (last char pos if reverse).
2146 Set it once again at the last (first if reverse)
2148 pos += dirlen - i - direction;
2149 if (i + direction == 0) {
2153 Bytind bytstart = (pos +
2159 bytind_to_bufpos(buf,
2162 bytind_to_bufpos(buf,
2166 set_search_regs(buf, bufstart,
2169 clear_unused_search_regs
2173 if ((n -= direction) != 0)
2174 pos += dirlen; /* to resume search */
2176 return ((direction > 0)
2178 end[0] : search_regs.
2181 pos += stride_for_teases;
2184 /* We have done one clump. Can we continue? */
2185 if ((lim - pos) * direction < 0)
2186 return (0 - n) * direction;
2188 return bytind_to_bufpos(buf, pos);
2191 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
2192 buffer for a match just found. */
2194 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len)
2196 /* This function has been Mule-ized. */
2197 /* Make sure we have registers in which to store
2198 the match position. */
2199 if (search_regs.num_regs == 0) {
2200 search_regs.start = xnew_atomic(regoff_t);
2201 search_regs.end = xnew_atomic(regoff_t);
2202 search_regs.num_regs = 1;
2205 search_regs.start[0] = beg;
2206 search_regs.end[0] = beg + len;
2207 XSETBUFFER(last_thing_searched, buf);
2210 /* Clear unused search registers so match data will be null.
2211 REGP is a pointer to the register structure to clear, usually the global
2213 NO_SUB is the number of subexpressions to allow for. (Does not count
2214 the whole match, ie, for a string search NO_SUB == 0.)
2215 It is an error if NO_SUB > REGP.num_regs - 1. */
2217 static void clear_unused_search_regs(struct re_registers *regp, int no_sub)
2219 /* This function has been Mule-ized. */
2222 assert(no_sub >= 0 && no_sub < regp->num_regs);
2223 for (i = no_sub + 1; i < regp->num_regs; i++)
2224 regp->start[i] = regp->end[i] = -1;
2227 /* Given a string of words separated by word delimiters,
2228 compute a regexp that matches those exact words
2229 separated by arbitrary punctuation. */
2231 static Lisp_Object wordify(Lisp_Object buffer, Lisp_Object string)
2234 EMACS_INT punct_count = 0, word_count = 0;
2235 struct buffer *buf = decode_buffer(buffer, 0);
2236 Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2238 CHECK_STRING(string);
2239 len = XSTRING_CHAR_LENGTH(string);
2241 for (i = 0; i < len; i++)
2243 (syntax_table, string_char(XSTRING(string), i))) {
2245 if (i > 0 && WORD_SYNTAX_P(syntax_table,
2246 string_char(XSTRING(string),
2250 if (WORD_SYNTAX_P(syntax_table, string_char(XSTRING(string), len - 1)))
2253 return build_string("");
2256 /* The following value is an upper bound on the amount of storage we
2257 need. In non-Mule, it is exact. */
2259 (Bufbyte *) alloca(XSTRING_LENGTH(string) - punct_count +
2260 5 * (word_count - 1) + 4);
2261 Bufbyte *o = storage;
2266 for (i = 0; i < len; i++) {
2267 Emchar ch = string_char(XSTRING(string), i);
2269 if (WORD_SYNTAX_P(syntax_table, ch))
2270 o += set_charptr_emchar(o, ch);
2272 && WORD_SYNTAX_P(syntax_table,
2273 string_char(XSTRING(string),
2287 return make_string(storage, o - storage);
2291 DEFUN("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2292 Search backward from point for STRING.
2293 Set point to the beginning of the occurrence found, and return point.
2295 Optional second argument LIMIT bounds the search; it is a buffer
2296 position. The match found must not extend before that position.
2297 The value nil is equivalent to (point-min).
2299 Optional third argument NOERROR, if t, means just return nil (no
2300 error) if the search fails. If neither nil nor t, set point to LIMIT
2303 Optional fourth argument COUNT is a repeat count--search for
2304 successive occurrences.
2306 Optional fifth argument BUFFER specifies the buffer to search in and
2307 defaults to the current buffer.
2309 See also the functions `match-beginning', `match-end' and `replace-match'.
2311 (string, limit, noerror, count, buffer))
2313 return search_command(string, limit, noerror, count, buffer, -1, 0, 0);
2316 DEFUN("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2317 Search forward from point for STRING.
2318 Set point to the end of the occurrence found, and return point.
2320 Optional second argument LIMIT bounds the search; it is a buffer
2321 position. The match found must not extend after that position. The
2322 value nil is equivalent to (point-max).
2324 Optional third argument NOERROR, if t, means just return nil (no
2325 error) if the search fails. If neither nil nor t, set point to LIMIT
2328 Optional fourth argument COUNT is a repeat count--search for
2329 successive occurrences.
2331 Optional fifth argument BUFFER specifies the buffer to search in and
2332 defaults to the current buffer.
2334 See also the functions `match-beginning', `match-end' and `replace-match'.
2336 (string, limit, noerror, count, buffer))
2338 return search_command(string, limit, noerror, count, buffer, 1, 0, 0);
2341 DEFUN("word-search-backward", Fword_search_backward, 1, 5, "sWord search backward: ", /*
2342 Search backward from point for STRING, ignoring differences in punctuation.
2343 Set point to the beginning of the occurrence found, and return point.
2345 Optional second argument LIMIT bounds the search; it is a buffer
2346 position. The match found must not extend before that position.
2347 The value nil is equivalent to (point-min).
2349 Optional third argument NOERROR, if t, means just return nil (no
2350 error) if the search fails. If neither nil nor t, set point to LIMIT
2353 Optional fourth argument COUNT is a repeat count--search for
2354 successive occurrences.
2356 Optional fifth argument BUFFER specifies the buffer to search in and
2357 defaults to the current buffer.
2359 See also the functions `match-beginning', `match-end' and `replace-match'.
2361 (string, limit, noerror, count, buffer))
2363 return search_command(wordify(buffer, string), limit, noerror, count,
2367 DEFUN("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2368 Search forward from point for STRING, ignoring differences in punctuation.
2369 Set point to the end of the occurrence found, and return point.
2371 Optional second argument LIMIT bounds the search; it is a buffer
2372 position. The match found must not extend after that position. The
2373 value nil is equivalent to (point-max).
2375 Optional third argument NOERROR, if t, means just return nil (no
2376 error) if the search fails. If neither nil nor t, set point to LIMIT
2379 Optional fourth argument COUNT is a repeat count--search for
2380 successive occurrences.
2382 Optional fifth argument BUFFER specifies the buffer to search in and
2383 defaults to the current buffer.
2385 See also the functions `match-beginning', `match-end' and `replace-match'.
2387 (string, limit, noerror, count, buffer))
2389 return search_command(wordify(buffer, string), limit, noerror, count,
2393 DEFUN("re-search-backward", Fre_search_backward, 1, 5, "sRE search backward: ", /*
2394 Search backward from point for match for regular expression REGEXP.
2395 Set point to the beginning of the match, and return point.
2396 The match found is the one starting last in the buffer
2397 and yet ending before the origin of the search.
2399 Optional second argument LIMIT bounds the search; it is a buffer
2400 position. The match found must not extend before that position.
2401 The value nil is equivalent to (point-min).
2403 Optional third argument NOERROR, if t, means just return nil (no
2404 error) if the search fails. If neither nil nor t, set point to LIMIT
2407 Optional fourth argument COUNT is a repeat count--search for
2408 successive occurrences.
2410 Optional fifth argument BUFFER specifies the buffer to search in and
2411 defaults to the current buffer.
2413 See also the functions `match-beginning', `match-end' and `replace-match'.
2415 (regexp, limit, noerror, count, buffer))
2417 return search_command(regexp, limit, noerror, count, buffer, -1, 1, 0);
2420 DEFUN("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2421 Search forward from point for regular expression REGEXP.
2422 Set point to the end of the occurrence found, and return point.
2424 Optional second argument LIMIT bounds the search; it is a buffer
2425 position. The match found must not extend after that position. The
2426 value nil is equivalent to (point-max).
2428 Optional third argument NOERROR, if t, means just return nil (no
2429 error) if the search fails. If neither nil nor t, set point to LIMIT
2432 Optional fourth argument COUNT is a repeat count--search for
2433 successive occurrences.
2435 Optional fifth argument BUFFER specifies the buffer to search in and
2436 defaults to the current buffer.
2438 See also the functions `match-beginning', `match-end' and `replace-match'.
2440 (regexp, limit, noerror, count, buffer))
2442 return search_command(regexp, limit, noerror, count, buffer, 1, 1, 0);
2445 DEFUN("posix-search-backward", Fposix_search_backward, 1, 5, "sPosix search backward: ", /*
2446 Search backward from point for match for regular expression REGEXP.
2447 Find the longest match in accord with Posix regular expression rules.
2448 Set point to the beginning of the match, and return point.
2449 The match found is the one starting last in the buffer
2450 and yet ending before the origin of the search.
2452 Optional second argument LIMIT bounds the search; it is a buffer
2453 position. The match found must not extend before that position.
2454 The value nil is equivalent to (point-min).
2456 Optional third argument NOERROR, if t, means just return nil (no
2457 error) if the search fails. If neither nil nor t, set point to LIMIT
2460 Optional fourth argument COUNT is a repeat count--search for
2461 successive occurrences.
2463 Optional fifth argument BUFFER specifies the buffer to search in and
2464 defaults to the current buffer.
2466 See also the functions `match-beginning', `match-end' and `replace-match'.
2468 (regexp, limit, noerror, count, buffer))
2470 return search_command(regexp, limit, noerror, count, buffer, -1, 1, 1);
2473 DEFUN("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2474 Search forward from point for regular expression REGEXP.
2475 Find the longest match in accord with Posix regular expression rules.
2476 Set point to the end of the occurrence found, and return point.
2478 Optional second argument LIMIT bounds the search; it is a buffer
2479 position. The match found must not extend after that position. The
2480 value nil is equivalent to (point-max).
2482 Optional third argument NOERROR, if t, means just return nil (no
2483 error) if the search fails. If neither nil nor t, set point to LIMIT
2486 Optional fourth argument COUNT is a repeat count--search for
2487 successive occurrences.
2489 Optional fifth argument BUFFER specifies the buffer to search in and
2490 defaults to the current buffer.
2492 See also the functions `match-beginning', `match-end' and `replace-match'.
2494 (regexp, limit, noerror, count, buffer))
2496 return search_command(regexp, limit, noerror, count, buffer, 1, 1, 1);
2499 static Lisp_Object free_created_dynarrs(Lisp_Object cons)
2501 Dynarr_free(get_opaque_ptr(XCAR(cons)));
2502 Dynarr_free(get_opaque_ptr(XCDR(cons)));
2503 free_opaque_ptr(XCAR(cons));
2504 free_opaque_ptr(XCDR(cons));
2505 free_cons(XCONS(cons));
2509 DEFUN("replace-match", Freplace_match, 1, 5, 0, /*
2510 Replace text matched by last search with REPLACEMENT.
2511 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2512 Otherwise maybe capitalize the whole text, or maybe just word initials,
2513 based on the replaced text.
2514 If the replaced text has only capital letters
2515 and has at least one multiletter word, convert REPLACEMENT to all caps.
2516 If the replaced text has at least one word starting with a capital letter,
2517 then capitalize each word in REPLACEMENT.
2519 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2520 Otherwise treat `\\' as special:
2521 `\\&' in REPLACEMENT means substitute original matched text.
2522 `\\N' means substitute what matched the Nth `\\(...\\)'.
2523 If Nth parens didn't match, substitute nothing.
2524 `\\\\' means insert one `\\'.
2525 `\\u' means upcase the next character.
2526 `\\l' means downcase the next character.
2527 `\\U' means begin upcasing all following characters.
2528 `\\L' means begin downcasing all following characters.
2529 `\\E' means terminate the effect of any `\\U' or `\\L'.
2530 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2531 all other case changes that may be made in the replaced text.
2532 FIXEDCASE and LITERAL are optional arguments.
2533 Leaves point at end of replacement text.
2535 The optional fourth argument STRING can be a string to modify.
2536 In that case, this function creates and returns a new string
2537 which is made by replacing the part of STRING that was matched.
2538 When fourth argument is a string, fifth argument STRBUFFER specifies
2539 the buffer to be used for syntax-table and case-table lookup and
2540 defaults to the current buffer. When fourth argument is not a string,
2541 the buffer that the match occurred in has automatically been remembered
2542 and you do not need to specify it.
2544 When fourth argument is nil, STRBUFFER specifies a subexpression of
2545 the match. It says to replace just that subexpression instead of the
2546 whole match. This is useful only after a regular expression search or
2547 match since only regular expressions have distinguished subexpressions.
2549 (replacement, fixedcase, literal, string, strbuffer))
2551 /* This function has been Mule-ized. */
2552 /* This function can GC */
2553 enum { nochange, all_caps, cap_initial } case_action;
2555 int some_multiletter_word;
2558 int some_nonuppercase_initial;
2562 Lisp_Char_Table *syntax_table;
2565 int_dynarr *ul_action_dynarr = 0;
2566 int_dynarr *ul_pos_dynarr = 0;
2570 CHECK_STRING(replacement);
2572 if (!NILP(string)) {
2573 CHECK_STRING(string);
2574 if (!EQ(last_thing_searched, Qt))
2575 error("last thing matched was not a string");
2576 /* If the match data
2577 were abstracted into a special "match data" type instead
2578 of the typical half-assed "let the implementation be
2579 visible" form it's in, we could extend it to include
2580 the last string matched and the buffer used for that
2581 matching. But of course we can't change it as it is. */
2582 buf = decode_buffer(strbuffer, 0);
2583 XSETBUFFER(buffer, buf);
2585 if (!NILP(strbuffer)) {
2586 CHECK_INT(strbuffer);
2587 sub = XINT(strbuffer);
2588 if (sub < 0 || sub >= (int)search_regs.num_regs)
2589 args_out_of_range(strbuffer,
2590 make_int(search_regs.
2593 if (!BUFFERP(last_thing_searched))
2594 error("last thing matched was not a buffer");
2595 buffer = last_thing_searched;
2596 buf = XBUFFER(buffer);
2599 syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2601 case_action = nochange; /* We tried an initialization */
2602 /* but some C compilers blew it */
2604 if (search_regs.num_regs == 0)
2605 error("replace-match called before any match found");
2608 if (search_regs.start[sub] < BUF_BEGV(buf)
2609 || search_regs.start[sub] > search_regs.end[sub]
2610 || search_regs.end[sub] > BUF_ZV(buf))
2611 args_out_of_range(make_int(search_regs.start[sub]),
2612 make_int(search_regs.end[sub]));
2614 if (search_regs.start[0] < 0
2615 || search_regs.start[0] > search_regs.end[0]
2616 || search_regs.end[0] > XSTRING_CHAR_LENGTH(string))
2617 args_out_of_range(make_int(search_regs.start[0]),
2618 make_int(search_regs.end[0]));
2621 if (NILP(fixedcase)) {
2622 /* Decide how to casify by examining the matched text. */
2624 last = search_regs.end[sub];
2626 case_action = all_caps;
2628 /* some_multiletter_word is set nonzero if any original word
2629 is more than one letter long. */
2630 some_multiletter_word = 0;
2632 some_nonuppercase_initial = 0;
2635 for (pos = search_regs.start[sub]; pos < last; pos++) {
2637 c = BUF_FETCH_CHAR(buf, pos);
2639 c = string_char(XSTRING(string), pos);
2641 if (LOWERCASEP(buf, c)) {
2642 /* Cannot be all caps if any original char is lower case */
2645 if (!WORD_SYNTAX_P(syntax_table, prevc))
2646 some_nonuppercase_initial = 1;
2648 some_multiletter_word = 1;
2649 } else if (!NOCASEP(buf, c)) {
2651 if (!WORD_SYNTAX_P(syntax_table, prevc)) ;
2653 some_multiletter_word = 1;
2655 /* If the initial is a caseless word constituent,
2656 treat that like a lowercase initial. */
2657 if (!WORD_SYNTAX_P(syntax_table, prevc))
2658 some_nonuppercase_initial = 1;
2664 /* Convert to all caps if the old text is all caps
2665 and has at least one multiletter word. */
2666 if (!some_lowercase && some_multiletter_word)
2667 case_action = all_caps;
2668 /* Capitalize each word, if the old text has all capitalized words. */
2669 else if (!some_nonuppercase_initial && some_multiletter_word)
2670 case_action = cap_initial;
2671 else if (!some_nonuppercase_initial && some_uppercase)
2672 /* Should x -> yz, operating on X, give Yz or YZ?
2673 We'll assume the latter. */
2674 case_action = all_caps;
2676 case_action = nochange;
2679 /* Do replacement in a string. */
2680 if (!NILP(string)) {
2681 Lisp_Object before, after;
2683 speccount = specpdl_depth();
2685 Fsubstring(string, Qzero, make_int(search_regs.start[0]));
2686 after = Fsubstring(string, make_int(search_regs.end[0]), Qnil);
2688 /* Do case substitution into REPLACEMENT if desired. */
2689 if (NILP(literal)) {
2690 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2692 /* XEmacs change: rewrote this loop somewhat to make it
2693 cleaner. Also added \U, \E, etc. */
2694 Charcount literal_start = 0;
2695 /* We build up the substituted string in ACCUM. */
2700 /* OK, the basic idea here is that we scan through the
2701 replacement string until we find a backslash, which
2702 represents a substring of the original string to be
2703 substituted. We then append onto ACCUM the literal
2704 text before the backslash (LASTPOS marks the
2705 beginning of this) followed by the substring of the
2706 original string that needs to be inserted. */
2707 for (strpos = 0; strpos < stlen; strpos++) {
2708 /* If LITERAL_END is set, we've encountered a backslash
2709 (the end of literal text to be inserted). */
2710 Charcount literal_end = -1;
2711 /* If SUBSTART is set, we need to also insert the
2712 text from SUBSTART to SUBEND in the original string. */
2713 Charcount substart = -1;
2714 Charcount subend = -1;
2716 c = string_char(XSTRING(replacement), strpos);
2717 if (c == '\\' && strpos < stlen - 1) {
2718 c = string_char(XSTRING(replacement),
2721 literal_end = strpos - 1;
2722 substart = search_regs.start[0];
2723 subend = search_regs.end[0];
2724 } else if (c >= '1' && c <= '9' &&
2726 search_regs.num_regs + '0') {
2728 start[c - '0'] >= 0) {
2738 } else if (c == 'U' || c == 'u'
2739 || c == 'L' || c == 'l'
2741 /* Keep track of all case changes requested, but don't
2742 make them now. Do them later so we override
2744 if (!ul_pos_dynarr) {
2749 record_unwind_protect
2750 (free_created_dynarrs,
2755 (ul_action_dynarr)));
2757 literal_end = strpos - 1;
2758 Dynarr_add(ul_pos_dynarr,
2766 Dynarr_add(ul_action_dynarr, c);
2767 } else if (c == '\\')
2768 /* So we get just one backslash. */
2769 literal_end = strpos;
2771 if (literal_end >= 0) {
2772 Lisp_Object literal_text = Qnil;
2773 Lisp_Object substring = Qnil;
2774 if (literal_end != literal_start)
2776 Fsubstring(replacement,
2781 if (substart >= 0 && subend != substart)
2782 substring = Fsubstring(string,
2787 if (!NILP(literal_text)
2788 || !NILP(substring))
2790 concat3(accum, literal_text,
2792 literal_start = strpos + 1;
2796 if (strpos != literal_start)
2797 /* some literal text at end to be inserted */
2800 Fsubstring(replacement,
2801 make_int(literal_start),
2804 replacement = accum;
2807 /* replacement can be nil. */
2808 if (NILP(replacement))
2809 replacement = build_string("");
2811 if (case_action == all_caps)
2812 replacement = Fupcase(replacement, buffer);
2813 else if (case_action == cap_initial)
2814 replacement = Fupcase_initials(replacement, buffer);
2816 /* Now finally, we need to process the \U's, \E's, etc. */
2817 if (ul_pos_dynarr) {
2819 int cur_action = 'E';
2820 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2823 for (strpos = 0; strpos < stlen; strpos++) {
2825 string_char(XSTRING(replacement), strpos);
2826 Emchar newchar = -1;
2827 if (i < Dynarr_length(ul_pos_dynarr) &&
2828 strpos == Dynarr_at(ul_pos_dynarr, i)) {
2830 Dynarr_at(ul_action_dynarr, i);
2832 if (new_action == 'u')
2833 newchar = UPCASE(buf, curchar);
2834 else if (new_action == 'l')
2836 DOWNCASE(buf, curchar);
2838 cur_action = new_action;
2840 if (newchar == -1) {
2841 if (cur_action == 'U')
2842 newchar = UPCASE(buf, curchar);
2843 else if (cur_action == 'L')
2845 DOWNCASE(buf, curchar);
2849 if (newchar != curchar)
2850 set_string_char(XSTRING(replacement),
2855 /* frees the Dynarrs if necessary. */
2856 unbind_to(speccount, Qnil);
2857 return concat3(before, replacement, after);
2860 mc_count = begin_multiple_change(buf, search_regs.start[sub],
2861 search_regs.end[sub]);
2863 /* begin_multiple_change() records an unwind-protect, so we need to
2864 record this value now. */
2865 speccount = specpdl_depth();
2867 /* We insert the replacement text before the old text, and then
2868 delete the original text. This means that markers at the
2869 beginning or end of the original will float to the corresponding
2870 position in the replacement. */
2871 BUF_SET_PT(buf, search_regs.start[sub]);
2873 Finsert(1, &replacement);
2875 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2877 struct gcpro gcpro1;
2878 GCPRO1(replacement);
2879 for (strpos = 0; strpos < stlen; strpos++) {
2880 /* on the first iteration assert(offset==0),
2881 exactly complementing BUF_SET_PT() above.
2882 During the loop, it keeps track of the amount inserted.
2884 Charcount offset = BUF_PT(buf) - search_regs.start[sub];
2886 c = string_char(XSTRING(replacement), strpos);
2887 if (c == '\\' && strpos < stlen - 1) {
2888 /* XXX FIXME: replacing just a substring non-literally
2889 using backslash refs to the match looks dangerous. But
2890 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2891 <duwe@caldera.de> claims Finsert_buffer_substring already
2892 handles this correctly.
2894 c = string_char(XSTRING(replacement), ++strpos);
2896 Finsert_buffer_substring
2898 make_int(search_regs.start[0] +
2900 make_int(search_regs.end[0] +
2902 else if (c >= '1' && c <= '9'
2903 && c <= search_regs.num_regs + '0') {
2904 if (search_regs.start[c - '0'] >= 1)
2905 Finsert_buffer_substring
2907 make_int(search_regs.
2910 make_int(search_regs.
2913 } else if (c == 'U' || c == 'u' || c == 'L'
2914 || c == 'l' || c == 'E') {
2915 /* Keep track of all case changes requested, but don't
2916 make them now. Do them later so we override
2918 if (!ul_pos_dynarr) {
2919 ul_pos_dynarr = Dynarr_new(int);
2922 record_unwind_protect
2923 (free_created_dynarrs,
2924 Fcons(make_opaque_ptr
2927 (ul_action_dynarr)));
2929 Dynarr_add(ul_pos_dynarr, BUF_PT(buf));
2930 Dynarr_add(ul_action_dynarr, c);
2932 buffer_insert_emacs_char(buf, c);
2934 buffer_insert_emacs_char(buf, c);
2939 inslen = BUF_PT(buf) - (search_regs.start[sub]);
2940 buffer_delete_range(buf, search_regs.start[sub] + inslen,
2941 search_regs.end[sub] + inslen, 0);
2943 if (case_action == all_caps)
2944 Fupcase_region(make_int(BUF_PT(buf) - inslen),
2945 make_int(BUF_PT(buf)), buffer);
2946 else if (case_action == cap_initial)
2947 Fupcase_initials_region(make_int(BUF_PT(buf) - inslen),
2948 make_int(BUF_PT(buf)), buffer);
2950 /* Now go through and make all the case changes that were requested
2951 in the replacement string. */
2952 if (ul_pos_dynarr) {
2953 Bufpos eend = BUF_PT(buf);
2955 int cur_action = 'E';
2957 for (pos = BUF_PT(buf) - inslen; pos < eend; pos++) {
2958 Emchar curchar = BUF_FETCH_CHAR(buf, pos);
2959 Emchar newchar = -1;
2960 if (i < Dynarr_length(ul_pos_dynarr) &&
2961 pos == Dynarr_at(ul_pos_dynarr, i)) {
2962 int new_action = Dynarr_at(ul_action_dynarr, i);
2964 if (new_action == 'u')
2965 newchar = UPCASE(buf, curchar);
2966 else if (new_action == 'l')
2967 newchar = DOWNCASE(buf, curchar);
2969 cur_action = new_action;
2971 if (newchar == -1) {
2972 if (cur_action == 'U')
2973 newchar = UPCASE(buf, curchar);
2974 else if (cur_action == 'L')
2975 newchar = DOWNCASE(buf, curchar);
2979 if (newchar != curchar)
2980 buffer_replace_char(buf, pos, newchar, 0, 0);
2984 /* frees the Dynarrs if necessary. */
2985 unbind_to(speccount, Qnil);
2986 end_multiple_change(buf, mc_count);
2991 static Lisp_Object match_limit(Lisp_Object num, int beginningp)
2993 /* This function has been Mule-ized. */
2998 if (n < 0 || search_regs.num_regs <= 0)
2999 args_out_of_range(num, make_int(search_regs.num_regs));
3000 if (n >= search_regs.num_regs || search_regs.start[n] < 0)
3002 return make_int(beginningp ? search_regs.start[n] : search_regs.end[n]);
3005 DEFUN("match-beginning", Fmatch_beginning, 1, 1, 0, /*
3006 Return position of start of text matched by last regexp search.
3007 NUM, specifies which parenthesized expression in the last regexp.
3008 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3009 Zero means the entire text matched by the whole regexp or whole string.
3013 return match_limit(num, 1);
3016 DEFUN("match-end", Fmatch_end, 1, 1, 0, /*
3017 Return position of end of text matched by last regexp search.
3018 NUM specifies which parenthesized expression in the last regexp.
3019 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3020 Zero means the entire text matched by the whole regexp or whole string.
3024 return match_limit(num, 0);
3027 DEFUN("match-data", Fmatch_data, 0, 2, 0, /*
3028 Return a list containing all info on what the last regexp search matched.
3029 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
3030 All the elements are markers or nil (nil if the Nth pair didn't match)
3031 if the last match was on a buffer; integers or nil if a string was matched.
3032 Use `store-match-data' to reinstate the data in this list.
3034 If INTEGERS (the optional first argument) is non-nil, always use integers
3035 \(rather than markers) to represent buffer positions.
3036 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
3037 to hold all the values, and if INTEGERS is non-nil, no consing is done.
3041 /* This function has been Mule-ized. */
3042 Lisp_Object tail, prev;
3047 if (NILP(last_thing_searched))
3048 /*error ("match-data called before any match found"); */
3051 data = alloca_array(Lisp_Object, 2 * search_regs.num_regs);
3054 for (i = 0; i < search_regs.num_regs; i++) {
3055 Bufpos start = search_regs.start[i];
3057 if (EQ(last_thing_searched, Qt)
3058 || !NILP(integers)) {
3059 data[2 * i] = make_int(start);
3060 data[2 * i + 1] = make_int(search_regs.end[i]);
3061 } else if (BUFFERP(last_thing_searched)) {
3062 data[2 * i] = Fmake_marker();
3063 Fset_marker(data[2 * i],
3065 last_thing_searched);
3066 data[2 * i + 1] = Fmake_marker();
3067 Fset_marker(data[2 * i + 1],
3068 make_int(search_regs.end[i]),
3069 last_thing_searched);
3071 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
3076 data[2 * i] = data[2 * i + 1] = Qnil;
3079 return Flist(2 * len + 2, data);
3081 /* If REUSE is a list, store as many value elements as will fit
3082 into the elements of REUSE. */
3083 for (prev = Qnil, i = 0, tail = reuse; CONSP(tail);
3084 i++, tail = XCDR(tail)) {
3085 if (i < 2 * len + 2)
3086 XCAR(tail) = data[i];
3092 /* If we couldn't fit all value elements into REUSE,
3093 cons up the rest of them and add them to the end of REUSE. */
3094 if (i < 2 * len + 2)
3095 XCDR(prev) = Flist(2 * len + 2 - i, data + i);
3100 DEFUN("store-match-data", Fstore_match_data, 1, 1, 0, /*
3101 Set internal data on last search match from elements of LIST.
3102 LIST should have been created by calling `match-data' previously.
3106 /* This function has been Mule-ized. */
3108 REGISTER Lisp_Object marker;
3112 if (running_asynch_code)
3115 CONCHECK_LIST(list);
3117 /* Unless we find a marker with a buffer in LIST, assume that this
3118 match data came from a string. */
3119 last_thing_searched = Qt;
3121 /* Allocate registers if they don't already exist. */
3122 length = XINT(Flength(list)) / 2;
3123 num_regs = search_regs.num_regs;
3125 if (length > num_regs) {
3126 if (search_regs.num_regs == 0) {
3127 search_regs.start = xnew_atomic_array(regoff_t, length);
3128 search_regs.end = xnew_atomic_array(regoff_t, length);
3130 XREALLOC_ARRAY(search_regs.start, regoff_t, length);
3131 XREALLOC_ARRAY(search_regs.end, regoff_t, length);
3134 search_regs.num_regs = length;
3137 for (i = 0; i < num_regs; i++) {
3138 marker = Fcar(list);
3140 search_regs.start[i] = -1;
3143 if (MARKERP(marker)) {
3144 if (XMARKER(marker)->buffer == 0)
3147 XSETBUFFER(last_thing_searched,
3148 XMARKER(marker)->buffer);
3151 CHECK_INT_COERCE_MARKER(marker);
3152 search_regs.start[i] = XINT(marker);
3155 marker = Fcar(list);
3156 if (MARKERP(marker) && XMARKER(marker)->buffer == 0)
3159 CHECK_INT_COERCE_MARKER(marker);
3160 search_regs.end[i] = XINT(marker);
3168 /* If non-zero the match data have been saved in saved_search_regs
3169 during the execution of a sentinel or filter. */
3170 static int search_regs_saved;
3171 static struct re_registers saved_search_regs;
3173 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3174 if asynchronous code (filter or sentinel) is running. */
3175 static void save_search_regs(void)
3177 if (!search_regs_saved) {
3178 saved_search_regs.num_regs = search_regs.num_regs;
3179 saved_search_regs.start = search_regs.start;
3180 saved_search_regs.end = search_regs.end;
3181 search_regs.num_regs = 0;
3182 search_regs.start = 0;
3183 search_regs.end = 0;
3185 search_regs_saved = 1;
3189 /* Called upon exit from filters and sentinels. */
3190 void restore_match_data(void)
3192 if (search_regs_saved) {
3193 if (search_regs.num_regs > 0) {
3194 xfree(search_regs.start);
3195 xfree(search_regs.end);
3197 search_regs.num_regs = saved_search_regs.num_regs;
3198 search_regs.start = saved_search_regs.start;
3199 search_regs.end = saved_search_regs.end;
3201 search_regs_saved = 0;
3205 /* Quote a string to inactivate reg-expr chars */
3207 DEFUN("regexp-quote", Fregexp_quote, 1, 1, 0, /*
3208 Return a regexp string which matches exactly STRING and nothing else.
3212 REGISTER Bufbyte *in, *out, *end;
3213 REGISTER Bufbyte *temp;
3215 CHECK_STRING(string);
3217 temp = (Bufbyte *) alloca(XSTRING_LENGTH(string) * 2);
3219 /* Now copy the data into the new string, inserting escapes. */
3221 in = XSTRING_DATA(string);
3222 end = in + XSTRING_LENGTH(string);
3226 Emchar c = charptr_emchar(in);
3228 if (c == '[' || c == ']'
3229 || c == '*' || c == '.' || c == '\\'
3230 || c == '?' || c == '+' || c == '^' || c == '$')
3232 out += set_charptr_emchar(out, c);
3236 return make_string(temp, out - temp);
3239 DEFUN("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3240 Set the regexp to be used to match a word in regular-expression searching.
3241 #### Not yet implemented. Currently does nothing.
3242 #### Do not use this yet. Its calling interface is likely to change.
3249 /************************************************************************/
3250 /* initialization */
3251 /************************************************************************/
3253 void syms_of_search(void)
3256 DEFERROR_STANDARD(Qsearch_failed, Qinvalid_operation);
3257 DEFERROR_STANDARD(Qinvalid_regexp, Qsyntax_error);
3259 #ifdef EF_USE_COMPRE
3260 defsymbol(&Qcompre, "compre");
3261 defsymbol(&Qcomprep, "comprep");
3263 DEFSUBR(Fcompile_regexp);
3264 DEFSUBR(Fdefregexp);
3267 DEFSUBR(Flooking_at);
3268 DEFSUBR(Fposix_looking_at);
3269 DEFSUBR(Fstring_match);
3270 DEFSUBR(Fposix_string_match);
3271 DEFSUBR(Fskip_chars_forward);
3272 DEFSUBR(Fskip_chars_backward);
3273 DEFSUBR(Fskip_syntax_forward);
3274 DEFSUBR(Fskip_syntax_backward);
3275 DEFSUBR(Fsearch_forward);
3276 DEFSUBR(Fsearch_backward);
3277 DEFSUBR(Fword_search_forward);
3278 DEFSUBR(Fword_search_backward);
3279 DEFSUBR(Fre_search_forward);
3280 DEFSUBR(Fre_search_backward);
3281 DEFSUBR(Fposix_search_forward);
3282 DEFSUBR(Fposix_search_backward);
3283 DEFSUBR(Freplace_match);
3284 DEFSUBR(Fmatch_beginning);
3285 DEFSUBR(Fmatch_end);
3286 DEFSUBR(Fmatch_data);
3287 DEFSUBR(Fstore_match_data);
3288 DEFSUBR(Fregexp_quote);
3289 DEFSUBR(Fset_word_regexp);
3292 void reinit_vars_of_search(void)
3296 last_thing_searched = Qnil;
3297 staticpro_nodump(&last_thing_searched);
3299 for (i = 0; i < REGEXP_CACHE_SIZE; ++i) {
3300 searchbufs[i].buf.allocated = 100;
3301 searchbufs[i].buf.buffer = (unsigned char *)xmalloc_atomic(100);
3302 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3303 searchbufs[i].regexp = Qnil;
3304 staticpro_nodump(&searchbufs[i].regexp);
3305 searchbufs[i].next =
3306 (i == REGEXP_CACHE_SIZE - 1 ? 0 : &searchbufs[i + 1]);
3308 searchbuf_head = &searchbufs[0];
3311 void vars_of_search(void)
3313 reinit_vars_of_search();
3315 DEFVAR_LISP("forward-word-regexp", &Vforward_word_regexp /*
3316 *Regular expression to be used in `forward-word'.
3317 #### Not yet implemented.
3319 Vforward_word_regexp = Qnil;
3321 DEFVAR_LISP("backward-word-regexp", &Vbackward_word_regexp /*
3322 *Regular expression to be used in `backward-word'.
3323 #### Not yet implemented.
3325 Vbackward_word_regexp = Qnil;
3328 void complex_vars_of_search(void)
3330 Vskip_chars_range_table = Fmake_range_table();
3331 staticpro(&Vskip_chars_range_table);