1 /* String search routines for SXEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of SXEmacs
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* Synched up with: FSF 19.29, except for region-cache stuff. */
23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
25 /* This file has been Mule-ized except for the TRT stuff. */
31 #include "ui/insdel.h"
33 #ifdef REGION_CACHE_NEEDS_WORK
34 #include "region-cache.h"
38 #include <sys/types.h>
45 #define TRANSLATE(table, pos) \
46 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
49 #define REGEXP_CACHE_SIZE 0x80
50 #define REGEXP_CACHE_HASH_MASK (REGEXP_CACHE_SIZE-1)
51 #define REGEXP_FASTMAP_SIZE 0400
53 #define __REGEXP_DEBUG__(args...) fprintf(stderr, "REGEXP " args)
54 #ifndef REGEXP_DEBUG_FLAG
55 #define REGEXP_DEBUG(args...)
57 #define REGEXP_DEBUG(args...) __REGEXP_DEBUG__(args)
59 #define REGEXP_DEBUG_COMPRE(args...) REGEXP_DEBUG("[compre]: " args)
60 #define REGEXP_DEBUG_COMPRE_C(args...) REGEXP_DEBUG("[compre/cache]: " args)
61 #define REGEXP_DEBUG_COMPRE_H(args...) REGEXP_DEBUG("[compre/hash]: " args)
62 #define REGEXP_CRITICAL(args...) __REGEXP_DEBUG__("CRITICAL: " args)
65 /* If the regexp is non-nil, then the buffer contains the compiled form
66 of that regexp, suitable for searching. */
68 struct regexp_cache *next;
70 struct re_pattern_buffer buf;
71 char fastmap[REGEXP_FASTMAP_SIZE];
72 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
76 /* The instances of that struct. */
77 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
79 /* The head of the linked list; points to the most recently used buffer. */
80 static struct regexp_cache *searchbuf_head;
82 /* Every call to re_match, etc., must pass &search_regs as the regs
83 argument unless you can show it is unnecessary (i.e., if re_match
84 is certainly going to be called again before region-around-match
87 Since the registers are now dynamically allocated, we need to make
88 sure not to refer to the Nth register before checking that it has
89 been allocated by checking search_regs.num_regs.
91 The regex code keeps track of whether it has allocated the search
92 buffer using bits in the re_pattern_buffer. This means that whenever
93 you compile a new pattern, it completely forgets whether it has
94 allocated any registers, and will allocate new registers the next
95 time you call a searching or matching function. Therefore, we need
96 to call re_set_registers after compiling a new pattern or after
97 setting the match registers, so that the regex functions will be
98 able to free or re-allocate it properly. */
100 /* Note: things get trickier under Mule because the values returned from
101 the regexp routines are in Bytinds but we need them to be in Bufpos's.
102 We take the easy way out for the moment and just convert them immediately.
103 We could be more clever by not converting them until necessary, but
104 that gets real ugly real fast since the buffer might have changed and
105 the positions might be out of sync or out of range.
107 static struct re_registers search_regs;
109 /* The buffer in which the last search was performed, or
110 Qt if the last search was done in a string;
111 Qnil if no searching has been done yet. */
112 static Lisp_Object last_thing_searched;
114 /* error condition signalled when regexp compile_pattern fails */
116 Lisp_Object Qinvalid_regexp;
118 /* Regular expressions used in forward/backward-word */
119 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
121 /* range table for use with skip_chars. Only needed for Mule. */
122 Lisp_Object Vskip_chars_range_table;
124 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len);
125 static void clear_unused_search_regs(struct re_registers *regp, int no_sub);
126 static void save_search_regs(void);
127 static Bufpos simple_search(struct buffer *buf, Bufbyte * base_pat,
128 Bytecount len, Bytind pos, Bytind lim,
129 EMACS_INT n, Lisp_Object trt);
130 static Bufpos boyer_moore(struct buffer *buf, Bufbyte * base_pat,
131 Bytecount len, Bytind pos, Bytind lim,
132 EMACS_INT n, Lisp_Object trt,
133 Lisp_Object inverse_trt, int charset_base);
134 static Bufpos search_buffer(struct buffer *buf, Lisp_Object str,
135 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
136 Lisp_Object trt, Lisp_Object inverse_trt,
139 static void matcher_overflow(void)
141 error("Stack overflow in regexp matcher");
148 #define COMPRE_T struct re_pattern_buffer
149 #define COMPREP(obj) \
150 (DYNACATP(obj) && EQ(XDYNACAT_TYPE(obj), Qcompre))
151 #define COMPRE_GET(obj) (COMPRE_T*)XDYNACAT(obj)->ptr
152 #define COMPRE_PUT(obj, rec) do { \
153 obj = make_dynacat((void*)rec); \
154 set_dynacat_printer(obj, compre_prfun); \
155 set_dynacat_finaliser(obj, compre_finfun); \
156 XDYNACAT_TYPE(obj) = Qcompre; \
158 #define XCOMPRE_GET(obj) XSTRING(obj)->compre
159 #define XCOMPRE_PUT(obj, b) XSTRING(obj)->compre = b
160 #define XCOMPRE_REM(obj) XSTRING(obj)->compre = Qnil
162 /* idenitifier for hard-cached rexp */
163 Lisp_Object Qcompre, Qcomprep;
164 static Lisp_Object cache_regexp(Lisp_Object, struct re_pattern_buffer*);
165 static COMPRE_T* make_compre(void)
166 #if defined(__GNUC__)
167 __attribute__((unused))
170 static COMPRE_T* clone_compre(COMPRE_T*);
171 static inline void free_compre(COMPRE_T*);
174 /* Compile a regexp and signal a Lisp error if anything goes wrong.
175 PATTERN is the pattern to compile.
176 CP is the place to put the result.
177 TRANSLATE is a translation table for ignoring case, or NULL for none.
178 REGP is the structure that says where to store the "register"
179 values that will result from matching this pattern.
180 If it is 0, we should compile the pattern not to record any
181 subexpression bounds.
182 POSIX is nonzero if we want full backtracking (POSIX style)
183 for this pattern. 0 means backtrack only enough to get a valid match. */
186 compile_pattern_1(struct regexp_cache *cp, Lisp_Object pattern,
187 Lisp_Object translate, struct re_registers *regp, int posix,
194 cp->buf.translate = translate;
196 old = re_set_syntax(RE_SYNTAX_EMACS
197 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
199 re_compile_pattern((char *)XSTRING_DATA(pattern),
200 XSTRING_LENGTH(pattern), &cp->buf);
203 maybe_signal_error(Qinvalid_regexp, list1(build_string(val)),
208 cp->regexp = Fcopy_sequence(pattern);
213 /* extremely simple hash table, no collision management is performed */
214 static struct re_pattern_buffer *
215 try_pattern_from_cache(Lisp_Object pattern,
216 struct re_registers *regp,
217 Lisp_Object translate,
221 struct regexp_cache *cp;
225 pathash = internal_hash(pattern, 0);
226 place = (int)((pathash>>4) & REGEXP_CACHE_HASH_MASK);
227 cp = &(searchbufs[place]);
228 REGEXP_DEBUG_COMPRE_H("trying hash place: %d.\n", place);
230 if (!NILP(Fstring_equal(cp->regexp, pattern))
231 && EQ(cp->buf.translate, translate) && cp->posix == posix) {
233 REGEXP_DEBUG_COMPRE_H("match: %d.\n", place);
236 /* collision or empty slot, dont care just put the
237 * new regexp there */
238 if (!compile_pattern_1(
239 cp, pattern, translate, regp, posix, errb))
241 REGEXP_DEBUG_COMPRE_H("collision: %d.\n", place);
246 #else /* !EF_USE_COMPRE */
247 static struct re_pattern_buffer *
248 try_pattern_from_cache(Lisp_Object pattern,
249 struct re_registers *regp,
250 Lisp_Object translate,
254 struct regexp_cache *cp, **cpp;
256 for (cpp = &searchbuf_head;; cpp = &cp->next) {
258 if (!NILP(Fstring_equal(cp->regexp, pattern))
259 && EQ(cp->buf.translate, translate)
260 && cp->posix == posix)
263 /* If we're at the end of the cache, compile
264 * into the last cell. */
266 if (!compile_pattern_1(
267 cp, pattern, translate, regp, posix, errb))
273 /* When we get here, cp (aka *cpp) contains the compiled pattern,
274 either because we found it in the cache or because we just compiled it.
275 Move it to the front of the queue to mark it as most recently used. */
277 cp->next = searchbuf_head;
282 #endif /* EF_USE_COMPRE */
284 /* Compile a regexp if necessary, but first check to see if there's one in
286 PATTERN is the pattern to compile.
287 TRANSLATE is a translation table for ignoring case, or NULL for none.
288 REGP is the structure that says where to store the "register"
289 values that will result from matching this pattern.
290 If it is 0, we should compile the pattern not to record any
291 subexpression bounds.
292 POSIX is nonzero if we want full backtracking (POSIX style)
293 for this pattern. 0 means backtrack only enough to get a valid match. */
295 struct re_pattern_buffer *
296 compile_pattern(Lisp_Object pattern,
297 struct re_registers *regp,
298 Lisp_Object translate,
302 struct re_pattern_buffer *result = NULL;
305 Lisp_Object rc = XCOMPRE_GET(pattern);
307 if (!NILP(rc) && COMPREP(rc) &&
308 (result = COMPRE_GET(rc)) &&
309 result->re_ngroups >= 0 && result->re_ngroups < 256) {
310 REGEXP_DEBUG_COMPRE_C("using cache: 0x%x.\n",
311 (unsigned int)result);
315 result = try_pattern_from_cache(
316 pattern, regp, translate, posix, errb);
318 cache_regexp(pattern, result);
323 /* Advise the searching functions about the space we have allocated
324 for register data. */
326 re_set_registers(result, regp, regp->num_regs,
327 regp->start, regp->end);
334 compre_prfun(Lisp_Object obj, Lisp_Object pcfun, int escflag)
336 write_fmt_str(pcfun, "#<compiled regexp %lx",
337 (long unsigned int)((COMPRE_GET(obj))->buffer));
342 compre_finfun(Lisp_Object obj, int unused)
344 REGEXP_DEBUG_COMPRE_C("0x%lx@0x%lx will pass away\n",
345 (long unsigned int)COMPRE_GET(obj),
346 (long unsigned int)obj);
347 free_compre(COMPRE_GET(obj));
354 COMPRE_T *result = xnew_and_zero(COMPRE_T);
356 result->fastmap = xmalloc_atomic(REGEXP_FASTMAP_SIZE);
363 clone_compre(COMPRE_T *src)
365 COMPRE_T *result = xnew_and_zero(COMPRE_T);
367 /* alloc and clone fastmap */
368 result->fastmap = (char*)xmalloc_atomic(REGEXP_FASTMAP_SIZE);
369 memcpy(result->fastmap, src->fastmap, REGEXP_FASTMAP_SIZE);
371 /* alloc and clone buffer */
372 result->buffer = (unsigned char *)xmalloc_atomic(src->allocated);
373 memcpy(result->buffer, src->buffer, src->allocated);
374 result->allocated = src->allocated;
375 result->used = src->used;
377 result->syntax = src->syntax;
378 result->translate = src->translate;
379 result->re_nsub = src->re_nsub;
380 result->re_ngroups = src->re_ngroups;
382 result->can_be_null = src->can_be_null;
383 result->regs_allocated = src->regs_allocated;
384 result->fastmap_accurate = src->fastmap_accurate;
386 result->no_sub = src->no_sub;
387 result->not_bol = src->not_bol;
388 result->not_eol = src->not_eol;
389 result->newline_anchor = src->newline_anchor;
391 /* alloc and clone ext_to_int_register */
392 result->external_to_internal_register =
393 (int*)xmalloc_atomic(
394 sizeof(int) * src->external_to_internal_register_size);
395 memcpy(result->external_to_internal_register,
396 src->external_to_internal_register,
397 sizeof(int)*src->external_to_internal_register_size);
398 result->external_to_internal_register_size =
399 src->external_to_internal_register_size;
405 free_compre(COMPRE_T *buf)
415 if (buf->external_to_internal_register) {
416 xfree(buf->external_to_internal_register);
417 buf->external_to_internal_register = NULL;
423 cache_regexp(Lisp_Object regexp, COMPRE_T *buf)
431 resbuf = clone_compre(buf);
432 COMPRE_PUT(rc, resbuf);
433 XCOMPRE_PUT(regexp, rc);
435 REGEXP_DEBUG_COMPRE_C("caching 0x%08x into 0x%08x\n",
436 (unsigned int)resbuf, (unsigned int)rc);
440 DEFUN("compile-regexp", Fcompile_regexp, 1, 1, 0, /*
441 Forcibly compile REGEXP and store the result in object-plist.
445 CHECK_STRING(regexp);
448 compile_pattern(regexp, &search_regs, Qnil, 0, ERROR_ME);
453 DEFUN("defregexp", Fdefregexp, 2, UNEVALLED, 0, /*
454 \(defregexp SYMBOL REGEXP DOCSTRING\)
455 Like `defconst' but for forcing compiled regexps.
457 The same restrictions that apply to `defconst' apply here in regard
458 to user variables. You shouldn't use this for regular expressions
459 that a user might want to customise. Instead, use `defcustom' with
464 /* This function can GC */
465 Lisp_Object sym = XCAR(args);
466 Lisp_Object pat = Feval(XCAR(args = XCDR(args)));
472 pat = Fcompile_regexp(pat);
473 Fset_default(sym, pat);
476 if (!NILP(args = XCDR(args))) {
477 Lisp_Object doc = XCAR(args);
478 Fput(sym, Qvariable_documentation, doc);
479 if (!NILP(args = XCDR(args)))
480 error("too many arguments");
483 if (!NILP(Vfile_domain))
484 Fput(sym, Qvariable_domain, Vfile_domain);
487 LOADHIST_ATTACH(sym);
490 #endif /* EF_USE_COMPRE */
492 /* Error condition used for failing searches */
493 Lisp_Object Qsearch_failed;
495 static Lisp_Object signal_failure(Lisp_Object arg)
498 Fsignal(Qsearch_failed, list1(arg));
499 return Qnil; /* Not reached. */
502 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
503 done after each regexp match that uses the search regs.
505 We could get a potential speedup by not converting the search registers
506 until it's really necessary, e.g. when match-data or replace-match is
507 called. However, this complexifies the code a lot (e.g. the buffer
508 could have changed and the Bytinds stored might be invalid) and is
509 probably not a great time-saver. */
511 static void fixup_search_regs_for_buffer(struct buffer *buf)
514 int num_regs = search_regs.num_regs;
516 for (i = 0; i < num_regs; i++) {
517 if (search_regs.start[i] >= 0)
518 search_regs.start[i] =
519 bytind_to_bufpos(buf, search_regs.start[i]);
520 if (search_regs.end[i] >= 0)
522 bytind_to_bufpos(buf, search_regs.end[i]);
526 /* Similar but for strings. */
527 static void fixup_search_regs_for_string(Lisp_Object string)
530 int num_regs = search_regs.num_regs;
532 /* #### bytecount_to_charcount() is not that efficient. This function
533 could be faster if it did its own conversion (using INC_CHARPTR()
534 and such), because the register ends are likely to be somewhat ordered.
535 (Even if not, you could sort them.)
537 Think about this if this function is a time hog, which it's probably
539 for (i = 0; i < num_regs; i++) {
540 if (search_regs.start[i] > 0) {
541 search_regs.start[i] =
542 bytecount_to_charcount(XSTRING_DATA(string),
543 search_regs.start[i]);
545 if (search_regs.end[i] > 0) {
547 bytecount_to_charcount(XSTRING_DATA(string),
554 looking_at_1(Lisp_Object string, struct buffer *buf, int posix)
556 /* This function has been Mule-ized, except for the trt table handling. */
561 struct re_pattern_buffer *bufp;
563 if (running_asynch_code)
566 CHECK_STRING(string);
567 bufp = compile_pattern(string, &search_regs,
568 (!NILP(buf->case_fold_search)
569 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
574 /* Get pointers and sizes of the two strings
575 that make up the visible portion of the buffer. */
577 p1 = BI_BUF_BEGV(buf);
578 p2 = BI_BUF_CEILING_OF(buf, p1);
580 s2 = BI_BUF_ZV(buf) - p2;
582 regex_match_object = Qnil;
583 regex_emacs_buffer = buf;
584 i = re_match_2(bufp, (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
585 s1, (char *)BI_BUF_BYTE_ADDRESS(buf, p2), s2,
586 BI_BUF_PT(buf) - BI_BUF_BEGV(buf), &search_regs,
587 BI_BUF_ZV(buf) - BI_BUF_BEGV(buf));
592 val = (0 <= i ? Qt : Qnil);
596 int num_regs = search_regs.num_regs;
597 for (i = 0; i < num_regs; i++)
598 if (search_regs.start[i] >= 0) {
599 search_regs.start[i] += BI_BUF_BEGV(buf);
600 search_regs.end[i] += BI_BUF_BEGV(buf);
603 XSETBUFFER(last_thing_searched, buf);
604 fixup_search_regs_for_buffer(buf);
608 DEFUN("looking-at", Flooking_at, 1, 2, 0, /*
609 Return t if text after point matches regular expression REGEXP.
610 This function modifies the match data that `match-beginning',
611 `match-end' and `match-data' access; save and restore the match
612 data if you want to preserve them.
614 Optional argument BUFFER defaults to the current buffer.
618 return looking_at_1(regexp, decode_buffer(buffer, 0), 0);
621 DEFUN("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
622 Return t if text after point matches regular expression REGEXP.
623 Find the longest match, in accord with Posix regular expression rules.
624 This function modifies the match data that `match-beginning',
625 `match-end' and `match-data' access; save and restore the match
626 data if you want to preserve them.
628 Optional argument BUFFER defaults to the current buffer.
632 return looking_at_1(regexp, decode_buffer(buffer, 0), 1);
636 string_match_1(Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
637 struct buffer *buf, int posix)
639 /* This function has been Mule-ized, except for the trt table handling. */
642 struct re_pattern_buffer *bufp;
644 if (running_asynch_code)
647 CHECK_STRING(regexp);
648 CHECK_STRING(string);
653 Charcount len = XSTRING_CHAR_LENGTH(string);
657 if (s < 0 && -s <= len)
659 else if (0 > s || s > len)
660 args_out_of_range(string, start);
663 bufp = compile_pattern(regexp, &search_regs,
664 (!NILP(buf->case_fold_search)
665 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
669 Bytecount bis = charcount_to_bytecount(XSTRING_DATA(string), s);
670 regex_match_object = string;
671 regex_emacs_buffer = buf;
672 val = re_search(bufp, (char *)XSTRING_DATA(string),
673 XSTRING_LENGTH(string), bis,
674 XSTRING_LENGTH(string) - bis, &search_regs);
680 last_thing_searched = Qt;
681 fixup_search_regs_for_string(string);
682 return make_int(bytecount_to_charcount(XSTRING_DATA(string), val));
685 DEFUN("string-match", Fstring_match, 2, 4, 0, /*
686 Return index of start of first match for REGEXP in STRING, or nil.
687 If third arg START is non-nil, start search at that index in STRING.
688 For index of first char beyond the match, do (match-end 0).
689 `match-end' and `match-beginning' also give indices of substrings
690 matched by parenthesis constructs in the pattern.
692 Optional arg BUFFER controls how case folding is done (according to
693 the value of `case-fold-search' in that buffer and that buffer's case
694 tables) and defaults to the current buffer.
696 (regexp, string, start, buffer))
698 return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
702 DEFUN("posix-string-match", Fposix_string_match, 2, 4, 0, /*
703 Return index of start of first match for REGEXP in STRING, or nil.
704 Find the longest match, in accord with Posix regular expression rules.
705 If third arg START is non-nil, start search at that index in STRING.
706 For index of first char beyond the match, do (match-end 0).
707 `match-end' and `match-beginning' also give indices of substrings
708 matched by parenthesis constructs in the pattern.
710 Optional arg BUFFER controls how case folding is done (according to
711 the value of `case-fold-search' in that buffer and that buffer's case
712 tables) and defaults to the current buffer.
714 (regexp, string, start, buffer))
716 return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
720 /* Match REGEXP against STRING, searching all of STRING,
721 and return the index of the match, or negative on failure.
722 This does not clobber the match data. */
725 fast_string_match(Lisp_Object regexp, const Bufbyte * nonreloc,
726 Lisp_Object reloc, Bytecount offset,
727 Bytecount length, int case_fold_search,
728 Error_behavior errb, int no_quit)
730 /* This function has been Mule-ized, except for the trt table handling. */
732 const Bufbyte *newnonreloc = (const Bufbyte*)nonreloc;
733 struct re_pattern_buffer *bufp;
735 bufp = compile_pattern(regexp, 0,
737 ? XCASE_TABLE_DOWNCASE(current_buffer->
741 return -1; /* will only do this when errb != ERROR_ME */
745 no_quit_in_re_search = 1;
747 fixup_internal_substring(nonreloc, reloc, offset, &length);
753 newnonreloc = XSTRING_DATA(reloc);
755 /* QUIT could relocate RELOC. Therefore we must
756 alloca() and copy. No way around this except some
757 serious rewriting of re_search(). */
758 /* yeah, let's rewrite this bugger, the warning
759 hereafter is inevitable too */
762 fixup_internal_substring should
763 have updated length, if it didn't
764 return with failure...
767 newnonreloc = alloca(length);
768 memcpy((void*)newnonreloc, (void*)XSTRING_DATA(reloc), length);
772 /* #### evil current-buffer dependency */
773 regex_match_object = reloc;
774 regex_emacs_buffer = current_buffer;
775 val = re_search(bufp, (const char*)newnonreloc + offset, length, 0,
778 no_quit_in_re_search = 0;
782 Bytecount fast_lisp_string_match(Lisp_Object regex, Lisp_Object string)
784 return fast_string_match(regex, 0, string, 0, -1, 0, ERROR_ME, 0);
787 #ifdef REGION_CACHE_NEEDS_WORK
788 /* The newline cache: remembering which sections of text have no newlines. */
790 /* If the user has requested newline caching, make sure it's on.
791 Otherwise, make sure it's off.
792 This is our cheezy way of associating an action with the change of
793 state of a buffer-local variable. */
794 static void newline_cache_on_off(struct buffer *buf)
796 if (NILP(buf->cache_long_line_scans)) {
797 /* It should be off. */
798 if (buf->newline_cache) {
799 free_region_cache(buf->newline_cache);
800 buf->newline_cache = 0;
803 /* It should be on. */
804 if (buf->newline_cache == 0)
805 buf->newline_cache = new_region_cache();
810 /* Search in BUF for COUNT instances of the character TARGET between
813 If COUNT is positive, search forwards; END must be >= START.
814 If COUNT is negative, search backwards for the -COUNTth instance;
815 END must be <= START.
816 If COUNT is zero, do anything you please; run rogue, for all I care.
818 If END is zero, use BEGV or ZV instead, as appropriate for the
819 direction indicated by COUNT.
821 If we find COUNT instances, set *SHORTAGE to zero, and return the
822 position after the COUNTth match. Note that for reverse motion
823 this is not the same as the usual convention for Emacs motion commands.
825 If we don't find COUNT instances before reaching END, set *SHORTAGE
826 to the number of TARGETs left unfound, and return END.
828 If ALLOW_QUIT is non-zero, call QUIT periodically. */
831 bi_scan_buffer(struct buffer *buf, Emchar target, Bytind st, Bytind en,
832 EMACS_INT count, EMACS_INT * shortage, int allow_quit)
834 /* This function has been Mule-ized. */
835 Bytind lim = en > 0 ? en :
836 ((count > 0) ? BI_BUF_ZV(buf) : BI_BUF_BEGV(buf));
838 /* #### newline cache stuff in this function not yet ported */
847 /* Due to the Mule representation of characters in a buffer,
848 we can simply search for characters in the range 0 - 127
849 directly. For other characters, we do it the "hard" way.
850 Note that this way works for all characters but the other
852 if (target >= 0200) {
853 while (st < lim && count > 0) {
854 if (BI_BUF_FETCH_CHAR(buf, st) == target)
861 while (st < lim && count > 0) {
865 _ceil_ = BI_BUF_CEILING_OF(buf, st);
866 _ceil_ = min(lim, _ceil_);
869 memchr(BI_BUF_BYTE_ADDRESS(buf, st),
870 (int)target, _ceil_ - st);
873 st = BI_BUF_PTR_BYTE_POS(buf,
887 if (target >= 0200) {
888 while (st > lim && count < 0) {
890 if (BI_BUF_FETCH_CHAR(buf, st) == target)
896 while (st > lim && count < 0) {
901 _floor_ = BI_BUF_FLOOR_OF(buf, st);
902 _floor_ = max(lim, _floor_);
903 /* No memrchr() ... */
904 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE(buf, st);
905 floorptr = BI_BUF_BYTE_ADDRESS(buf, _floor_);
906 while (bufptr >= floorptr) {
908 /* At this point, both ST and BUFPTR
909 refer to the same character. When
910 the loop terminates, ST will always
911 point to the last character we
913 if (*(unsigned char *)bufptr ==
914 (unsigned char)target) {
930 /* We found the character we were looking for; we have to return
931 the position *after* it due to the strange way that the return
940 scan_buffer(struct buffer * buf, Emchar target, Bufpos start, Bufpos end,
941 EMACS_INT count, EMACS_INT * shortage, int allow_quit)
944 Bytind bi_start, bi_end;
946 bi_start = bufpos_to_bytind(buf, start);
948 bi_end = bufpos_to_bytind(buf, end);
951 bi_retval = bi_scan_buffer(buf, target, bi_start, bi_end, count,
952 shortage, allow_quit);
953 return bytind_to_bufpos(buf, bi_retval);
956 Bytind bi_find_next_newline_no_quit(struct buffer * buf, Bytind from, int count)
958 return bi_scan_buffer(buf, '\n', from, 0, count, 0, 0);
961 Bufpos find_next_newline_no_quit(struct buffer * buf, Bufpos from, int count)
963 return scan_buffer(buf, '\n', from, 0, count, 0, 0);
966 Bufpos find_next_newline(struct buffer * buf, Bufpos from, int count)
968 return scan_buffer(buf, '\n', from, 0, count, 0, 1);
972 bi_find_next_emchar_in_string(Lisp_String * str, Emchar target, Bytind st,
975 /* This function has been Mule-ized. */
976 Bytind lim = string_length(str) - 1;
977 Bufbyte *s = string_data(str);
982 /* Due to the Mule representation of characters in a buffer,
983 we can simply search for characters in the range 0 - 127
984 directly. For other characters, we do it the "hard" way.
985 Note that this way works for all characters but the other
987 if (target >= 0200) {
988 while (st < lim && count > 0) {
989 if (string_char(str, st) == target)
991 INC_CHARBYTIND(s, st);
996 while (st < lim && count > 0) {
998 (Bufbyte *) memchr(charptr_n_addr(s, st),
999 (int)target, lim - st);
1002 st = (Bytind) (bufptr - s) + 1;
1010 /* Like find_next_newline, but returns position before the newline,
1011 not after, and only search up to TO. This isn't just
1012 find_next_newline (...)-1, because you might hit TO. */
1014 find_before_next_newline(struct buffer * buf, Bufpos from, Bufpos to, int count)
1017 Bufpos pos = scan_buffer(buf, '\n', from, to, count, &shortage, 1);
1025 /* This function synched with FSF 21.1 */
1027 skip_chars(struct buffer *buf, int forwardp, int syntaxp,
1028 Lisp_Object string, Lisp_Object lim)
1030 /* This function has been Mule-ized. */
1031 REGISTER Bufbyte *p, *pend;
1033 /* We store the first 256 chars in an array here and the rest in
1035 unsigned char fastmap[REGEXP_FASTMAP_SIZE];
1039 Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
1044 limit = forwardp ? BUF_ZV(buf) : BUF_BEGV(buf);
1046 CHECK_INT_COERCE_MARKER(lim);
1049 /* In any case, don't allow scan outside bounds of buffer. */
1050 if (limit > BUF_ZV(buf))
1051 limit = BUF_ZV(buf);
1052 if (limit < BUF_BEGV(buf))
1053 limit = BUF_BEGV(buf);
1056 CHECK_STRING(string);
1057 p = XSTRING_DATA(string);
1058 pend = p + XSTRING_LENGTH(string);
1059 memset(fastmap, 0, sizeof(fastmap));
1061 Fclear_range_table(Vskip_chars_range_table);
1063 if (p != pend && *p == '^') {
1068 /* Find the characters specified and set their elements of fastmap.
1069 If syntaxp, each character counts as itself.
1070 Otherwise, handle backslashes and ranges specially */
1073 c = charptr_emchar(p);
1076 if (c < REGEXP_FASTMAP_SIZE
1077 && syntax_spec_code[c] < (unsigned char)Smax)
1080 signal_simple_error("Invalid syntax designator",
1086 c = charptr_emchar(p);
1089 if (p != pend && *p == '-') {
1092 /* Skip over the dash. */
1096 cend = charptr_emchar(p);
1097 while (c <= cend && c < REGEXP_FASTMAP_SIZE) {
1102 Fput_range_table(make_int(c),
1104 Vskip_chars_range_table);
1107 if (c < REGEXP_FASTMAP_SIZE)
1110 Fput_range_table(make_int(c),
1112 Vskip_chars_range_table);
1117 /* #### Not in FSF 21.1 */
1118 if (syntaxp && fastmap['-'] != 0)
1121 /* If ^ was the first character, complement the fastmap.
1122 We don't complement the range table, however; we just use negate
1123 in the comparisons below. */
1126 for (i = 0; i < (int)(sizeof fastmap); i++)
1130 Bufpos start_point = BUF_PT(buf);
1131 Bufpos pos = start_point;
1132 Bytind pos_byte = BI_BUF_PT(buf);
1135 SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos,
1137 /* All syntax designators are normal chars so nothing strange
1141 while (fastmap[(unsigned char)
1143 [(int)SYNTAX_FROM_CACHE
1145 BI_BUF_FETCH_CHAR(buf,
1149 INC_BYTIND(buf, pos_byte);
1152 UPDATE_SYNTAX_CACHE_FORWARD
1156 while (pos > limit) {
1157 Bufpos savepos = pos_byte;
1159 DEC_BYTIND(buf, pos_byte);
1160 UPDATE_SYNTAX_CACHE_BACKWARD(pos);
1161 if (!fastmap[(unsigned char)
1163 [(int)SYNTAX_FROM_CACHE
1165 BI_BUF_FETCH_CHAR(buf,
1176 while (pos < limit) {
1178 BI_BUF_FETCH_CHAR(buf, pos_byte);
1180 REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1184 Vskip_chars_range_table, Qnil))
1187 INC_BYTIND(buf, pos_byte);
1192 while (pos > limit) {
1193 Bufpos prev_pos_byte = pos_byte;
1196 DEC_BYTIND(buf, prev_pos_byte);
1197 ch = BI_BUF_FETCH_CHAR(buf,
1200 REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1204 Vskip_chars_range_table, Qnil))
1207 pos_byte = prev_pos_byte;
1214 BOTH_BUF_SET_PT(buf, pos, pos_byte);
1215 return make_int(BUF_PT(buf) - start_point);
1219 DEFUN("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
1220 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
1221 STRING is like the inside of a `[...]' in a regular expression
1222 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
1223 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1224 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1225 Returns the distance traveled, either zero or positive.
1227 Optional argument BUFFER defaults to the current buffer.
1229 (string, limit, buffer))
1231 return skip_chars(decode_buffer(buffer, 0), 1, 0, string, limit);
1234 DEFUN("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
1235 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1236 See `skip-chars-forward' for details.
1237 Returns the distance traveled, either zero or negative.
1239 Optional argument BUFFER defaults to the current buffer.
1241 (string, limit, buffer))
1243 return skip_chars(decode_buffer(buffer, 0), 0, 0, string, limit);
1246 DEFUN("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1247 Move point forward across chars in specified syntax classes.
1248 SYNTAX is a string of syntax code characters.
1249 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1250 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1251 This function returns the distance traveled, either zero or positive.
1253 Optional argument BUFFER defaults to the current buffer.
1255 (syntax, limit, buffer))
1257 return skip_chars(decode_buffer(buffer, 0), 1, 1, syntax, limit);
1260 DEFUN("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1261 Move point backward across chars in specified syntax classes.
1262 SYNTAX is a string of syntax code characters.
1263 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1264 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1265 This function returns the distance traveled, either zero or negative.
1267 Optional argument BUFFER defaults to the current buffer.
1269 (syntax, limit, buffer))
1271 return skip_chars(decode_buffer(buffer, 0), 0, 1, syntax, limit);
1274 /* Subroutines of Lisp buffer search functions. */
1277 search_command(Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1278 Lisp_Object count, Lisp_Object buffer, int direction,
1281 /* This function has been Mule-ized, except for the trt table handling. */
1284 EMACS_INT n = direction;
1292 buf = decode_buffer(buffer, 0);
1293 CHECK_STRING(string);
1295 lim = n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
1297 CHECK_INT_COERCE_MARKER(limit);
1299 if (n > 0 ? lim < BUF_PT(buf) : lim > BUF_PT(buf))
1300 error("Invalid search limit (wrong side of point)");
1301 if (lim > BUF_ZV(buf))
1303 if (lim < BUF_BEGV(buf))
1304 lim = BUF_BEGV(buf);
1307 np = search_buffer(buf, string, BUF_PT(buf), lim, n, RE,
1308 (!NILP(buf->case_fold_search)
1309 ? XCASE_TABLE_CANON(buf->case_table)
1310 : Qnil), (!NILP(buf->case_fold_search)
1311 ? XCASE_TABLE_EQV(buf->case_table)
1316 return signal_failure(string);
1317 if (!EQ(noerror, Qt)) {
1318 if (lim < BUF_BEGV(buf) || lim > BUF_ZV(buf))
1320 BUF_SET_PT(buf, lim);
1322 #if 0 /* This would be clean, but maybe programs depend on
1323 a value of nil here. */
1330 if (np < BUF_BEGV(buf) || np > BUF_ZV(buf))
1333 BUF_SET_PT(buf, np);
1335 return make_int(np);
1338 static int trivial_regexp_p(Lisp_Object regexp)
1340 /* This function has been Mule-ized. */
1341 Bytecount len = XSTRING_LENGTH(regexp);
1342 Bufbyte *s = XSTRING_DATA(regexp);
1343 while (--len >= 0) {
1345 /* ']' doesn't appear here because it's only special after ] */
1375 /* 97/2/25 jhod Added for category matches */
1399 /* Search for the n'th occurrence of STRING in BUF,
1400 starting at position BUFPOS and stopping at position BUFLIM,
1401 treating PAT as a literal string if RE is false or as
1402 a regular expression if RE is true.
1404 If N is positive, searching is forward and BUFLIM must be greater
1406 If N is negative, searching is backward and BUFLIM must be less
1409 Returns -x if only N-x occurrences found (x > 0),
1410 or else the position at the beginning of the Nth occurrence
1411 (if searching backward) or the end (if searching forward).
1413 POSIX is nonzero if we want full backtracking (POSIX style)
1414 for this pattern. 0 means backtrack only enough to get a valid match. */
1416 search_buffer(struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1417 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1418 Lisp_Object inverse_trt, int posix)
1420 /* This function has been Mule-ized, except for the trt table handling. */
1421 Bytecount len = XSTRING_LENGTH(string);
1422 Bufbyte *base_pat = XSTRING_DATA(string);
1423 REGISTER EMACS_INT i, j;
1428 if (running_asynch_code)
1431 /* Null string is found at starting position. */
1433 set_search_regs(buf, bufpos, 0);
1434 clear_unused_search_regs(&search_regs, 0);
1438 /* Searching 0 times means noop---don't move, don't touch registers. */
1442 pos = bufpos_to_bytind(buf, bufpos);
1443 lim = bufpos_to_bytind(buf, buflim);
1444 if (RE && !trivial_regexp_p(string)) {
1445 struct re_pattern_buffer *bufp;
1447 bufp = compile_pattern(string, &search_regs, trt, posix,
1450 /* Get pointers and sizes of the two strings
1451 that make up the visible portion of the buffer. */
1453 p1 = BI_BUF_BEGV(buf);
1454 p2 = BI_BUF_CEILING_OF(buf, p1);
1456 s2 = BI_BUF_ZV(buf) - p2;
1457 regex_match_object = Qnil;
1462 regex_emacs_buffer = buf;
1463 val = re_search_2(bufp,
1464 (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1465 s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1467 s2, pos - BI_BUF_BEGV(buf), lim - pos,
1468 &search_regs, pos - BI_BUF_BEGV(buf));
1474 int num_regs = search_regs.num_regs;
1475 j = BI_BUF_BEGV(buf);
1476 for (i = 0; i < num_regs; i++)
1477 if (search_regs.start[i] >= 0) {
1478 search_regs.start[i] += j;
1479 search_regs.end[i] += j;
1481 /* re_match (called from re_search et al) does this for us */
1482 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1483 XSETBUFFER(last_thing_searched, buf);
1484 /* Set pos to the new position. */
1485 pos = search_regs.start[0];
1486 fixup_search_regs_for_buffer(buf);
1487 /* And bufpos too. */
1488 bufpos = search_regs.start[0];
1497 regex_emacs_buffer = buf;
1498 val = re_search_2(bufp,
1499 (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1500 s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1502 s2, pos - BI_BUF_BEGV(buf), lim - pos,
1503 &search_regs, lim - BI_BUF_BEGV(buf));
1508 int num_regs = search_regs.num_regs;
1509 j = BI_BUF_BEGV(buf);
1510 for (i = 0; i < num_regs; i++)
1511 if (search_regs.start[i] >= 0) {
1512 search_regs.start[i] += j;
1513 search_regs.end[i] += j;
1515 /* re_match (called from re_search et al) does this for us */
1516 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1517 XSETBUFFER(last_thing_searched, buf);
1518 /* Set pos to the new position. */
1519 pos = search_regs.end[0];
1520 fixup_search_regs_for_buffer(buf);
1521 /* And bufpos too. */
1522 bufpos = search_regs.end[0];
1529 } else { /* non-RE case */
1531 int charset_base = -1;
1532 int boyer_moore_ok = 1;
1534 Bufbyte *patbuf = alloca_array(Bufbyte, len * MAX_EMCHAR_LEN);
1538 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1539 Emchar c, translated, inverse;
1540 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1542 /* If we got here and the RE flag is set, it's because
1543 we're dealing with a regexp known to be trivial, so the
1544 backslash just quotes the next character. */
1545 if (RE && *base_pat == '\\') {
1549 c = charptr_emchar(base_pat);
1550 translated = TRANSLATE(trt, c);
1551 inverse = TRANSLATE(inverse_trt, c);
1553 orig_bytelen = charcount_to_bytecount(base_pat, 1);
1554 inv_bytelen = set_charptr_emchar(tmp_str, inverse);
1555 new_bytelen = set_charptr_emchar(tmp_str, translated);
1557 if (new_bytelen != orig_bytelen
1558 || inv_bytelen != orig_bytelen)
1560 if (translated != c || inverse != c) {
1561 /* Keep track of which character set row
1562 contains the characters that need translation. */
1563 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1564 if (charset_base == -1)
1565 charset_base = charset_base_code;
1566 else if (charset_base != charset_base_code)
1567 /* If two different rows appear, needing translation,
1568 then we cannot use boyer_moore search. */
1571 memcpy(pat, tmp_str, new_bytelen);
1573 base_pat += orig_bytelen;
1574 len -= orig_bytelen;
1576 #else /* not MULE */
1577 while (--len >= 0) {
1578 /* If we got here and the RE flag is set, it's because
1579 we're dealing with a regexp known to be trivial, so the
1580 backslash just quotes the next character. */
1581 if (RE && *base_pat == '\\') {
1585 *pat++ = TRANSLATE(trt, *base_pat++);
1589 pat = base_pat = patbuf;
1591 return boyer_moore(buf, base_pat, len, pos, lim, n,
1592 trt, inverse_trt, charset_base);
1594 return simple_search(buf, base_pat, len, pos, lim, n,
1599 /* Do a simple string search N times for the string PAT,
1600 whose length is LEN/LEN_BYTE,
1601 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1602 TRT is the translation table.
1604 Return the character position where the match is found.
1605 Otherwise, if M matches remained to be found, return -M.
1607 This kind of search works regardless of what is in PAT and
1608 regardless of what is in TRT. It is used in cases where
1609 boyer_moore cannot work. */
1612 simple_search(struct buffer *buf, Bufbyte * base_pat, Bytecount len_byte,
1613 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1615 int forward = n > 0;
1616 Bytecount buf_len = 0; /* Shut up compiler. */
1621 Bytecount this_len = len_byte;
1622 Bytind this_idx = idx;
1623 const Bufbyte *p = base_pat;
1627 while (this_len > 0) {
1628 Emchar pat_ch, buf_ch;
1631 pat_ch = charptr_emchar(p);
1633 BI_BUF_FETCH_CHAR(buf, this_idx);
1635 buf_ch = TRANSLATE(trt, buf_ch);
1637 if (buf_ch != pat_ch)
1640 pat_len = charcount_to_bytecount(p, 1);
1642 this_len -= pat_len;
1643 INC_BYTIND(buf, this_idx);
1645 if (this_len == 0) {
1646 buf_len = this_idx - idx;
1650 INC_BYTIND(buf, idx);
1657 Bytecount this_len = len_byte;
1658 Bytind this_idx = idx;
1659 const Bufbyte *p = base_pat + len_byte;
1665 while (this_len > 0) {
1666 Emchar pat_ch, buf_ch;
1669 DEC_BYTIND(buf, this_idx);
1670 pat_ch = charptr_emchar(p);
1672 BI_BUF_FETCH_CHAR(buf, this_idx);
1674 buf_ch = TRANSLATE(trt, buf_ch);
1676 if (buf_ch != pat_ch)
1680 charcount_to_bytecount(p, 1);
1682 if (this_len == 0) {
1683 buf_len = idx - this_idx;
1687 DEC_BYTIND(buf, idx);
1694 Bufpos beg, end, retval;
1696 beg = bytind_to_bufpos(buf, idx - buf_len);
1697 retval = end = bytind_to_bufpos(buf, idx);
1699 retval = beg = bytind_to_bufpos(buf, idx);
1700 end = bytind_to_bufpos(buf, idx + buf_len);
1702 set_search_regs(buf, beg, end - beg);
1703 clear_unused_search_regs(&search_regs, 0);
1713 /* Do Boyer-Moore search N times for the string PAT,
1714 whose length is LEN/LEN_BYTE,
1715 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1716 DIRECTION says which direction we search in.
1717 TRT and INVERSE_TRT are translation tables.
1719 This kind of search works if all the characters in PAT that have
1720 nontrivial translation are the same aside from the last byte. This
1721 makes it possible to translate just the last byte of a character,
1722 and do so after just a simple test of the context.
1724 If that criterion is not satisfied, do not call this function. */
1727 boyer_moore(struct buffer *buf, Bufbyte * base_pat, Bytecount len,
1728 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1729 Lisp_Object inverse_trt, int charset_base)
1731 /* #### Someone really really really needs to comment the workings
1732 of this junk somewhat better.
1734 BTW "BM" stands for Boyer-Moore, which is one of the standard
1735 string-searching algorithms. It's the best string-searching
1736 algorithm out there, provided that:
1738 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1739 uses hashing, is much much easier to code but not as fast.)
1740 b) You can freely move backwards in the string that you're
1743 As the comment below tries to explain (but garbles in typical
1744 programmer-ese), the idea is that you don't have to do a
1745 string match at every successive position in the text. For
1746 example, let's say the pattern is "a very long string". We
1747 compare the last character in the string (`g') with the
1748 corresponding character in the text. If it mismatches, and
1749 it is, say, `z', then we can skip forward by the entire
1750 length of the pattern because `z' does not occur anywhere
1751 in the pattern. If the mismatching character does occur
1752 in the pattern, we can usually still skip forward by more
1753 than one: e.g. if it is `l', then we can skip forward
1754 by the length of the substring "ong string" -- i.e. the
1755 largest end section of the pattern that does not contain
1756 the mismatched character. So what we do is compute, for
1757 each possible character, the distance we can skip forward
1758 (the "stride") and use it in the string matching. This
1759 is what the BM_tab holds. */
1760 REGISTER EMACS_INT *BM_tab;
1761 EMACS_INT *BM_tab_base;
1762 REGISTER Bytecount dirlen;
1765 Bytecount stride_for_teases = 0;
1766 REGISTER EMACS_INT i, j;
1767 Bufbyte *pat, *pat_end;
1768 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1769 Bufbyte simple_translate[REGEXP_FASTMAP_SIZE];
1770 REGISTER int direction = ((n > 0) ? 1 : -1);
1772 Bufbyte translate_prev_byte = 0;
1773 Bufbyte translate_anteprev_byte = 0;
1776 EMACS_INT BM_tab_space[REGEXP_FASTMAP_SIZE];
1777 BM_tab = &BM_tab_space[0];
1779 BM_tab = alloca_array(EMACS_INT, 256);
1782 /* The general approach is that we are going to maintain that we
1783 know the first (closest to the present position, in whatever
1784 direction we're searching) character that could possibly be
1785 the last (furthest from present position) character of a
1786 valid match. We advance the state of our knowledge by
1787 looking at that character and seeing whether it indeed
1788 matches the last character of the pattern. If it does, we
1789 take a closer look. If it does not, we move our pointer (to
1790 putative last characters) as far as is logically possible.
1791 This amount of movement, which I call a stride, will be the
1792 length of the pattern if the actual character appears nowhere
1793 in the pattern, otherwise it will be the distance from the
1794 last occurrence of that character to the end of the pattern.
1795 As a coding trick, an enormous stride is coded into the table
1796 for characters that match the last character. This allows
1797 use of only a single test, a test for having gone past the
1798 end of the permissible match region, to test for both
1799 possible matches (when the stride goes past the end
1800 immediately) and failure to match (where you get nudged past
1801 the end one stride at a time).
1803 Here we make a "mickey mouse" BM table. The stride of the
1804 search is determined only by the last character of the
1805 putative match. If that character does not match, we will
1806 stride the proper distance to propose a match that
1807 superimposes it on the last instance of a character that
1808 matches it (per trt), or misses it entirely if there is
1811 dirlen = len * direction;
1812 infinity = dirlen - (lim + pos + len + len) * direction;
1813 /* Record position after the end of the pattern. */
1814 pat_end = base_pat + len;
1816 base_pat = pat_end - 1;
1817 BM_tab_base = BM_tab;
1818 BM_tab += REGEXP_FASTMAP_SIZE;
1819 j = dirlen; /* to get it in a register */
1820 /* A character that does not appear in the pattern induces a
1821 stride equal to the pattern length. */
1822 while (BM_tab_base != BM_tab) {
1828 /* We use this for translation, instead of TRT itself. We
1829 fill this in to handle the characters that actually occur
1830 in the pattern. Others don't matter anyway! */
1831 xzero(simple_translate);
1832 for (i = 0; i < REGEXP_FASTMAP_SIZE; i++)
1833 simple_translate[i] = (Bufbyte) i;
1835 while (i != infinity) {
1836 Bufbyte *ptr = base_pat + i;
1842 Emchar ch, untranslated;
1843 int this_translated = 1;
1845 /* Is *PTR the last byte of a character? */
1846 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P(ptr[1])) {
1847 Bufbyte *charstart = ptr;
1848 while (!BUFBYTE_FIRST_BYTE_P(*charstart))
1850 untranslated = charptr_emchar(charstart);
1852 (untranslated & ~CHAR_FIELD3_MASK)) {
1853 ch = TRANSLATE(trt, untranslated);
1854 if (!BUFBYTE_FIRST_BYTE_P(*ptr)) {
1855 translate_prev_byte = ptr[-1];
1856 if (!BUFBYTE_FIRST_BYTE_P
1857 (translate_prev_byte))
1858 translate_anteprev_byte
1862 this_translated = 0;
1867 this_translated = 0;
1869 if (ch > REGEXP_FASTMAP_SIZE)
1870 j = ((unsigned char)ch | 0200);
1872 j = (unsigned char)ch;
1875 stride_for_teases = BM_tab[j];
1876 BM_tab[j] = dirlen - i;
1877 /* A translation table is accompanied by its inverse --
1878 see comment following downcase_table for details */
1879 if (this_translated) {
1880 Emchar starting_ch = ch;
1881 EMACS_INT starting_j = j;
1883 ch = TRANSLATE(inverse_trt, ch);
1884 if (ch > REGEXP_FASTMAP_SIZE)
1885 j = ((unsigned char)ch | 0200);
1887 j = (unsigned char)ch;
1889 /* For all the characters that map into CH,
1890 set up simple_translate to map the last byte
1892 simple_translate[j] = starting_j;
1893 if (ch == starting_ch)
1895 BM_tab[j] = dirlen - i;
1901 k = (j = TRANSLATE(trt, j));
1903 stride_for_teases = BM_tab[j];
1904 BM_tab[j] = dirlen - i;
1905 /* A translation table is accompanied by its inverse --
1906 see comment following downcase_table for details */
1908 while ((j = TRANSLATE(inverse_trt, j)) != k) {
1909 simple_translate[j] = (Bufbyte) k;
1910 BM_tab[j] = dirlen - i;
1917 stride_for_teases = BM_tab[j];
1918 BM_tab[j] = dirlen - i;
1920 /* stride_for_teases tells how much to stride if we get a
1921 match on the far character but are subsequently
1922 disappointed, by recording what the stride would have been
1923 for that character if the last character had been
1926 infinity = dirlen - infinity;
1927 pos += dirlen - ((direction > 0) ? direction : 0);
1928 /* loop invariant - pos points at where last char (first char if
1929 reverse) of pattern would align in a possible match. */
1932 Bufbyte *tail_end_ptr;
1933 /* It's been reported that some (broken) compiler thinks
1934 that Boolean expressions in an arithmetic context are
1935 unsigned. Using an explicit ?1:0 prevents this. */
1936 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1937 return n * (0 - direction);
1938 /* First we do the part we can by pointers (maybe
1942 limit = pos - dirlen + direction;
1943 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1944 have changed. See buffer.h. */
1945 limit = ((direction > 0)
1946 ? BI_BUF_CEILING_OF(buf, limit) - 1
1947 : BI_BUF_FLOOR_OF(buf, limit + 1));
1948 /* LIMIT is now the last (not beyond-last!) value POS can
1949 take on without hitting edge of buffer or the gap. */
1950 limit = ((direction > 0)
1951 ? min(lim - 1, min(limit, pos + 20000))
1952 : max(lim, max(limit, pos - 20000)));
1953 tail_end = BI_BUF_CEILING_OF(buf, pos);
1954 tail_end_ptr = BI_BUF_BYTE_ADDRESS(buf, tail_end);
1956 if ((limit - pos) * direction > 20) {
1957 p_limit = BI_BUF_BYTE_ADDRESS(buf, limit);
1958 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS(buf, pos));
1959 /* In this loop, pos + cursor - ptr2 is the surrogate
1961 while (1) { /* use one cursor setting as long as i can */
1962 if (direction > 0) { /* worth duplicating */
1963 /* Use signed comparison if appropriate to make
1964 cursor+infinity sure to be > p_limit.
1965 Assuming that the buffer lies in a range of
1966 addresses that are all "positive" (as ints)
1967 or all "negative", either kind of comparison
1968 will work as long as we don't step by
1969 infinity. So pick the kind that works when
1970 we do step by infinity. */
1971 if ((EMACS_INT) (p_limit + infinity) >
1972 (EMACS_INT) p_limit)
1973 while ((EMACS_INT) cursor <=
1974 (EMACS_INT) p_limit)
1978 while ((EMACS_UINT) cursor <=
1979 (EMACS_UINT) p_limit)
1983 if ((EMACS_INT) (p_limit + infinity) <
1984 (EMACS_INT) p_limit)
1985 while ((EMACS_INT) cursor >=
1986 (EMACS_INT) p_limit)
1990 while ((EMACS_UINT) cursor >=
1991 (EMACS_UINT) p_limit)
1995 /* If you are here, cursor is beyond the end of the
1996 searched region. This can happen if you match on
1997 the far character of the pattern, because the
1998 "stride" of that character is infinity, a number
1999 able to throw you well beyond the end of the
2000 search. It can also happen if you fail to match
2001 within the permitted region and would otherwise
2002 try a character beyond that region */
2003 if ((cursor - p_limit) * direction <= len)
2004 break; /* a small overrun is genuine */
2005 cursor -= infinity; /* large overrun = hit */
2006 i = dirlen - direction;
2009 direction) + direction != 0) {
2012 cursor -= direction;
2013 /* Translate only the last byte of a character. */
2014 if ((cursor == tail_end_ptr
2016 BUFBYTE_FIRST_BYTE_P(cursor
2019 (BUFBYTE_FIRST_BYTE_P
2021 || (translate_prev_byte ==
2024 (BUFBYTE_FIRST_BYTE_P
2025 (translate_prev_byte)
2027 translate_anteprev_byte
2029 ch = simple_translate
2045 direction) + direction != 0)
2047 *(cursor -= direction))
2050 cursor += dirlen - i - direction; /* fix cursor */
2051 if (i + direction == 0) {
2052 cursor -= direction;
2056 (pos + cursor - ptr2 +
2060 bytind_to_bufpos(buf,
2063 bytind_to_bufpos(buf,
2067 set_search_regs(buf, bufstart,
2070 clear_unused_search_regs
2074 if ((n -= direction) != 0)
2075 cursor += dirlen; /* to resume search */
2077 return ((direction > 0)
2079 end[0] : search_regs.
2082 cursor += stride_for_teases; /* <sigh> we lose - */
2084 pos += cursor - ptr2;
2086 /* Now we'll pick up a clump that has to be done the hard
2087 way because it covers a discontinuity */
2089 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
2090 have changed. See buffer.h. */
2091 limit = ((direction > 0)
2092 ? BI_BUF_CEILING_OF(buf, pos - dirlen + 1) - 1
2093 : BI_BUF_FLOOR_OF(buf, pos - dirlen));
2094 limit = ((direction > 0)
2095 ? min(limit + len, lim - 1)
2096 : max(limit - len, lim));
2097 /* LIMIT is now the last value POS can have
2098 and still be valid for a possible match. */
2100 /* This loop can be coded for space rather than
2101 speed because it will usually run only once.
2102 (the reach is at most len + 21, and typically
2103 does not exceed len) */
2104 while ((limit - pos) * direction >= 0)
2105 /* *not* BI_BUF_FETCH_CHAR. We are working here
2106 with bytes, not characters. */
2108 BM_tab[*BI_BUF_BYTE_ADDRESS
2110 /* now run the same tests to distinguish going off
2111 the end, a match or a phony match. */
2112 if ((pos - limit) * direction <= len)
2113 break; /* ran off the end */
2114 /* Found what might be a match.
2115 Set POS back to last (first if reverse) char pos. */
2117 i = dirlen - direction;
2118 while ((i -= direction) + direction != 0) {
2125 ptr = BI_BUF_BYTE_ADDRESS(buf, pos);
2126 if ((ptr == tail_end_ptr
2127 || BUFBYTE_FIRST_BYTE_P(ptr[1]))
2128 && (BUFBYTE_FIRST_BYTE_P(ptr[0])
2129 || (translate_prev_byte ==
2132 (BUFBYTE_FIRST_BYTE_P
2133 (translate_prev_byte)
2134 || translate_anteprev_byte
2136 ch = simple_translate[*ptr];
2143 if (pat[i] != TRANSLATE(trt,
2144 *BI_BUF_BYTE_ADDRESS
2149 /* Above loop has moved POS part or all the way back
2150 to the first char pos (last char pos if reverse).
2151 Set it once again at the last (first if reverse)
2153 pos += dirlen - i - direction;
2154 if (i + direction == 0) {
2158 Bytind bytstart = (pos +
2164 bytind_to_bufpos(buf,
2167 bytind_to_bufpos(buf,
2171 set_search_regs(buf, bufstart,
2174 clear_unused_search_regs
2178 if ((n -= direction) != 0)
2179 pos += dirlen; /* to resume search */
2181 return ((direction > 0)
2183 end[0] : search_regs.
2186 pos += stride_for_teases;
2189 /* We have done one clump. Can we continue? */
2190 if ((lim - pos) * direction < 0)
2191 return (0 - n) * direction;
2193 return bytind_to_bufpos(buf, pos);
2196 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
2197 buffer for a match just found. */
2199 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len)
2201 /* This function has been Mule-ized. */
2202 /* Make sure we have registers in which to store
2203 the match position. */
2204 if (search_regs.num_regs == 0) {
2205 search_regs.start = xnew_atomic(regoff_t);
2206 search_regs.end = xnew_atomic(regoff_t);
2207 search_regs.num_regs = 1;
2210 search_regs.start[0] = beg;
2211 search_regs.end[0] = beg + len;
2212 XSETBUFFER(last_thing_searched, buf);
2215 /* Clear unused search registers so match data will be null.
2216 REGP is a pointer to the register structure to clear, usually the global
2218 NO_SUB is the number of subexpressions to allow for. (Does not count
2219 the whole match, ie, for a string search NO_SUB == 0.)
2220 It is an error if NO_SUB > REGP.num_regs - 1. */
2222 static void clear_unused_search_regs(struct re_registers *regp, int no_sub)
2224 /* This function has been Mule-ized. */
2227 assert(no_sub >= 0 && no_sub < regp->num_regs);
2228 for (i = no_sub + 1; i < regp->num_regs; i++)
2229 regp->start[i] = regp->end[i] = -1;
2232 /* Given a string of words separated by word delimiters,
2233 compute a regexp that matches those exact words
2234 separated by arbitrary punctuation. */
2236 static Lisp_Object wordify(Lisp_Object buffer, Lisp_Object string)
2239 EMACS_INT punct_count = 0, word_count = 0;
2240 struct buffer *buf = decode_buffer(buffer, 0);
2241 Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2243 CHECK_STRING(string);
2244 len = XSTRING_CHAR_LENGTH(string);
2246 for (i = 0; i < len; i++)
2248 (syntax_table, string_char(XSTRING(string), i))) {
2250 if (i > 0 && WORD_SYNTAX_P(syntax_table,
2251 string_char(XSTRING(string),
2255 if (WORD_SYNTAX_P(syntax_table, string_char(XSTRING(string), len - 1)))
2258 return build_string("");
2261 /* The following value is an upper bound on the amount of storage we
2262 need. In non-Mule, it is exact. */
2264 (Bufbyte *) alloca(XSTRING_LENGTH(string) - punct_count +
2265 5 * (word_count - 1) + 4);
2266 Bufbyte *o = storage;
2271 for (i = 0; i < len; i++) {
2272 Emchar ch = string_char(XSTRING(string), i);
2274 if (WORD_SYNTAX_P(syntax_table, ch))
2275 o += set_charptr_emchar(o, ch);
2277 && WORD_SYNTAX_P(syntax_table,
2278 string_char(XSTRING(string),
2292 return make_string(storage, o - storage);
2296 DEFUN("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2297 Search backward from point for STRING.
2298 Set point to the beginning of the occurrence found, and return point.
2300 Optional second argument LIMIT bounds the search; it is a buffer
2301 position. The match found must not extend before that position.
2302 The value nil is equivalent to (point-min).
2304 Optional third argument NOERROR, if t, means just return nil (no
2305 error) if the search fails. If neither nil nor t, set point to LIMIT
2308 Optional fourth argument COUNT is a repeat count--search for
2309 successive occurrences.
2311 Optional fifth argument BUFFER specifies the buffer to search in and
2312 defaults to the current buffer.
2314 See also the functions `match-beginning', `match-end' and `replace-match'.
2316 (string, limit, noerror, count, buffer))
2318 return search_command(string, limit, noerror, count, buffer, -1, 0, 0);
2321 DEFUN("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2322 Search forward from point for STRING.
2323 Set point to the end of the occurrence found, and return point.
2325 Optional second argument LIMIT bounds the search; it is a buffer
2326 position. The match found must not extend after that position. The
2327 value nil is equivalent to (point-max).
2329 Optional third argument NOERROR, if t, means just return nil (no
2330 error) if the search fails. If neither nil nor t, set point to LIMIT
2333 Optional fourth argument COUNT is a repeat count--search for
2334 successive occurrences.
2336 Optional fifth argument BUFFER specifies the buffer to search in and
2337 defaults to the current buffer.
2339 See also the functions `match-beginning', `match-end' and `replace-match'.
2341 (string, limit, noerror, count, buffer))
2343 return search_command(string, limit, noerror, count, buffer, 1, 0, 0);
2346 DEFUN("word-search-backward", Fword_search_backward, 1, 5, "sWord search backward: ", /*
2347 Search backward from point for STRING, ignoring differences in punctuation.
2348 Set point to the beginning of the occurrence found, and return point.
2350 Optional second argument LIMIT bounds the search; it is a buffer
2351 position. The match found must not extend before that position.
2352 The value nil is equivalent to (point-min).
2354 Optional third argument NOERROR, if t, means just return nil (no
2355 error) if the search fails. If neither nil nor t, set point to LIMIT
2358 Optional fourth argument COUNT is a repeat count--search for
2359 successive occurrences.
2361 Optional fifth argument BUFFER specifies the buffer to search in and
2362 defaults to the current buffer.
2364 See also the functions `match-beginning', `match-end' and `replace-match'.
2366 (string, limit, noerror, count, buffer))
2368 return search_command(wordify(buffer, string), limit, noerror, count,
2372 DEFUN("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2373 Search forward from point for STRING, ignoring differences in punctuation.
2374 Set point to the end of the occurrence found, and return point.
2376 Optional second argument LIMIT bounds the search; it is a buffer
2377 position. The match found must not extend after that position. The
2378 value nil is equivalent to (point-max).
2380 Optional third argument NOERROR, if t, means just return nil (no
2381 error) if the search fails. If neither nil nor t, set point to LIMIT
2384 Optional fourth argument COUNT is a repeat count--search for
2385 successive occurrences.
2387 Optional fifth argument BUFFER specifies the buffer to search in and
2388 defaults to the current buffer.
2390 See also the functions `match-beginning', `match-end' and `replace-match'.
2392 (string, limit, noerror, count, buffer))
2394 return search_command(wordify(buffer, string), limit, noerror, count,
2398 DEFUN("re-search-backward", Fre_search_backward, 1, 5, "sRE search backward: ", /*
2399 Search backward from point for match for regular expression REGEXP.
2400 Set point to the beginning of the match, and return point.
2401 The match found is the one starting last in the buffer
2402 and yet ending before the origin of the search.
2404 Optional second argument LIMIT bounds the search; it is a buffer
2405 position. The match found must not extend before that position.
2406 The value nil is equivalent to (point-min).
2408 Optional third argument NOERROR, if t, means just return nil (no
2409 error) if the search fails. If neither nil nor t, set point to LIMIT
2412 Optional fourth argument COUNT is a repeat count--search for
2413 successive occurrences.
2415 Optional fifth argument BUFFER specifies the buffer to search in and
2416 defaults to the current buffer.
2418 See also the functions `match-beginning', `match-end' and `replace-match'.
2420 (regexp, limit, noerror, count, buffer))
2422 return search_command(regexp, limit, noerror, count, buffer, -1, 1, 0);
2425 DEFUN("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2426 Search forward from point for regular expression REGEXP.
2427 Set point to the end of the occurrence found, and return point.
2429 Optional second argument LIMIT bounds the search; it is a buffer
2430 position. The match found must not extend after that position. The
2431 value nil is equivalent to (point-max).
2433 Optional third argument NOERROR, if t, means just return nil (no
2434 error) if the search fails. If neither nil nor t, set point to LIMIT
2437 Optional fourth argument COUNT is a repeat count--search for
2438 successive occurrences.
2440 Optional fifth argument BUFFER specifies the buffer to search in and
2441 defaults to the current buffer.
2443 See also the functions `match-beginning', `match-end' and `replace-match'.
2445 (regexp, limit, noerror, count, buffer))
2447 return search_command(regexp, limit, noerror, count, buffer, 1, 1, 0);
2450 DEFUN("posix-search-backward", Fposix_search_backward, 1, 5, "sPosix search backward: ", /*
2451 Search backward from point for match for regular expression REGEXP.
2452 Find the longest match in accord with Posix regular expression rules.
2453 Set point to the beginning of the match, and return point.
2454 The match found is the one starting last in the buffer
2455 and yet ending before the origin of the search.
2457 Optional second argument LIMIT bounds the search; it is a buffer
2458 position. The match found must not extend before that position.
2459 The value nil is equivalent to (point-min).
2461 Optional third argument NOERROR, if t, means just return nil (no
2462 error) if the search fails. If neither nil nor t, set point to LIMIT
2465 Optional fourth argument COUNT is a repeat count--search for
2466 successive occurrences.
2468 Optional fifth argument BUFFER specifies the buffer to search in and
2469 defaults to the current buffer.
2471 See also the functions `match-beginning', `match-end' and `replace-match'.
2473 (regexp, limit, noerror, count, buffer))
2475 return search_command(regexp, limit, noerror, count, buffer, -1, 1, 1);
2478 DEFUN("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2479 Search forward from point for regular expression REGEXP.
2480 Find the longest match in accord with Posix regular expression rules.
2481 Set point to the end of the occurrence found, and return point.
2483 Optional second argument LIMIT bounds the search; it is a buffer
2484 position. The match found must not extend after that position. The
2485 value nil is equivalent to (point-max).
2487 Optional third argument NOERROR, if t, means just return nil (no
2488 error) if the search fails. If neither nil nor t, set point to LIMIT
2491 Optional fourth argument COUNT is a repeat count--search for
2492 successive occurrences.
2494 Optional fifth argument BUFFER specifies the buffer to search in and
2495 defaults to the current buffer.
2497 See also the functions `match-beginning', `match-end' and `replace-match'.
2499 (regexp, limit, noerror, count, buffer))
2501 return search_command(regexp, limit, noerror, count, buffer, 1, 1, 1);
2504 static Lisp_Object free_created_dynarrs(Lisp_Object cons)
2506 Dynarr_free(get_opaque_ptr(XCAR(cons)));
2507 Dynarr_free(get_opaque_ptr(XCDR(cons)));
2508 free_opaque_ptr(XCAR(cons));
2509 free_opaque_ptr(XCDR(cons));
2510 free_cons(XCONS(cons));
2514 DEFUN("replace-match", Freplace_match, 1, 5, 0, /*
2515 Replace text matched by last search with REPLACEMENT.
2516 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2517 Otherwise maybe capitalize the whole text, or maybe just word initials,
2518 based on the replaced text.
2519 If the replaced text has only capital letters
2520 and has at least one multiletter word, convert REPLACEMENT to all caps.
2521 If the replaced text has at least one word starting with a capital letter,
2522 then capitalize each word in REPLACEMENT.
2524 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2525 Otherwise treat `\\' as special:
2526 `\\&' in REPLACEMENT means substitute original matched text.
2527 `\\N' means substitute what matched the Nth `\\(...\\)'.
2528 If Nth parens didn't match, substitute nothing.
2529 `\\\\' means insert one `\\'.
2530 `\\u' means upcase the next character.
2531 `\\l' means downcase the next character.
2532 `\\U' means begin upcasing all following characters.
2533 `\\L' means begin downcasing all following characters.
2534 `\\E' means terminate the effect of any `\\U' or `\\L'.
2535 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2536 all other case changes that may be made in the replaced text.
2537 FIXEDCASE and LITERAL are optional arguments.
2538 Leaves point at end of replacement text.
2540 The optional fourth argument STRING can be a string to modify.
2541 In that case, this function creates and returns a new string
2542 which is made by replacing the part of STRING that was matched.
2543 When fourth argument is a string, fifth argument STRBUFFER specifies
2544 the buffer to be used for syntax-table and case-table lookup and
2545 defaults to the current buffer. When fourth argument is not a string,
2546 the buffer that the match occurred in has automatically been remembered
2547 and you do not need to specify it.
2549 When fourth argument is nil, STRBUFFER specifies a subexpression of
2550 the match. It says to replace just that subexpression instead of the
2551 whole match. This is useful only after a regular expression search or
2552 match since only regular expressions have distinguished subexpressions.
2554 (replacement, fixedcase, literal, string, strbuffer))
2556 /* This function has been Mule-ized. */
2557 /* This function can GC */
2558 enum { nochange, all_caps, cap_initial } case_action;
2560 int some_multiletter_word;
2563 int some_nonuppercase_initial;
2567 Lisp_Char_Table *syntax_table;
2570 int_dynarr *ul_action_dynarr = 0;
2571 int_dynarr *ul_pos_dynarr = 0;
2575 CHECK_STRING(replacement);
2577 if (!NILP(string)) {
2578 CHECK_STRING(string);
2579 if (!EQ(last_thing_searched, Qt))
2580 error("last thing matched was not a string");
2581 /* If the match data
2582 were abstracted into a special "match data" type instead
2583 of the typical half-assed "let the implementation be
2584 visible" form it's in, we could extend it to include
2585 the last string matched and the buffer used for that
2586 matching. But of course we can't change it as it is. */
2587 buf = decode_buffer(strbuffer, 0);
2588 XSETBUFFER(buffer, buf);
2590 if (!NILP(strbuffer)) {
2591 CHECK_INT(strbuffer);
2592 sub = XINT(strbuffer);
2593 if (sub < 0 || sub >= (int)search_regs.num_regs)
2594 args_out_of_range(strbuffer,
2595 make_int(search_regs.
2598 if (!BUFFERP(last_thing_searched))
2599 error("last thing matched was not a buffer");
2600 buffer = last_thing_searched;
2601 buf = XBUFFER(buffer);
2604 syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2606 case_action = nochange; /* We tried an initialization */
2607 /* but some C compilers blew it */
2609 if (search_regs.num_regs == 0)
2610 error("replace-match called before any match found");
2613 if (search_regs.start[sub] < BUF_BEGV(buf)
2614 || search_regs.start[sub] > search_regs.end[sub]
2615 || search_regs.end[sub] > BUF_ZV(buf))
2616 args_out_of_range(make_int(search_regs.start[sub]),
2617 make_int(search_regs.end[sub]));
2619 if (search_regs.start[0] < 0
2620 || search_regs.start[0] > search_regs.end[0]
2621 || search_regs.end[0] > XSTRING_CHAR_LENGTH(string))
2622 args_out_of_range(make_int(search_regs.start[0]),
2623 make_int(search_regs.end[0]));
2626 if (NILP(fixedcase)) {
2627 /* Decide how to casify by examining the matched text. */
2629 last = search_regs.end[sub];
2631 case_action = all_caps;
2633 /* some_multiletter_word is set nonzero if any original word
2634 is more than one letter long. */
2635 some_multiletter_word = 0;
2637 some_nonuppercase_initial = 0;
2640 for (pos = search_regs.start[sub]; pos < last; pos++) {
2642 c = BUF_FETCH_CHAR(buf, pos);
2644 c = string_char(XSTRING(string), pos);
2646 if (LOWERCASEP(buf, c)) {
2647 /* Cannot be all caps if any original char is lower case */
2650 if (!WORD_SYNTAX_P(syntax_table, prevc))
2651 some_nonuppercase_initial = 1;
2653 some_multiletter_word = 1;
2654 } else if (!NOCASEP(buf, c)) {
2656 if (!WORD_SYNTAX_P(syntax_table, prevc)) ;
2658 some_multiletter_word = 1;
2660 /* If the initial is a caseless word constituent,
2661 treat that like a lowercase initial. */
2662 if (!WORD_SYNTAX_P(syntax_table, prevc))
2663 some_nonuppercase_initial = 1;
2669 /* Convert to all caps if the old text is all caps
2670 and has at least one multiletter word. */
2671 if (!some_lowercase && some_multiletter_word)
2672 case_action = all_caps;
2673 /* Capitalize each word, if the old text has all capitalized words. */
2674 else if (!some_nonuppercase_initial && some_multiletter_word)
2675 case_action = cap_initial;
2676 else if (!some_nonuppercase_initial && some_uppercase)
2677 /* Should x -> yz, operating on X, give Yz or YZ?
2678 We'll assume the latter. */
2679 case_action = all_caps;
2681 case_action = nochange;
2684 /* Do replacement in a string. */
2685 if (!NILP(string)) {
2686 Lisp_Object before, after;
2688 speccount = specpdl_depth();
2690 Fsubstring(string, Qzero, make_int(search_regs.start[0]));
2691 after = Fsubstring(string, make_int(search_regs.end[0]), Qnil);
2693 /* Do case substitution into REPLACEMENT if desired. */
2694 if (NILP(literal)) {
2695 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2697 /* XEmacs change: rewrote this loop somewhat to make it
2698 cleaner. Also added \U, \E, etc. */
2699 Charcount literal_start = 0;
2700 /* We build up the substituted string in ACCUM. */
2705 /* OK, the basic idea here is that we scan through the
2706 replacement string until we find a backslash, which
2707 represents a substring of the original string to be
2708 substituted. We then append onto ACCUM the literal
2709 text before the backslash (LASTPOS marks the
2710 beginning of this) followed by the substring of the
2711 original string that needs to be inserted. */
2712 for (strpos = 0; strpos < stlen; strpos++) {
2713 /* If LITERAL_END is set, we've encountered a backslash
2714 (the end of literal text to be inserted). */
2715 Charcount literal_end = -1;
2716 /* If SUBSTART is set, we need to also insert the
2717 text from SUBSTART to SUBEND in the original string. */
2718 Charcount substart = -1;
2719 Charcount subend = -1;
2721 c = string_char(XSTRING(replacement), strpos);
2722 if (c == '\\' && strpos < stlen - 1) {
2723 c = string_char(XSTRING(replacement),
2726 literal_end = strpos - 1;
2727 substart = search_regs.start[0];
2728 subend = search_regs.end[0];
2729 } else if (c >= '1' && c <= '9' &&
2731 search_regs.num_regs + '0') {
2733 start[c - '0'] >= 0) {
2743 } else if (c == 'U' || c == 'u'
2744 || c == 'L' || c == 'l'
2746 /* Keep track of all case changes requested, but don't
2747 make them now. Do them later so we override
2749 if (!ul_pos_dynarr) {
2754 record_unwind_protect
2755 (free_created_dynarrs,
2760 (ul_action_dynarr)));
2762 literal_end = strpos - 1;
2763 Dynarr_add(ul_pos_dynarr,
2771 Dynarr_add(ul_action_dynarr, c);
2772 } else if (c == '\\')
2773 /* So we get just one backslash. */
2774 literal_end = strpos;
2776 if (literal_end >= 0) {
2777 Lisp_Object literal_text = Qnil;
2778 Lisp_Object substring = Qnil;
2779 if (literal_end != literal_start)
2781 Fsubstring(replacement,
2786 if (substart >= 0 && subend != substart)
2787 substring = Fsubstring(string,
2792 if (!NILP(literal_text)
2793 || !NILP(substring))
2795 concat3(accum, literal_text,
2797 literal_start = strpos + 1;
2801 if (strpos != literal_start)
2802 /* some literal text at end to be inserted */
2805 Fsubstring(replacement,
2806 make_int(literal_start),
2809 replacement = accum;
2812 /* replacement can be nil. */
2813 if (NILP(replacement))
2814 replacement = build_string("");
2816 if (case_action == all_caps)
2817 replacement = Fupcase(replacement, buffer);
2818 else if (case_action == cap_initial)
2819 replacement = Fupcase_initials(replacement, buffer);
2821 /* Now finally, we need to process the \U's, \E's, etc. */
2822 if (ul_pos_dynarr) {
2824 int cur_action = 'E';
2825 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2828 for (strpos = 0; strpos < stlen; strpos++) {
2830 string_char(XSTRING(replacement), strpos);
2831 Emchar newchar = -1;
2832 if (i < Dynarr_length(ul_pos_dynarr) &&
2833 strpos == Dynarr_at(ul_pos_dynarr, i)) {
2835 Dynarr_at(ul_action_dynarr, i);
2837 if (new_action == 'u')
2838 newchar = UPCASE(buf, curchar);
2839 else if (new_action == 'l')
2841 DOWNCASE(buf, curchar);
2843 cur_action = new_action;
2845 if (newchar == -1) {
2846 if (cur_action == 'U')
2847 newchar = UPCASE(buf, curchar);
2848 else if (cur_action == 'L')
2850 DOWNCASE(buf, curchar);
2854 if (newchar != curchar)
2855 set_string_char(XSTRING(replacement),
2860 /* frees the Dynarrs if necessary. */
2861 unbind_to(speccount, Qnil);
2862 return concat3(before, replacement, after);
2865 mc_count = begin_multiple_change(buf, search_regs.start[sub],
2866 search_regs.end[sub]);
2868 /* begin_multiple_change() records an unwind-protect, so we need to
2869 record this value now. */
2870 speccount = specpdl_depth();
2872 /* We insert the replacement text before the old text, and then
2873 delete the original text. This means that markers at the
2874 beginning or end of the original will float to the corresponding
2875 position in the replacement. */
2876 BUF_SET_PT(buf, search_regs.start[sub]);
2878 Finsert(1, &replacement);
2880 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2882 struct gcpro gcpro1;
2883 GCPRO1(replacement);
2884 for (strpos = 0; strpos < stlen; strpos++) {
2885 /* on the first iteration assert(offset==0),
2886 exactly complementing BUF_SET_PT() above.
2887 During the loop, it keeps track of the amount inserted.
2889 Charcount offset = BUF_PT(buf) - search_regs.start[sub];
2891 c = string_char(XSTRING(replacement), strpos);
2892 if (c == '\\' && strpos < stlen - 1) {
2893 /* XXX FIXME: replacing just a substring non-literally
2894 using backslash refs to the match looks dangerous. But
2895 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2896 <duwe@caldera.de> claims Finsert_buffer_substring already
2897 handles this correctly.
2899 c = string_char(XSTRING(replacement), ++strpos);
2901 Finsert_buffer_substring
2903 make_int(search_regs.start[0] +
2905 make_int(search_regs.end[0] +
2907 else if (c >= '1' && c <= '9'
2908 && c <= search_regs.num_regs + '0') {
2909 if (search_regs.start[c - '0'] >= 1)
2910 Finsert_buffer_substring
2912 make_int(search_regs.
2915 make_int(search_regs.
2918 } else if (c == 'U' || c == 'u' || c == 'L'
2919 || c == 'l' || c == 'E') {
2920 /* Keep track of all case changes requested, but don't
2921 make them now. Do them later so we override
2923 if (!ul_pos_dynarr) {
2924 ul_pos_dynarr = Dynarr_new(int);
2927 record_unwind_protect
2928 (free_created_dynarrs,
2929 Fcons(make_opaque_ptr
2932 (ul_action_dynarr)));
2934 Dynarr_add(ul_pos_dynarr, BUF_PT(buf));
2935 Dynarr_add(ul_action_dynarr, c);
2937 buffer_insert_emacs_char(buf, c);
2939 buffer_insert_emacs_char(buf, c);
2944 inslen = BUF_PT(buf) - (search_regs.start[sub]);
2945 buffer_delete_range(buf, search_regs.start[sub] + inslen,
2946 search_regs.end[sub] + inslen, 0);
2948 if (case_action == all_caps)
2949 Fupcase_region(make_int(BUF_PT(buf) - inslen),
2950 make_int(BUF_PT(buf)), buffer);
2951 else if (case_action == cap_initial)
2952 Fupcase_initials_region(make_int(BUF_PT(buf) - inslen),
2953 make_int(BUF_PT(buf)), buffer);
2955 /* Now go through and make all the case changes that were requested
2956 in the replacement string. */
2957 if (ul_pos_dynarr) {
2958 Bufpos eend = BUF_PT(buf);
2960 int cur_action = 'E';
2962 for (pos = BUF_PT(buf) - inslen; pos < eend; pos++) {
2963 Emchar curchar = BUF_FETCH_CHAR(buf, pos);
2964 Emchar newchar = -1;
2965 if (i < Dynarr_length(ul_pos_dynarr) &&
2966 pos == Dynarr_at(ul_pos_dynarr, i)) {
2967 int new_action = Dynarr_at(ul_action_dynarr, i);
2969 if (new_action == 'u')
2970 newchar = UPCASE(buf, curchar);
2971 else if (new_action == 'l')
2972 newchar = DOWNCASE(buf, curchar);
2974 cur_action = new_action;
2976 if (newchar == -1) {
2977 if (cur_action == 'U')
2978 newchar = UPCASE(buf, curchar);
2979 else if (cur_action == 'L')
2980 newchar = DOWNCASE(buf, curchar);
2984 if (newchar != curchar)
2985 buffer_replace_char(buf, pos, newchar, 0, 0);
2989 /* frees the Dynarrs if necessary. */
2990 unbind_to(speccount, Qnil);
2991 end_multiple_change(buf, mc_count);
2996 static Lisp_Object match_limit(Lisp_Object num, int beginningp)
2998 /* This function has been Mule-ized. */
3003 if (n < 0 || search_regs.num_regs <= 0)
3004 args_out_of_range(num, make_int(search_regs.num_regs));
3005 if (n >= search_regs.num_regs || search_regs.start[n] < 0)
3007 return make_int(beginningp ? search_regs.start[n] : search_regs.end[n]);
3010 DEFUN("match-beginning", Fmatch_beginning, 1, 1, 0, /*
3011 Return position of start of text matched by last regexp search.
3012 NUM, specifies which parenthesized expression in the last regexp.
3013 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3014 Zero means the entire text matched by the whole regexp or whole string.
3018 return match_limit(num, 1);
3021 DEFUN("match-end", Fmatch_end, 1, 1, 0, /*
3022 Return position of end of text matched by last regexp search.
3023 NUM specifies which parenthesized expression in the last regexp.
3024 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3025 Zero means the entire text matched by the whole regexp or whole string.
3029 return match_limit(num, 0);
3032 DEFUN("match-data", Fmatch_data, 0, 2, 0, /*
3033 Return a list containing all info on what the last regexp search matched.
3034 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
3035 All the elements are markers or nil (nil if the Nth pair didn't match)
3036 if the last match was on a buffer; integers or nil if a string was matched.
3037 Use `store-match-data' to reinstate the data in this list.
3039 If INTEGERS (the optional first argument) is non-nil, always use integers
3040 \(rather than markers) to represent buffer positions.
3041 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
3042 to hold all the values, and if INTEGERS is non-nil, no consing is done.
3046 /* This function has been Mule-ized. */
3047 Lisp_Object tail, prev;
3052 if (NILP(last_thing_searched))
3053 /*error ("match-data called before any match found"); */
3056 data = alloca_array(Lisp_Object, 2 * search_regs.num_regs);
3059 for (i = 0; i < search_regs.num_regs; i++) {
3060 Bufpos start = search_regs.start[i];
3062 if (EQ(last_thing_searched, Qt)
3063 || !NILP(integers)) {
3064 data[2 * i] = make_int(start);
3065 data[2 * i + 1] = make_int(search_regs.end[i]);
3066 } else if (BUFFERP(last_thing_searched)) {
3067 data[2 * i] = Fmake_marker();
3068 Fset_marker(data[2 * i],
3070 last_thing_searched);
3071 data[2 * i + 1] = Fmake_marker();
3072 Fset_marker(data[2 * i + 1],
3073 make_int(search_regs.end[i]),
3074 last_thing_searched);
3076 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
3081 data[2 * i] = data[2 * i + 1] = Qnil;
3084 return Flist(2 * len + 2, data);
3086 /* If REUSE is a list, store as many value elements as will fit
3087 into the elements of REUSE. */
3088 for (prev = Qnil, i = 0, tail = reuse; CONSP(tail);
3089 i++, tail = XCDR(tail)) {
3090 if (i < 2 * len + 2)
3091 XCAR(tail) = data[i];
3097 /* If we couldn't fit all value elements into REUSE,
3098 cons up the rest of them and add them to the end of REUSE. */
3099 if (i < 2 * len + 2)
3100 XCDR(prev) = Flist(2 * len + 2 - i, data + i);
3105 DEFUN("store-match-data", Fstore_match_data, 1, 1, 0, /*
3106 Set internal data on last search match from elements of LIST.
3107 LIST should have been created by calling `match-data' previously.
3111 /* This function has been Mule-ized. */
3113 REGISTER Lisp_Object marker;
3117 if (running_asynch_code)
3120 CONCHECK_LIST(list);
3122 /* Unless we find a marker with a buffer in LIST, assume that this
3123 match data came from a string. */
3124 last_thing_searched = Qt;
3126 /* Allocate registers if they don't already exist. */
3127 length = XINT(Flength(list)) / 2;
3128 num_regs = search_regs.num_regs;
3130 if (length > num_regs) {
3131 if (search_regs.num_regs == 0) {
3132 search_regs.start = xnew_atomic_array(regoff_t, length);
3133 search_regs.end = xnew_atomic_array(regoff_t, length);
3135 XREALLOC_ARRAY(search_regs.start, regoff_t, length);
3136 XREALLOC_ARRAY(search_regs.end, regoff_t, length);
3139 search_regs.num_regs = length;
3142 for (i = 0; i < num_regs; i++) {
3143 marker = Fcar(list);
3145 search_regs.start[i] = -1;
3148 if (MARKERP(marker)) {
3149 if (XMARKER(marker)->buffer == 0)
3152 XSETBUFFER(last_thing_searched,
3153 XMARKER(marker)->buffer);
3156 CHECK_INT_COERCE_MARKER(marker);
3157 search_regs.start[i] = XINT(marker);
3160 marker = Fcar(list);
3161 if (MARKERP(marker) && XMARKER(marker)->buffer == 0)
3164 CHECK_INT_COERCE_MARKER(marker);
3165 search_regs.end[i] = XINT(marker);
3173 /* If non-zero the match data have been saved in saved_search_regs
3174 during the execution of a sentinel or filter. */
3175 static int search_regs_saved;
3176 static struct re_registers saved_search_regs;
3178 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3179 if asynchronous code (filter or sentinel) is running. */
3180 static void save_search_regs(void)
3182 if (!search_regs_saved) {
3183 saved_search_regs.num_regs = search_regs.num_regs;
3184 saved_search_regs.start = search_regs.start;
3185 saved_search_regs.end = search_regs.end;
3186 search_regs.num_regs = 0;
3187 search_regs.start = 0;
3188 search_regs.end = 0;
3190 search_regs_saved = 1;
3194 /* Called upon exit from filters and sentinels. */
3195 void restore_match_data(void)
3197 if (search_regs_saved) {
3198 if (search_regs.num_regs > 0) {
3199 xfree(search_regs.start);
3200 xfree(search_regs.end);
3202 search_regs.num_regs = saved_search_regs.num_regs;
3203 search_regs.start = saved_search_regs.start;
3204 search_regs.end = saved_search_regs.end;
3206 search_regs_saved = 0;
3210 /* Quote a string to inactivate reg-expr chars */
3212 DEFUN("regexp-quote", Fregexp_quote, 1, 1, 0, /*
3213 Return a regexp string which matches exactly STRING and nothing else.
3217 REGISTER Bufbyte *in, *out, *end;
3218 REGISTER Bufbyte *temp;
3220 CHECK_STRING(string);
3222 temp = (Bufbyte *) alloca(XSTRING_LENGTH(string) * 2);
3224 /* Now copy the data into the new string, inserting escapes. */
3226 in = XSTRING_DATA(string);
3227 end = in + XSTRING_LENGTH(string);
3231 Emchar c = charptr_emchar(in);
3233 if (c == '[' || c == ']'
3234 || c == '*' || c == '.' || c == '\\'
3235 || c == '?' || c == '+' || c == '^' || c == '$')
3237 out += set_charptr_emchar(out, c);
3241 return make_string(temp, out - temp);
3244 DEFUN("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3245 Set the regexp to be used to match a word in regular-expression searching.
3246 #### Not yet implemented. Currently does nothing.
3247 #### Do not use this yet. Its calling interface is likely to change.
3254 /************************************************************************/
3255 /* initialization */
3256 /************************************************************************/
3258 void syms_of_search(void)
3261 DEFERROR_STANDARD(Qsearch_failed, Qinvalid_operation);
3262 DEFERROR_STANDARD(Qinvalid_regexp, Qsyntax_error);
3264 #ifdef EF_USE_COMPRE
3265 defsymbol(&Qcompre, "compre");
3266 defsymbol(&Qcomprep, "comprep");
3268 DEFSUBR(Fcompile_regexp);
3269 DEFSUBR(Fdefregexp);
3272 DEFSUBR(Flooking_at);
3273 DEFSUBR(Fposix_looking_at);
3274 DEFSUBR(Fstring_match);
3275 DEFSUBR(Fposix_string_match);
3276 DEFSUBR(Fskip_chars_forward);
3277 DEFSUBR(Fskip_chars_backward);
3278 DEFSUBR(Fskip_syntax_forward);
3279 DEFSUBR(Fskip_syntax_backward);
3280 DEFSUBR(Fsearch_forward);
3281 DEFSUBR(Fsearch_backward);
3282 DEFSUBR(Fword_search_forward);
3283 DEFSUBR(Fword_search_backward);
3284 DEFSUBR(Fre_search_forward);
3285 DEFSUBR(Fre_search_backward);
3286 DEFSUBR(Fposix_search_forward);
3287 DEFSUBR(Fposix_search_backward);
3288 DEFSUBR(Freplace_match);
3289 DEFSUBR(Fmatch_beginning);
3290 DEFSUBR(Fmatch_end);
3291 DEFSUBR(Fmatch_data);
3292 DEFSUBR(Fstore_match_data);
3293 DEFSUBR(Fregexp_quote);
3294 DEFSUBR(Fset_word_regexp);
3297 void reinit_vars_of_search(void)
3301 last_thing_searched = Qnil;
3302 staticpro_nodump(&last_thing_searched);
3304 for (i = 0; i < REGEXP_CACHE_SIZE; ++i) {
3305 searchbufs[i].buf.allocated = 100;
3306 searchbufs[i].buf.buffer = (unsigned char *)xmalloc_atomic(100);
3307 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3308 searchbufs[i].regexp = Qnil;
3309 staticpro_nodump(&searchbufs[i].regexp);
3310 searchbufs[i].next =
3311 (i == REGEXP_CACHE_SIZE - 1 ? 0 : &searchbufs[i + 1]);
3313 searchbuf_head = &searchbufs[0];
3316 void vars_of_search(void)
3318 reinit_vars_of_search();
3320 DEFVAR_LISP("forward-word-regexp", &Vforward_word_regexp /*
3321 *Regular expression to be used in `forward-word'.
3322 #### Not yet implemented.
3324 Vforward_word_regexp = Qnil;
3326 DEFVAR_LISP("backward-word-regexp", &Vbackward_word_regexp /*
3327 *Regular expression to be used in `backward-word'.
3328 #### Not yet implemented.
3330 Vbackward_word_regexp = Qnil;
3333 void complex_vars_of_search(void)
3335 Vskip_chars_range_table = Fmake_range_table();
3336 staticpro(&Vskip_chars_range_table);