1 /* String search routines for SXEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of SXEmacs
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* Synched up with: FSF 19.29, except for region-cache stuff. */
23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
25 /* This file has been Mule-ized except for the TRT stuff. */
31 #include "ui/insdel.h"
33 #ifdef REGION_CACHE_NEEDS_WORK
34 #include "region-cache.h"
38 #include <sys/types.h>
45 #define TRANSLATE(table, pos) \
46 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
49 /* Make sure these are ALWAYS powers of 2 */
50 #define REGEXP_CACHE_SIZE 0x80
51 #define REGEXP_CACHE_HASH_MASK (REGEXP_CACHE_SIZE-1)
52 #define REGEXP_FASTMAP_SIZE 0400
53 #define REGEXP_FASTMAP_MASK (REGEXP_FASTMAP_SIZE-1)
55 #define __REGEXP_DEBUG__(args...) fprintf(stderr, "REGEXP " args)
56 #ifndef REGEXP_DEBUG_FLAG
57 #define REGEXP_DEBUG(args...)
59 #define REGEXP_DEBUG(args...) __REGEXP_DEBUG__(args)
61 #define REGEXP_DEBUG_COMPRE(args...) REGEXP_DEBUG("[compre]: " args)
62 #define REGEXP_DEBUG_COMPRE_C(args...) REGEXP_DEBUG("[compre/cache]: " args)
63 #define REGEXP_DEBUG_COMPRE_H(args...) REGEXP_DEBUG("[compre/hash]: " args)
64 #define REGEXP_CRITICAL(args...) __REGEXP_DEBUG__("CRITICAL: " args)
67 /* If the regexp is non-nil, then the buffer contains the compiled form
68 of that regexp, suitable for searching. */
70 struct regexp_cache *next;
72 struct re_pattern_buffer buf;
73 char fastmap[REGEXP_FASTMAP_SIZE];
74 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
78 /* The instances of that struct. */
79 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
81 /* The head of the linked list; points to the most recently used buffer. */
82 static struct regexp_cache *searchbuf_head;
84 /* Every call to re_match, etc., must pass &search_regs as the regs
85 argument unless you can show it is unnecessary (i.e., if re_match
86 is certainly going to be called again before region-around-match
89 Since the registers are now dynamically allocated, we need to make
90 sure not to refer to the Nth register before checking that it has
91 been allocated by checking search_regs.num_regs.
93 The regex code keeps track of whether it has allocated the search
94 buffer using bits in the re_pattern_buffer. This means that whenever
95 you compile a new pattern, it completely forgets whether it has
96 allocated any registers, and will allocate new registers the next
97 time you call a searching or matching function. Therefore, we need
98 to call re_set_registers after compiling a new pattern or after
99 setting the match registers, so that the regex functions will be
100 able to free or re-allocate it properly. */
102 /* Note: things get trickier under Mule because the values returned from
103 the regexp routines are in Bytinds but we need them to be in Bufpos's.
104 We take the easy way out for the moment and just convert them immediately.
105 We could be more clever by not converting them until necessary, but
106 that gets real ugly real fast since the buffer might have changed and
107 the positions might be out of sync or out of range.
109 static struct re_registers search_regs;
111 /* The buffer in which the last search was performed, or
112 Qt if the last search was done in a string;
113 Qnil if no searching has been done yet. */
114 static Lisp_Object last_thing_searched;
116 /* error condition signalled when regexp compile_pattern fails */
118 Lisp_Object Qinvalid_regexp;
120 /* Regular expressions used in forward/backward-word */
121 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
123 /* range table for use with skip_chars. Only needed for Mule. */
124 Lisp_Object Vskip_chars_range_table;
126 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len);
127 static void clear_unused_search_regs(struct re_registers *regp, int no_sub);
128 static void save_search_regs(void);
129 static Bufpos simple_search(struct buffer *buf, Bufbyte * base_pat,
130 Bytecount len, Bytind pos, Bytind lim,
131 EMACS_INT n, Lisp_Object trt);
132 static Bufpos boyer_moore(struct buffer *buf, Bufbyte * base_pat,
133 Bytecount len, Bytind pos, Bytind lim,
134 EMACS_INT n, Lisp_Object trt,
135 Lisp_Object inverse_trt, int charset_base);
136 static Bufpos search_buffer(struct buffer *buf, Lisp_Object str,
137 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
138 Lisp_Object trt, Lisp_Object inverse_trt,
141 static void matcher_overflow(void)
143 error("Stack overflow in regexp matcher");
150 #define COMPRE_T struct re_pattern_buffer
151 #define COMPREP(obj) \
152 (DYNACATP(obj) && EQ(XDYNACAT_TYPE(obj), Qcompre))
153 #define COMPRE_GET(obj) (COMPRE_T*)XDYNACAT(obj)->ptr
154 #define COMPRE_PUT(obj, rec) do { \
155 obj = make_dynacat((void*)rec); \
156 set_dynacat_printer(obj, compre_prfun); \
157 set_dynacat_finaliser(obj, compre_finfun); \
158 XDYNACAT_TYPE(obj) = Qcompre; \
160 #define XCOMPRE_GET(obj) XSTRING(obj)->compre
161 #define XCOMPRE_PUT(obj, b) XSTRING(obj)->compre = b
162 #define XCOMPRE_REM(obj) XSTRING(obj)->compre = Qnil
164 /* idenitifier for hard-cached rexp */
165 Lisp_Object Qcompre, Qcomprep;
166 static Lisp_Object cache_regexp(Lisp_Object, struct re_pattern_buffer*);
167 static COMPRE_T* make_compre(void)
168 #if defined(__GNUC__)
169 __attribute__((unused))
172 static COMPRE_T* clone_compre(COMPRE_T*);
173 static inline void free_compre(COMPRE_T*);
176 /* Compile a regexp and signal a Lisp error if anything goes wrong.
177 PATTERN is the pattern to compile.
178 CP is the place to put the result.
179 TRANSLATE is a translation table for ignoring case, or NULL for none.
180 REGP is the structure that says where to store the "register"
181 values that will result from matching this pattern.
182 If it is 0, we should compile the pattern not to record any
183 subexpression bounds.
184 POSIX is nonzero if we want full backtracking (POSIX style)
185 for this pattern. 0 means backtrack only enough to get a valid match. */
188 compile_pattern_1(struct regexp_cache *cp, Lisp_Object pattern,
189 Lisp_Object translate, struct re_registers *regp, int posix,
196 cp->buf.translate = translate;
198 old = re_set_syntax(RE_SYNTAX_EMACS
199 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
201 re_compile_pattern((char *)XSTRING_DATA(pattern),
202 XSTRING_LENGTH(pattern), &cp->buf);
205 maybe_signal_error(Qinvalid_regexp, list1(build_string(val)),
210 cp->regexp = Fcopy_sequence(pattern);
215 /* extremely simple hash table, no collision management is performed */
216 static struct re_pattern_buffer *
217 try_pattern_from_cache(Lisp_Object pattern,
218 struct re_registers *regp,
219 Lisp_Object translate,
223 struct regexp_cache *cp;
227 pathash = internal_hash(pattern, 0);
228 place = (int)((pathash>>4) & REGEXP_CACHE_HASH_MASK);
229 cp = &(searchbufs[place]);
230 REGEXP_DEBUG_COMPRE_H("trying hash place: %d.\n", place);
232 if (!NILP(Fstring_equal(cp->regexp, pattern))
233 && EQ(cp->buf.translate, translate) && cp->posix == posix) {
235 REGEXP_DEBUG_COMPRE_H("match: %d.\n", place);
238 /* collision or empty slot, dont care just put the
239 * new regexp there */
240 if (!compile_pattern_1(
241 cp, pattern, translate, regp, posix, errb))
243 REGEXP_DEBUG_COMPRE_H("collision: %d.\n", place);
248 #else /* !EF_USE_COMPRE */
249 static struct re_pattern_buffer *
250 try_pattern_from_cache(Lisp_Object pattern,
251 struct re_registers *regp,
252 Lisp_Object translate,
256 struct regexp_cache *cp, **cpp;
258 for (cpp = &searchbuf_head;; cpp = &cp->next) {
260 if (!NILP(Fstring_equal(cp->regexp, pattern))
261 && EQ(cp->buf.translate, translate)
262 && cp->posix == posix)
265 /* If we're at the end of the cache, compile
266 * into the last cell. */
268 if (!compile_pattern_1(
269 cp, pattern, translate, regp, posix, errb))
275 /* When we get here, cp (aka *cpp) contains the compiled pattern,
276 either because we found it in the cache or because we just compiled it.
277 Move it to the front of the queue to mark it as most recently used. */
279 cp->next = searchbuf_head;
284 #endif /* EF_USE_COMPRE */
286 /* Compile a regexp if necessary, but first check to see if there's one in
288 PATTERN is the pattern to compile.
289 TRANSLATE is a translation table for ignoring case, or NULL for none.
290 REGP is the structure that says where to store the "register"
291 values that will result from matching this pattern.
292 If it is 0, we should compile the pattern not to record any
293 subexpression bounds.
294 POSIX is nonzero if we want full backtracking (POSIX style)
295 for this pattern. 0 means backtrack only enough to get a valid match. */
297 struct re_pattern_buffer *
298 compile_pattern(Lisp_Object pattern,
299 struct re_registers *regp,
300 Lisp_Object translate,
304 struct re_pattern_buffer *result = NULL;
307 Lisp_Object rc = XCOMPRE_GET(pattern);
309 if (!NILP(rc) && COMPREP(rc) &&
310 (result = COMPRE_GET(rc)) &&
311 result->re_ngroups >= 0 && result->re_ngroups < 256) {
312 REGEXP_DEBUG_COMPRE_C("using cache: 0x%x.\n",
313 (unsigned int)result);
317 result = try_pattern_from_cache(
318 pattern, regp, translate, posix, errb);
320 cache_regexp(pattern, result);
325 /* Advise the searching functions about the space we have allocated
326 for register data. */
328 re_set_registers(result, regp, regp->num_regs,
329 regp->start, regp->end);
336 compre_prfun(Lisp_Object obj, Lisp_Object pcfun, int escflag)
338 write_fmt_str(pcfun, "#<compiled regexp %lx",
339 (long unsigned int)((COMPRE_GET(obj))->buffer));
344 compre_finfun(Lisp_Object obj, int unused)
346 REGEXP_DEBUG_COMPRE_C("0x%lx@0x%lx will pass away\n",
347 (long unsigned int)COMPRE_GET(obj),
348 (long unsigned int)obj);
349 free_compre(COMPRE_GET(obj));
356 COMPRE_T *result = xnew_and_zero(COMPRE_T);
358 result->fastmap = xmalloc_atomic(REGEXP_FASTMAP_SIZE);
365 clone_compre(COMPRE_T *src)
367 COMPRE_T *result = xnew_and_zero(COMPRE_T);
369 /* alloc and clone fastmap */
370 result->fastmap = (char*)xmalloc_atomic(REGEXP_FASTMAP_SIZE);
371 memcpy(result->fastmap, src->fastmap, REGEXP_FASTMAP_SIZE);
373 /* alloc and clone buffer */
374 result->buffer = (unsigned char *)xmalloc_atomic(src->allocated);
375 memcpy(result->buffer, src->buffer, src->allocated);
376 result->allocated = src->allocated;
377 result->used = src->used;
379 result->syntax = src->syntax;
380 result->translate = src->translate;
381 result->re_nsub = src->re_nsub;
382 result->re_ngroups = src->re_ngroups;
384 result->can_be_null = src->can_be_null;
385 result->regs_allocated = src->regs_allocated;
386 result->fastmap_accurate = src->fastmap_accurate;
388 result->no_sub = src->no_sub;
389 result->not_bol = src->not_bol;
390 result->not_eol = src->not_eol;
391 result->newline_anchor = src->newline_anchor;
393 /* alloc and clone ext_to_int_register */
394 result->external_to_internal_register =
395 (int*)xmalloc_atomic(
396 sizeof(int) * src->external_to_internal_register_size);
397 memcpy(result->external_to_internal_register,
398 src->external_to_internal_register,
399 sizeof(int)*src->external_to_internal_register_size);
400 result->external_to_internal_register_size =
401 src->external_to_internal_register_size;
407 free_compre(COMPRE_T *buf)
417 if (buf->external_to_internal_register) {
418 xfree(buf->external_to_internal_register);
419 buf->external_to_internal_register = NULL;
425 cache_regexp(Lisp_Object regexp, COMPRE_T *buf)
433 resbuf = clone_compre(buf);
434 COMPRE_PUT(rc, resbuf);
435 XCOMPRE_PUT(regexp, rc);
437 REGEXP_DEBUG_COMPRE_C("caching 0x%08x into 0x%08x\n",
438 (unsigned int)resbuf, (unsigned int)rc);
442 DEFUN("compile-regexp", Fcompile_regexp, 1, 1, 0, /*
443 Forcibly compile REGEXP and store the result in object-plist.
447 CHECK_STRING(regexp);
450 compile_pattern(regexp, &search_regs, Qnil, 0, ERROR_ME);
455 DEFUN("defregexp", Fdefregexp, 2, UNEVALLED, 0, /*
456 \(defregexp SYMBOL REGEXP DOCSTRING\)
457 Like `defconst' but for forcing compiled regexps.
459 The same restrictions that apply to `defconst' apply here in regard
460 to user variables. You shouldn't use this for regular expressions
461 that a user might want to customise. Instead, use `defcustom' with
466 /* This function can GC */
467 Lisp_Object sym = XCAR(args);
468 Lisp_Object pat = Feval(XCAR(args = XCDR(args)));
474 pat = Fcompile_regexp(pat);
475 Fset_default(sym, pat);
478 if (!NILP(args = XCDR(args))) {
479 Lisp_Object doc = XCAR(args);
480 Fput(sym, Qvariable_documentation, doc);
481 if (!NILP(args = XCDR(args)))
482 error("too many arguments");
485 if (!NILP(Vfile_domain))
486 Fput(sym, Qvariable_domain, Vfile_domain);
489 LOADHIST_ATTACH(sym);
492 #endif /* EF_USE_COMPRE */
494 /* Error condition used for failing searches */
495 Lisp_Object Qsearch_failed;
497 static Lisp_Object signal_failure(Lisp_Object arg)
500 Fsignal(Qsearch_failed, list1(arg));
501 return Qnil; /* Not reached. */
504 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
505 done after each regexp match that uses the search regs.
507 We could get a potential speedup by not converting the search registers
508 until it's really necessary, e.g. when match-data or replace-match is
509 called. However, this complexifies the code a lot (e.g. the buffer
510 could have changed and the Bytinds stored might be invalid) and is
511 probably not a great time-saver. */
513 static void fixup_search_regs_for_buffer(struct buffer *buf)
516 int num_regs = search_regs.num_regs;
518 for (i = 0; i < num_regs; i++) {
519 if (search_regs.start[i] >= 0)
520 search_regs.start[i] =
521 bytind_to_bufpos(buf, search_regs.start[i]);
522 if (search_regs.end[i] >= 0)
524 bytind_to_bufpos(buf, search_regs.end[i]);
528 /* Similar but for strings. */
529 static void fixup_search_regs_for_string(Lisp_Object string)
532 int num_regs = search_regs.num_regs;
534 /* #### bytecount_to_charcount() is not that efficient. This function
535 could be faster if it did its own conversion (using INC_CHARPTR()
536 and such), because the register ends are likely to be somewhat ordered.
537 (Even if not, you could sort them.)
539 Think about this if this function is a time hog, which it's probably
541 for (i = 0; i < num_regs; i++) {
542 if (search_regs.start[i] > 0) {
543 search_regs.start[i] =
544 bytecount_to_charcount(XSTRING_DATA(string),
545 search_regs.start[i]);
547 if (search_regs.end[i] > 0) {
549 bytecount_to_charcount(XSTRING_DATA(string),
556 looking_at_1(Lisp_Object string, struct buffer *buf, int posix)
558 /* This function has been Mule-ized, except for the trt table handling. */
563 struct re_pattern_buffer *bufp;
565 if (running_asynch_code)
568 CHECK_STRING(string);
569 bufp = compile_pattern(string, &search_regs,
570 (!NILP(buf->case_fold_search)
571 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
576 /* Get pointers and sizes of the two strings
577 that make up the visible portion of the buffer. */
579 p1 = BI_BUF_BEGV(buf);
580 p2 = BI_BUF_CEILING_OF(buf, p1);
582 s2 = BI_BUF_ZV(buf) - p2;
584 regex_match_object = Qnil;
585 regex_emacs_buffer = buf;
586 i = re_match_2(bufp, (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
587 s1, (char *)BI_BUF_BYTE_ADDRESS(buf, p2), s2,
588 BI_BUF_PT(buf) - BI_BUF_BEGV(buf), &search_regs,
589 BI_BUF_ZV(buf) - BI_BUF_BEGV(buf));
594 val = (0 <= i ? Qt : Qnil);
598 int num_regs = search_regs.num_regs;
599 for (i = 0; i < num_regs; i++)
600 if (search_regs.start[i] >= 0) {
601 search_regs.start[i] += BI_BUF_BEGV(buf);
602 search_regs.end[i] += BI_BUF_BEGV(buf);
605 XSETBUFFER(last_thing_searched, buf);
606 fixup_search_regs_for_buffer(buf);
610 DEFUN("looking-at", Flooking_at, 1, 2, 0, /*
611 Return t if text after point matches regular expression REGEXP.
612 This function modifies the match data that `match-beginning',
613 `match-end' and `match-data' access; save and restore the match
614 data if you want to preserve them.
616 Optional argument BUFFER defaults to the current buffer.
620 return looking_at_1(regexp, decode_buffer(buffer, 0), 0);
623 DEFUN("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
624 Return t if text after point matches regular expression REGEXP.
625 Find the longest match, in accord with Posix regular expression rules.
626 This function modifies the match data that `match-beginning',
627 `match-end' and `match-data' access; save and restore the match
628 data if you want to preserve them.
630 Optional argument BUFFER defaults to the current buffer.
634 return looking_at_1(regexp, decode_buffer(buffer, 0), 1);
638 string_match_1(Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
639 struct buffer *buf, int posix)
641 /* This function has been Mule-ized, except for the trt table handling. */
644 struct re_pattern_buffer *bufp;
646 if (running_asynch_code)
649 CHECK_STRING(regexp);
650 CHECK_STRING(string);
655 Charcount len = XSTRING_CHAR_LENGTH(string);
659 if (s < 0 && -s <= len)
661 else if (0 > s || s > len)
662 args_out_of_range(string, start);
665 bufp = compile_pattern(regexp, &search_regs,
666 (!NILP(buf->case_fold_search)
667 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
671 Bytecount bis = charcount_to_bytecount(XSTRING_DATA(string), s);
672 regex_match_object = string;
673 regex_emacs_buffer = buf;
674 val = re_search(bufp, (char *)XSTRING_DATA(string),
675 XSTRING_LENGTH(string), bis,
676 XSTRING_LENGTH(string) - bis, &search_regs);
682 last_thing_searched = Qt;
683 fixup_search_regs_for_string(string);
684 return make_int(bytecount_to_charcount(XSTRING_DATA(string), val));
687 DEFUN("string-match", Fstring_match, 2, 4, 0, /*
688 Return index of start of first match for REGEXP in STRING, or nil.
689 If third arg START is non-nil, start search at that index in STRING.
690 For index of first char beyond the match, do (match-end 0).
691 `match-end' and `match-beginning' also give indices of substrings
692 matched by parenthesis constructs in the pattern.
694 Optional arg BUFFER controls how case folding is done (according to
695 the value of `case-fold-search' in that buffer and that buffer's case
696 tables) and defaults to the current buffer.
698 (regexp, string, start, buffer))
700 return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
704 DEFUN("posix-string-match", Fposix_string_match, 2, 4, 0, /*
705 Return index of start of first match for REGEXP in STRING, or nil.
706 Find the longest match, in accord with Posix regular expression rules.
707 If third arg START is non-nil, start search at that index in STRING.
708 For index of first char beyond the match, do (match-end 0).
709 `match-end' and `match-beginning' also give indices of substrings
710 matched by parenthesis constructs in the pattern.
712 Optional arg BUFFER controls how case folding is done (according to
713 the value of `case-fold-search' in that buffer and that buffer's case
714 tables) and defaults to the current buffer.
716 (regexp, string, start, buffer))
718 return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
722 /* Match REGEXP against STRING, searching all of STRING,
723 and return the index of the match, or negative on failure.
724 This does not clobber the match data. */
727 fast_string_match(Lisp_Object regexp, const Bufbyte * nonreloc,
728 Lisp_Object reloc, Bytecount offset,
729 Bytecount length, int case_fold_search,
730 Error_behavior errb, int no_quit)
732 /* This function has been Mule-ized, except for the trt table handling. */
734 const Bufbyte *newnonreloc = (const Bufbyte*)nonreloc;
735 struct re_pattern_buffer *bufp;
737 bufp = compile_pattern(regexp, 0,
739 ? XCASE_TABLE_DOWNCASE(current_buffer->
743 return -1; /* will only do this when errb != ERROR_ME */
747 no_quit_in_re_search = 1;
749 fixup_internal_substring(nonreloc, reloc, offset, &length);
755 newnonreloc = XSTRING_DATA(reloc);
757 /* QUIT could relocate RELOC. Therefore we must
758 alloca() and copy. No way around this except some
759 serious rewriting of re_search(). */
760 /* yeah, let's rewrite this bugger, the warning
761 hereafter is inevitable too */
764 fixup_internal_substring should
765 have updated length, if it didn't
766 return with failure...
769 Bufbyte *copy = alloca(length);
771 (const void*)XSTRING_DATA(reloc),
777 /* #### evil current-buffer dependency */
778 regex_match_object = reloc;
779 regex_emacs_buffer = current_buffer;
780 val = re_search(bufp, (const char*)newnonreloc + offset, length, 0,
783 no_quit_in_re_search = 0;
787 Bytecount fast_lisp_string_match(Lisp_Object regex, Lisp_Object string)
789 return fast_string_match(regex, 0, string, 0, -1, 0, ERROR_ME, 0);
792 #ifdef REGION_CACHE_NEEDS_WORK
793 /* The newline cache: remembering which sections of text have no newlines. */
795 /* If the user has requested newline caching, make sure it's on.
796 Otherwise, make sure it's off.
797 This is our cheezy way of associating an action with the change of
798 state of a buffer-local variable. */
799 static void newline_cache_on_off(struct buffer *buf)
801 if (NILP(buf->cache_long_line_scans)) {
802 /* It should be off. */
803 if (buf->newline_cache) {
804 free_region_cache(buf->newline_cache);
805 buf->newline_cache = 0;
808 /* It should be on. */
809 if (buf->newline_cache == 0)
810 buf->newline_cache = new_region_cache();
815 /* Search in BUF for COUNT instances of the character TARGET between
818 If COUNT is positive, search forwards; END must be >= START.
819 If COUNT is negative, search backwards for the -COUNTth instance;
820 END must be <= START.
821 If COUNT is zero, do anything you please; run rogue, for all I care.
823 If END is zero, use BEGV or ZV instead, as appropriate for the
824 direction indicated by COUNT.
826 If we find COUNT instances, set *SHORTAGE to zero, and return the
827 position after the COUNTth match. Note that for reverse motion
828 this is not the same as the usual convention for Emacs motion commands.
830 If we don't find COUNT instances before reaching END, set *SHORTAGE
831 to the number of TARGETs left unfound, and return END.
833 If ALLOW_QUIT is non-zero, call QUIT periodically. */
836 bi_scan_buffer(struct buffer *buf, Emchar target, Bytind st, Bytind en,
837 EMACS_INT count, EMACS_INT * shortage, int allow_quit)
839 /* This function has been Mule-ized. */
840 Bytind lim = en > 0 ? en :
841 ((count > 0) ? BI_BUF_ZV(buf) : BI_BUF_BEGV(buf));
843 /* #### newline cache stuff in this function not yet ported */
852 /* Due to the Mule representation of characters in a buffer,
853 we can simply search for characters in the range 0 - 127
854 directly. For other characters, we do it the "hard" way.
855 Note that this way works for all characters but the other
857 if (target >= 0200) {
858 while (st < lim && count > 0) {
859 if (BI_BUF_FETCH_CHAR(buf, st) == target)
866 while (st < lim && count > 0) {
870 _ceil_ = BI_BUF_CEILING_OF(buf, st);
871 _ceil_ = min(lim, _ceil_);
874 memchr(BI_BUF_BYTE_ADDRESS(buf, st),
875 (int)target, _ceil_ - st);
878 st = BI_BUF_PTR_BYTE_POS(buf,
892 if (target >= 0200) {
893 while (st > lim && count < 0) {
895 if (BI_BUF_FETCH_CHAR(buf, st) == target)
901 while (st > lim && count < 0) {
906 _floor_ = BI_BUF_FLOOR_OF(buf, st);
907 _floor_ = max(lim, _floor_);
908 /* No memrchr() ... */
909 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE(buf, st);
910 floorptr = BI_BUF_BYTE_ADDRESS(buf, _floor_);
911 while (bufptr >= floorptr) {
913 /* At this point, both ST and BUFPTR
914 refer to the same character. When
915 the loop terminates, ST will always
916 point to the last character we
918 if (*(unsigned char *)bufptr ==
919 (unsigned char)target) {
935 /* We found the character we were looking for; we have to return
936 the position *after* it due to the strange way that the return
945 scan_buffer(struct buffer * buf, Emchar target, Bufpos start, Bufpos end,
946 EMACS_INT count, EMACS_INT * shortage, int allow_quit)
949 Bytind bi_start, bi_end;
951 bi_start = bufpos_to_bytind(buf, start);
953 bi_end = bufpos_to_bytind(buf, end);
956 bi_retval = bi_scan_buffer(buf, target, bi_start, bi_end, count,
957 shortage, allow_quit);
958 return bytind_to_bufpos(buf, bi_retval);
961 Bytind bi_find_next_newline_no_quit(struct buffer * buf, Bytind from, int count)
963 return bi_scan_buffer(buf, '\n', from, 0, count, 0, 0);
966 Bufpos find_next_newline_no_quit(struct buffer * buf, Bufpos from, int count)
968 return scan_buffer(buf, '\n', from, 0, count, 0, 0);
971 Bufpos find_next_newline(struct buffer * buf, Bufpos from, int count)
973 return scan_buffer(buf, '\n', from, 0, count, 0, 1);
977 bi_find_next_emchar_in_string(Lisp_String * str, Emchar target, Bytind st,
980 /* This function has been Mule-ized. */
981 Bytind lim = string_length(str) - 1;
982 Bufbyte *s = string_data(str);
987 /* Due to the Mule representation of characters in a buffer,
988 we can simply search for characters in the range 0 - 127
989 directly. For other characters, we do it the "hard" way.
990 Note that this way works for all characters but the other
992 if (target >= 0200) {
993 while (st < lim && count > 0) {
994 if (string_char(str, st) == target)
996 INC_CHARBYTIND(s, st);
1001 while (st < lim && count > 0) {
1003 (Bufbyte *) memchr(charptr_n_addr(s, st),
1004 (int)target, lim - st);
1007 st = (Bytind) (bufptr - s) + 1;
1015 /* Like find_next_newline, but returns position before the newline,
1016 not after, and only search up to TO. This isn't just
1017 find_next_newline (...)-1, because you might hit TO. */
1019 find_before_next_newline(struct buffer * buf, Bufpos from, Bufpos to, int count)
1022 Bufpos pos = scan_buffer(buf, '\n', from, to, count, &shortage, 1);
1030 /* This function synched with FSF 21.1 */
1032 skip_chars(struct buffer *buf, int forwardp, int syntaxp,
1033 Lisp_Object string, Lisp_Object lim)
1035 /* This function has been Mule-ized. */
1036 REGISTER Bufbyte *p, *pend;
1038 /* We store the first 256 chars in an array here and the rest in
1040 unsigned char fastmap[REGEXP_FASTMAP_SIZE];
1044 Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
1049 limit = forwardp ? BUF_ZV(buf) : BUF_BEGV(buf);
1051 CHECK_INT_COERCE_MARKER(lim);
1054 /* In any case, don't allow scan outside bounds of buffer. */
1055 if (limit > BUF_ZV(buf))
1056 limit = BUF_ZV(buf);
1057 if (limit < BUF_BEGV(buf))
1058 limit = BUF_BEGV(buf);
1061 CHECK_STRING(string);
1062 p = XSTRING_DATA(string);
1063 pend = p + XSTRING_LENGTH(string);
1064 memset(fastmap, 0, sizeof(fastmap));
1066 Fclear_range_table(Vskip_chars_range_table);
1068 if (p != pend && *p == '^') {
1073 /* Find the characters specified and set their elements of fastmap.
1074 If syntaxp, each character counts as itself.
1075 Otherwise, handle backslashes and ranges specially */
1078 c = charptr_emchar(p);
1081 if (c < REGEXP_FASTMAP_SIZE
1082 && syntax_spec_code[c] < (unsigned char)Smax)
1085 signal_simple_error("Invalid syntax designator",
1091 c = charptr_emchar(p);
1094 if (p != pend && *p == '-') {
1097 /* Skip over the dash. */
1101 cend = charptr_emchar(p);
1102 while (c <= cend && c < REGEXP_FASTMAP_SIZE) {
1107 Fput_range_table(make_int(c),
1109 Vskip_chars_range_table);
1112 if (c < REGEXP_FASTMAP_SIZE)
1115 Fput_range_table(make_int(c),
1117 Vskip_chars_range_table);
1122 /* #### Not in FSF 21.1 */
1123 if (syntaxp && fastmap['-'] != 0)
1126 /* If ^ was the first character, complement the fastmap.
1127 We don't complement the range table, however; we just use negate
1128 in the comparisons below. */
1131 for (i = 0; i < (int)(sizeof fastmap); i++)
1135 Bufpos start_point = BUF_PT(buf);
1136 Bufpos pos = start_point;
1137 Bytind pos_byte = BI_BUF_PT(buf);
1140 SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos,
1142 /* All syntax designators are normal chars so nothing strange
1146 while (fastmap[(unsigned char)
1148 [(int)SYNTAX_FROM_CACHE
1150 BI_BUF_FETCH_CHAR(buf,
1154 INC_BYTIND(buf, pos_byte);
1157 UPDATE_SYNTAX_CACHE_FORWARD
1161 while (pos > limit) {
1162 Bufpos savepos = pos_byte;
1164 DEC_BYTIND(buf, pos_byte);
1165 UPDATE_SYNTAX_CACHE_BACKWARD(pos);
1166 if (!fastmap[(unsigned char)
1168 [(int)SYNTAX_FROM_CACHE
1170 BI_BUF_FETCH_CHAR(buf,
1181 while (pos < limit) {
1183 BI_BUF_FETCH_CHAR(buf, pos_byte);
1185 REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1189 Vskip_chars_range_table, Qnil))
1192 INC_BYTIND(buf, pos_byte);
1197 while (pos > limit) {
1198 Bufpos prev_pos_byte = pos_byte;
1201 DEC_BYTIND(buf, prev_pos_byte);
1202 ch = BI_BUF_FETCH_CHAR(buf,
1205 REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1209 Vskip_chars_range_table, Qnil))
1212 pos_byte = prev_pos_byte;
1219 BOTH_BUF_SET_PT(buf, pos, pos_byte);
1220 return make_int(BUF_PT(buf) - start_point);
1224 DEFUN("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
1225 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
1226 STRING is like the inside of a `[...]' in a regular expression
1227 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
1228 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1229 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1230 Returns the distance traveled, either zero or positive.
1232 Optional argument BUFFER defaults to the current buffer.
1234 (string, limit, buffer))
1236 return skip_chars(decode_buffer(buffer, 0), 1, 0, string, limit);
1239 DEFUN("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
1240 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1241 See `skip-chars-forward' for details.
1242 Returns the distance traveled, either zero or negative.
1244 Optional argument BUFFER defaults to the current buffer.
1246 (string, limit, buffer))
1248 return skip_chars(decode_buffer(buffer, 0), 0, 0, string, limit);
1251 DEFUN("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1252 Move point forward across chars in specified syntax classes.
1253 SYNTAX is a string of syntax code characters.
1254 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1255 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1256 This function returns the distance traveled, either zero or positive.
1258 Optional argument BUFFER defaults to the current buffer.
1260 (syntax, limit, buffer))
1262 return skip_chars(decode_buffer(buffer, 0), 1, 1, syntax, limit);
1265 DEFUN("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1266 Move point backward across chars in specified syntax classes.
1267 SYNTAX is a string of syntax code characters.
1268 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1269 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1270 This function returns the distance traveled, either zero or negative.
1272 Optional argument BUFFER defaults to the current buffer.
1274 (syntax, limit, buffer))
1276 return skip_chars(decode_buffer(buffer, 0), 0, 1, syntax, limit);
1279 /* Subroutines of Lisp buffer search functions. */
1282 search_command(Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1283 Lisp_Object count, Lisp_Object buffer, int direction,
1286 /* This function has been Mule-ized, except for the trt table handling. */
1289 EMACS_INT n = direction;
1297 buf = decode_buffer(buffer, 0);
1298 CHECK_STRING(string);
1300 lim = n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
1302 CHECK_INT_COERCE_MARKER(limit);
1304 if (n > 0 ? lim < BUF_PT(buf) : lim > BUF_PT(buf))
1305 error("Invalid search limit (wrong side of point)");
1306 if (lim > BUF_ZV(buf))
1308 if (lim < BUF_BEGV(buf))
1309 lim = BUF_BEGV(buf);
1312 np = search_buffer(buf, string, BUF_PT(buf), lim, n, RE,
1313 (!NILP(buf->case_fold_search)
1314 ? XCASE_TABLE_CANON(buf->case_table)
1315 : Qnil), (!NILP(buf->case_fold_search)
1316 ? XCASE_TABLE_EQV(buf->case_table)
1321 return signal_failure(string);
1322 if (!EQ(noerror, Qt)) {
1323 if (lim < BUF_BEGV(buf) || lim > BUF_ZV(buf))
1325 BUF_SET_PT(buf, lim);
1327 #if 0 /* This would be clean, but maybe programs depend on
1328 a value of nil here. */
1335 if (np < BUF_BEGV(buf) || np > BUF_ZV(buf))
1338 BUF_SET_PT(buf, np);
1340 return make_int(np);
1343 static int trivial_regexp_p(Lisp_Object regexp)
1345 /* This function has been Mule-ized. */
1346 Bytecount len = XSTRING_LENGTH(regexp);
1347 Bufbyte *s = XSTRING_DATA(regexp);
1348 while (--len >= 0) {
1350 /* ']' doesn't appear here because it's only special after ] */
1380 /* 97/2/25 jhod Added for category matches */
1404 /* Search for the n'th occurrence of STRING in BUF,
1405 starting at position BUFPOS and stopping at position BUFLIM,
1406 treating PAT as a literal string if RE is false or as
1407 a regular expression if RE is true.
1409 If N is positive, searching is forward and BUFLIM must be greater
1411 If N is negative, searching is backward and BUFLIM must be less
1414 Returns -x if only N-x occurrences found (x > 0),
1415 or else the position at the beginning of the Nth occurrence
1416 (if searching backward) or the end (if searching forward).
1418 POSIX is nonzero if we want full backtracking (POSIX style)
1419 for this pattern. 0 means backtrack only enough to get a valid match. */
1421 search_buffer(struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1422 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1423 Lisp_Object inverse_trt, int posix)
1425 /* This function has been Mule-ized, except for the trt table handling. */
1426 Bytecount len = XSTRING_LENGTH(string);
1427 Bufbyte *base_pat = XSTRING_DATA(string);
1428 REGISTER EMACS_INT i, j;
1433 if (running_asynch_code)
1436 /* Null string is found at starting position. */
1438 set_search_regs(buf, bufpos, 0);
1439 clear_unused_search_regs(&search_regs, 0);
1443 /* Searching 0 times means noop---don't move, don't touch registers. */
1447 pos = bufpos_to_bytind(buf, bufpos);
1448 lim = bufpos_to_bytind(buf, buflim);
1449 if (RE && !trivial_regexp_p(string)) {
1450 struct re_pattern_buffer *bufp;
1452 bufp = compile_pattern(string, &search_regs, trt, posix,
1455 /* Get pointers and sizes of the two strings
1456 that make up the visible portion of the buffer. */
1458 p1 = BI_BUF_BEGV(buf);
1459 p2 = BI_BUF_CEILING_OF(buf, p1);
1461 s2 = BI_BUF_ZV(buf) - p2;
1462 regex_match_object = Qnil;
1467 regex_emacs_buffer = buf;
1468 val = re_search_2(bufp,
1469 (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1470 s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1472 s2, pos - BI_BUF_BEGV(buf), lim - pos,
1473 &search_regs, pos - BI_BUF_BEGV(buf));
1479 int num_regs = search_regs.num_regs;
1480 j = BI_BUF_BEGV(buf);
1481 for (i = 0; i < num_regs; i++)
1482 if (search_regs.start[i] >= 0) {
1483 search_regs.start[i] += j;
1484 search_regs.end[i] += j;
1486 /* re_match (called from re_search et al) does this for us */
1487 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1488 XSETBUFFER(last_thing_searched, buf);
1489 /* Set pos to the new position. */
1490 pos = search_regs.start[0];
1491 fixup_search_regs_for_buffer(buf);
1492 /* And bufpos too. */
1493 bufpos = search_regs.start[0];
1502 regex_emacs_buffer = buf;
1503 val = re_search_2(bufp,
1504 (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1505 s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1507 s2, pos - BI_BUF_BEGV(buf), lim - pos,
1508 &search_regs, lim - BI_BUF_BEGV(buf));
1513 int num_regs = search_regs.num_regs;
1514 j = BI_BUF_BEGV(buf);
1515 for (i = 0; i < num_regs; i++)
1516 if (search_regs.start[i] >= 0) {
1517 search_regs.start[i] += j;
1518 search_regs.end[i] += j;
1520 /* re_match (called from re_search et al) does this for us */
1521 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1522 XSETBUFFER(last_thing_searched, buf);
1523 /* Set pos to the new position. */
1524 pos = search_regs.end[0];
1525 fixup_search_regs_for_buffer(buf);
1526 /* And bufpos too. */
1527 bufpos = search_regs.end[0];
1534 } else { /* non-RE case */
1536 int charset_base = -1;
1537 int boyer_moore_ok = 1;
1539 Bufbyte *patbuf = alloca_array(Bufbyte, len * MAX_EMCHAR_LEN);
1543 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1544 Emchar c, translated, inverse;
1545 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1547 /* If we got here and the RE flag is set, it's because
1548 we're dealing with a regexp known to be trivial, so the
1549 backslash just quotes the next character. */
1550 if (RE && *base_pat == '\\') {
1554 c = charptr_emchar(base_pat);
1555 translated = TRANSLATE(trt, c);
1556 inverse = TRANSLATE(inverse_trt, c);
1558 orig_bytelen = charcount_to_bytecount(base_pat, 1);
1559 inv_bytelen = set_charptr_emchar(tmp_str, inverse);
1560 new_bytelen = set_charptr_emchar(tmp_str, translated);
1562 if (new_bytelen != orig_bytelen
1563 || inv_bytelen != orig_bytelen)
1565 if (translated != c || inverse != c) {
1566 /* Keep track of which character set row
1567 contains the characters that need translation. */
1568 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1569 if (charset_base == -1)
1570 charset_base = charset_base_code;
1571 else if (charset_base != charset_base_code)
1572 /* If two different rows appear, needing translation,
1573 then we cannot use boyer_moore search. */
1576 memcpy(pat, tmp_str, new_bytelen);
1578 base_pat += orig_bytelen;
1579 len -= orig_bytelen;
1581 #else /* not MULE */
1582 while (--len >= 0) {
1583 /* If we got here and the RE flag is set, it's because
1584 we're dealing with a regexp known to be trivial, so the
1585 backslash just quotes the next character. */
1586 if (RE && *base_pat == '\\') {
1590 *pat++ = TRANSLATE(trt, *base_pat++);
1594 pat = base_pat = patbuf;
1596 return boyer_moore(buf, base_pat, len, pos, lim, n,
1597 trt, inverse_trt, charset_base);
1599 return simple_search(buf, base_pat, len, pos, lim, n,
1604 /* Do a simple string search N times for the string PAT,
1605 whose length is LEN/LEN_BYTE,
1606 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1607 TRT is the translation table.
1609 Return the character position where the match is found.
1610 Otherwise, if M matches remained to be found, return -M.
1612 This kind of search works regardless of what is in PAT and
1613 regardless of what is in TRT. It is used in cases where
1614 boyer_moore cannot work. */
1617 simple_search(struct buffer *buf, Bufbyte * base_pat, Bytecount len_byte,
1618 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1620 int forward = n > 0;
1621 Bytecount buf_len = 0; /* Shut up compiler. */
1626 Bytecount this_len = len_byte;
1627 Bytind this_idx = idx;
1628 const Bufbyte *p = base_pat;
1632 while (this_len > 0) {
1633 Emchar pat_ch, buf_ch;
1636 pat_ch = charptr_emchar(p);
1638 BI_BUF_FETCH_CHAR(buf, this_idx);
1640 buf_ch = TRANSLATE(trt, buf_ch);
1642 if (buf_ch != pat_ch)
1645 pat_len = charcount_to_bytecount(p, 1);
1647 this_len -= pat_len;
1648 INC_BYTIND(buf, this_idx);
1650 if (this_len == 0) {
1651 buf_len = this_idx - idx;
1655 INC_BYTIND(buf, idx);
1662 Bytecount this_len = len_byte;
1663 Bytind this_idx = idx;
1664 const Bufbyte *p = base_pat + len_byte;
1670 while (this_len > 0) {
1671 Emchar pat_ch, buf_ch;
1674 DEC_BYTIND(buf, this_idx);
1675 pat_ch = charptr_emchar(p);
1677 BI_BUF_FETCH_CHAR(buf, this_idx);
1679 buf_ch = TRANSLATE(trt, buf_ch);
1681 if (buf_ch != pat_ch)
1685 charcount_to_bytecount(p, 1);
1687 if (this_len == 0) {
1688 buf_len = idx - this_idx;
1692 DEC_BYTIND(buf, idx);
1699 Bufpos beg, end, retval;
1701 beg = bytind_to_bufpos(buf, idx - buf_len);
1702 retval = end = bytind_to_bufpos(buf, idx);
1704 retval = beg = bytind_to_bufpos(buf, idx);
1705 end = bytind_to_bufpos(buf, idx + buf_len);
1707 set_search_regs(buf, beg, end - beg);
1708 clear_unused_search_regs(&search_regs, 0);
1718 /* Do Boyer-Moore search N times for the string PAT,
1719 whose length is LEN/LEN_BYTE,
1720 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1721 DIRECTION says which direction we search in.
1722 TRT and INVERSE_TRT are translation tables.
1724 This kind of search works if all the characters in PAT that have
1725 nontrivial translation are the same aside from the last byte. This
1726 makes it possible to translate just the last byte of a character,
1727 and do so after just a simple test of the context.
1729 If that criterion is not satisfied, do not call this function. */
1732 boyer_moore(struct buffer *buf, Bufbyte * base_pat, Bytecount len,
1733 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1734 Lisp_Object inverse_trt, int charset_base)
1736 /* #### Someone really really really needs to comment the workings
1737 of this junk somewhat better.
1739 BTW "BM" stands for Boyer-Moore, which is one of the standard
1740 string-searching algorithms. It's the best string-searching
1741 algorithm out there, provided that:
1743 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1744 uses hashing, is much much easier to code but not as fast.)
1745 b) You can freely move backwards in the string that you're
1748 As the comment below tries to explain (but garbles in typical
1749 programmer-ese), the idea is that you don't have to do a
1750 string match at every successive position in the text. For
1751 example, let's say the pattern is "a very long string". We
1752 compare the last character in the string (`g') with the
1753 corresponding character in the text. If it mismatches, and
1754 it is, say, `z', then we can skip forward by the entire
1755 length of the pattern because `z' does not occur anywhere
1756 in the pattern. If the mismatching character does occur
1757 in the pattern, we can usually still skip forward by more
1758 than one: e.g. if it is `l', then we can skip forward
1759 by the length of the substring "ong string" -- i.e. the
1760 largest end section of the pattern that does not contain
1761 the mismatched character. So what we do is compute, for
1762 each possible character, the distance we can skip forward
1763 (the "stride") and use it in the string matching. This
1764 is what the BM_tab holds. */
1765 REGISTER EMACS_INT *BM_tab;
1766 EMACS_INT *BM_tab_base;
1767 REGISTER Bytecount dirlen;
1770 Bytecount stride_for_teases = 0;
1771 REGISTER EMACS_INT i, j;
1772 Bufbyte *pat, *pat_end;
1773 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1774 Bufbyte simple_translate[REGEXP_FASTMAP_SIZE];
1775 REGISTER int direction = ((n > 0) ? 1 : -1);
1777 Bufbyte translate_prev_byte = 0;
1778 Bufbyte translate_anteprev_byte = 0;
1781 EMACS_INT BM_tab_space[REGEXP_FASTMAP_SIZE];
1782 BM_tab = &BM_tab_space[0];
1784 BM_tab = alloca_array(EMACS_INT, 256);
1787 /* The general approach is that we are going to maintain that we
1788 know the first (closest to the present position, in whatever
1789 direction we're searching) character that could possibly be
1790 the last (furthest from present position) character of a
1791 valid match. We advance the state of our knowledge by
1792 looking at that character and seeing whether it indeed
1793 matches the last character of the pattern. If it does, we
1794 take a closer look. If it does not, we move our pointer (to
1795 putative last characters) as far as is logically possible.
1796 This amount of movement, which I call a stride, will be the
1797 length of the pattern if the actual character appears nowhere
1798 in the pattern, otherwise it will be the distance from the
1799 last occurrence of that character to the end of the pattern.
1800 As a coding trick, an enormous stride is coded into the table
1801 for characters that match the last character. This allows
1802 use of only a single test, a test for having gone past the
1803 end of the permissible match region, to test for both
1804 possible matches (when the stride goes past the end
1805 immediately) and failure to match (where you get nudged past
1806 the end one stride at a time).
1808 Here we make a "mickey mouse" BM table. The stride of the
1809 search is determined only by the last character of the
1810 putative match. If that character does not match, we will
1811 stride the proper distance to propose a match that
1812 superimposes it on the last instance of a character that
1813 matches it (per trt), or misses it entirely if there is
1816 dirlen = len * direction;
1817 infinity = dirlen - (lim + pos + len + len) * direction;
1818 /* Record position after the end of the pattern. */
1819 pat_end = base_pat + len;
1821 base_pat = pat_end - 1;
1822 BM_tab_base = BM_tab;
1823 BM_tab += REGEXP_FASTMAP_SIZE;
1824 j = dirlen; /* to get it in a register */
1825 /* A character that does not appear in the pattern induces a
1826 stride equal to the pattern length. */
1827 while (BM_tab_base != BM_tab) {
1833 /* We use this for translation, instead of TRT itself. We
1834 fill this in to handle the characters that actually occur
1835 in the pattern. Others don't matter anyway! */
1836 xzero(simple_translate);
1837 for (i = 0; i < REGEXP_FASTMAP_SIZE; i++)
1838 simple_translate[i] = (Bufbyte) i;
1840 while (i != infinity) {
1841 Bufbyte *ptr = base_pat + i;
1847 Emchar ch, untranslated;
1848 int this_translated = 1;
1850 /* Is *PTR the last byte of a character? */
1851 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P(ptr[1])) {
1852 Bufbyte *charstart = ptr;
1853 while (!BUFBYTE_FIRST_BYTE_P(*charstart))
1855 untranslated = charptr_emchar(charstart);
1857 (untranslated & ~CHAR_FIELD3_MASK)) {
1858 ch = TRANSLATE(trt, untranslated);
1859 if (!BUFBYTE_FIRST_BYTE_P(*ptr)) {
1860 translate_prev_byte = ptr[-1];
1861 if (!BUFBYTE_FIRST_BYTE_P
1862 (translate_prev_byte))
1863 translate_anteprev_byte
1867 this_translated = 0;
1872 this_translated = 0;
1874 if (ch > REGEXP_FASTMAP_SIZE)
1875 j = ((unsigned char)(ch & REGEXP_FASTMAP_MASK)| 0200);
1877 j = (unsigned char)(ch & REGEXP_FASTMAP_MASK);
1880 stride_for_teases = BM_tab[j];
1881 BM_tab[j] = dirlen - i;
1882 /* A translation table is accompanied by its inverse --
1883 see comment following downcase_table for details */
1884 if (this_translated) {
1885 Emchar starting_ch = ch;
1886 EMACS_INT starting_j = j;
1888 ch = TRANSLATE(inverse_trt, ch);
1889 if (ch > REGEXP_FASTMAP_SIZE)
1890 j = ((unsigned char)(ch & REGEXP_FASTMAP_MASK) | 0200);
1892 j = (unsigned char)(ch & REGEXP_FASTMAP_MASK);
1894 /* For all the characters that map into CH,
1895 set up simple_translate to map the last byte
1897 simple_translate[j] = starting_j;
1898 if (ch == starting_ch)
1900 BM_tab[j] = dirlen - i;
1906 k = (j = TRANSLATE(trt, j));
1908 stride_for_teases = BM_tab[j];
1909 BM_tab[j] = dirlen - i;
1910 /* A translation table is accompanied by its inverse --
1911 see comment following downcase_table for details */
1913 while ((j = TRANSLATE(inverse_trt, j)) != k) {
1914 simple_translate[j] = (Bufbyte) k;
1915 BM_tab[j] = dirlen - i;
1922 stride_for_teases = BM_tab[j];
1923 BM_tab[j] = dirlen - i;
1925 /* stride_for_teases tells how much to stride if we get a
1926 match on the far character but are subsequently
1927 disappointed, by recording what the stride would have been
1928 for that character if the last character had been
1931 infinity = dirlen - infinity;
1932 pos += dirlen - ((direction > 0) ? direction : 0);
1933 /* loop invariant - pos points at where last char (first char if
1934 reverse) of pattern would align in a possible match. */
1937 Bufbyte *tail_end_ptr;
1938 /* It's been reported that some (broken) compiler thinks
1939 that Boolean expressions in an arithmetic context are
1940 unsigned. Using an explicit ?1:0 prevents this. */
1941 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1942 return n * (0 - direction);
1943 /* First we do the part we can by pointers (maybe
1947 limit = pos - dirlen + direction;
1948 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1949 have changed. See buffer.h. */
1950 limit = ((direction > 0)
1951 ? BI_BUF_CEILING_OF(buf, limit) - 1
1952 : BI_BUF_FLOOR_OF(buf, limit + 1));
1953 /* LIMIT is now the last (not beyond-last!) value POS can
1954 take on without hitting edge of buffer or the gap. */
1955 limit = ((direction > 0)
1956 ? min(lim - 1, min(limit, pos + 20000))
1957 : max(lim, max(limit, pos - 20000)));
1958 tail_end = BI_BUF_CEILING_OF(buf, pos);
1959 tail_end_ptr = BI_BUF_BYTE_ADDRESS(buf, tail_end);
1961 if ((limit - pos) * direction > 20) {
1962 p_limit = BI_BUF_BYTE_ADDRESS(buf, limit);
1963 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS(buf, pos));
1964 /* In this loop, pos + cursor - ptr2 is the surrogate
1966 while (1) { /* use one cursor setting as long as i can */
1967 if (direction > 0) { /* worth duplicating */
1968 /* Use signed comparison if appropriate to make
1969 cursor+infinity sure to be > p_limit.
1970 Assuming that the buffer lies in a range of
1971 addresses that are all "positive" (as ints)
1972 or all "negative", either kind of comparison
1973 will work as long as we don't step by
1974 infinity. So pick the kind that works when
1975 we do step by infinity. */
1976 if ((EMACS_INT) (p_limit + infinity) >
1977 (EMACS_INT) p_limit)
1978 while ((EMACS_INT) cursor <=
1979 (EMACS_INT) p_limit)
1983 while ((EMACS_UINT) cursor <=
1984 (EMACS_UINT) p_limit)
1988 if ((EMACS_INT) (p_limit + infinity) <
1989 (EMACS_INT) p_limit)
1990 while ((EMACS_INT) cursor >=
1991 (EMACS_INT) p_limit)
1995 while ((EMACS_UINT) cursor >=
1996 (EMACS_UINT) p_limit)
2000 /* If you are here, cursor is beyond the end of the
2001 searched region. This can happen if you match on
2002 the far character of the pattern, because the
2003 "stride" of that character is infinity, a number
2004 able to throw you well beyond the end of the
2005 search. It can also happen if you fail to match
2006 within the permitted region and would otherwise
2007 try a character beyond that region */
2008 if ((cursor - p_limit) * direction <= len)
2009 break; /* a small overrun is genuine */
2010 cursor -= infinity; /* large overrun = hit */
2011 i = dirlen - direction;
2014 direction) + direction != 0) {
2017 cursor -= direction;
2018 /* Translate only the last byte of a character. */
2019 if ((cursor == tail_end_ptr
2021 BUFBYTE_FIRST_BYTE_P(cursor
2024 (BUFBYTE_FIRST_BYTE_P
2026 || (translate_prev_byte ==
2029 (BUFBYTE_FIRST_BYTE_P
2030 (translate_prev_byte)
2032 translate_anteprev_byte
2034 ch = simple_translate
2050 direction) + direction != 0)
2052 *(cursor -= direction))
2055 cursor += dirlen - i - direction; /* fix cursor */
2056 if (i + direction == 0) {
2057 cursor -= direction;
2061 (pos + cursor - ptr2 +
2065 bytind_to_bufpos(buf,
2068 bytind_to_bufpos(buf,
2072 set_search_regs(buf, bufstart,
2075 clear_unused_search_regs
2079 if ((n -= direction) != 0)
2080 cursor += dirlen; /* to resume search */
2082 return ((direction > 0)
2084 end[0] : search_regs.
2087 cursor += stride_for_teases; /* <sigh> we lose - */
2089 pos += cursor - ptr2;
2091 /* Now we'll pick up a clump that has to be done the hard
2092 way because it covers a discontinuity */
2094 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
2095 have changed. See buffer.h. */
2096 limit = ((direction > 0)
2097 ? BI_BUF_CEILING_OF(buf, pos - dirlen + 1) - 1
2098 : BI_BUF_FLOOR_OF(buf, pos - dirlen));
2099 limit = ((direction > 0)
2100 ? min(limit + len, lim - 1)
2101 : max(limit - len, lim));
2102 /* LIMIT is now the last value POS can have
2103 and still be valid for a possible match. */
2105 /* This loop can be coded for space rather than
2106 speed because it will usually run only once.
2107 (the reach is at most len + 21, and typically
2108 does not exceed len) */
2109 while ((limit - pos) * direction >= 0)
2110 /* *not* BI_BUF_FETCH_CHAR. We are working here
2111 with bytes, not characters. */
2113 BM_tab[*BI_BUF_BYTE_ADDRESS
2115 /* now run the same tests to distinguish going off
2116 the end, a match or a phony match. */
2117 if ((pos - limit) * direction <= len)
2118 break; /* ran off the end */
2119 /* Found what might be a match.
2120 Set POS back to last (first if reverse) char pos. */
2122 i = dirlen - direction;
2123 while ((i -= direction) + direction != 0) {
2130 ptr = BI_BUF_BYTE_ADDRESS(buf, pos);
2131 if ((ptr == tail_end_ptr
2132 || BUFBYTE_FIRST_BYTE_P(ptr[1]))
2133 && (BUFBYTE_FIRST_BYTE_P(ptr[0])
2134 || (translate_prev_byte ==
2137 (BUFBYTE_FIRST_BYTE_P
2138 (translate_prev_byte)
2139 || translate_anteprev_byte
2141 ch = simple_translate[*ptr];
2148 if (pat[i] != TRANSLATE(trt,
2149 *BI_BUF_BYTE_ADDRESS
2154 /* Above loop has moved POS part or all the way back
2155 to the first char pos (last char pos if reverse).
2156 Set it once again at the last (first if reverse)
2158 pos += dirlen - i - direction;
2159 if (i + direction == 0) {
2163 Bytind bytstart = (pos +
2169 bytind_to_bufpos(buf,
2172 bytind_to_bufpos(buf,
2176 set_search_regs(buf, bufstart,
2179 clear_unused_search_regs
2183 if ((n -= direction) != 0)
2184 pos += dirlen; /* to resume search */
2186 return ((direction > 0)
2188 end[0] : search_regs.
2191 pos += stride_for_teases;
2194 /* We have done one clump. Can we continue? */
2195 if ((lim - pos) * direction < 0)
2196 return (0 - n) * direction;
2198 return bytind_to_bufpos(buf, pos);
2201 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
2202 buffer for a match just found. */
2204 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len)
2206 /* This function has been Mule-ized. */
2207 /* Make sure we have registers in which to store
2208 the match position. */
2209 if (search_regs.num_regs == 0) {
2210 search_regs.start = xnew_atomic(regoff_t);
2211 search_regs.end = xnew_atomic(regoff_t);
2212 search_regs.num_regs = 1;
2215 search_regs.start[0] = beg;
2216 search_regs.end[0] = beg + len;
2217 XSETBUFFER(last_thing_searched, buf);
2220 /* Clear unused search registers so match data will be null.
2221 REGP is a pointer to the register structure to clear, usually the global
2223 NO_SUB is the number of subexpressions to allow for. (Does not count
2224 the whole match, ie, for a string search NO_SUB == 0.)
2225 It is an error if NO_SUB > REGP.num_regs - 1. */
2227 static void clear_unused_search_regs(struct re_registers *regp, int no_sub)
2229 /* This function has been Mule-ized. */
2232 assert(no_sub >= 0 && no_sub < regp->num_regs);
2233 for (i = no_sub + 1; i < regp->num_regs; i++)
2234 regp->start[i] = regp->end[i] = -1;
2237 /* Given a string of words separated by word delimiters,
2238 compute a regexp that matches those exact words
2239 separated by arbitrary punctuation. */
2241 static Lisp_Object wordify(Lisp_Object buffer, Lisp_Object string)
2244 EMACS_INT punct_count = 0, word_count = 0;
2245 struct buffer *buf = decode_buffer(buffer, 0);
2246 Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2248 CHECK_STRING(string);
2249 len = XSTRING_CHAR_LENGTH(string);
2251 for (i = 0; i < len; i++)
2253 (syntax_table, string_char(XSTRING(string), i))) {
2255 if (i > 0 && WORD_SYNTAX_P(syntax_table,
2256 string_char(XSTRING(string),
2260 if (WORD_SYNTAX_P(syntax_table, string_char(XSTRING(string), len - 1)))
2263 return build_string("");
2266 /* The following value is an upper bound on the amount of storage we
2267 need. In non-Mule, it is exact. */
2269 (Bufbyte *) alloca(XSTRING_LENGTH(string) - punct_count +
2270 5 * (word_count - 1) + 4);
2271 Bufbyte *o = storage;
2276 for (i = 0; i < len; i++) {
2277 Emchar ch = string_char(XSTRING(string), i);
2279 if (WORD_SYNTAX_P(syntax_table, ch))
2280 o += set_charptr_emchar(o, ch);
2282 && WORD_SYNTAX_P(syntax_table,
2283 string_char(XSTRING(string),
2297 return make_string(storage, o - storage);
2301 DEFUN("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2302 Search backward from point for STRING.
2303 Set point to the beginning of the occurrence found, and return point.
2305 Optional second argument LIMIT bounds the search; it is a buffer
2306 position. The match found must not extend before that position.
2307 The value nil is equivalent to (point-min).
2309 Optional third argument NOERROR, if t, means just return nil (no
2310 error) if the search fails. If neither nil nor t, set point to LIMIT
2313 Optional fourth argument COUNT is a repeat count--search for
2314 successive occurrences.
2316 Optional fifth argument BUFFER specifies the buffer to search in and
2317 defaults to the current buffer.
2319 See also the functions `match-beginning', `match-end' and `replace-match'.
2321 (string, limit, noerror, count, buffer))
2323 return search_command(string, limit, noerror, count, buffer, -1, 0, 0);
2326 DEFUN("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2327 Search forward from point for STRING.
2328 Set point to the end of the occurrence found, and return point.
2330 Optional second argument LIMIT bounds the search; it is a buffer
2331 position. The match found must not extend after that position. The
2332 value nil is equivalent to (point-max).
2334 Optional third argument NOERROR, if t, means just return nil (no
2335 error) if the search fails. If neither nil nor t, set point to LIMIT
2338 Optional fourth argument COUNT is a repeat count--search for
2339 successive occurrences.
2341 Optional fifth argument BUFFER specifies the buffer to search in and
2342 defaults to the current buffer.
2344 See also the functions `match-beginning', `match-end' and `replace-match'.
2346 (string, limit, noerror, count, buffer))
2348 return search_command(string, limit, noerror, count, buffer, 1, 0, 0);
2351 DEFUN("word-search-backward", Fword_search_backward, 1, 5, "sWord search backward: ", /*
2352 Search backward from point for STRING, ignoring differences in punctuation.
2353 Set point to the beginning of the occurrence found, and return point.
2355 Optional second argument LIMIT bounds the search; it is a buffer
2356 position. The match found must not extend before that position.
2357 The value nil is equivalent to (point-min).
2359 Optional third argument NOERROR, if t, means just return nil (no
2360 error) if the search fails. If neither nil nor t, set point to LIMIT
2363 Optional fourth argument COUNT is a repeat count--search for
2364 successive occurrences.
2366 Optional fifth argument BUFFER specifies the buffer to search in and
2367 defaults to the current buffer.
2369 See also the functions `match-beginning', `match-end' and `replace-match'.
2371 (string, limit, noerror, count, buffer))
2373 return search_command(wordify(buffer, string), limit, noerror, count,
2377 DEFUN("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2378 Search forward from point for STRING, ignoring differences in punctuation.
2379 Set point to the end of the occurrence found, and return point.
2381 Optional second argument LIMIT bounds the search; it is a buffer
2382 position. The match found must not extend after that position. The
2383 value nil is equivalent to (point-max).
2385 Optional third argument NOERROR, if t, means just return nil (no
2386 error) if the search fails. If neither nil nor t, set point to LIMIT
2389 Optional fourth argument COUNT is a repeat count--search for
2390 successive occurrences.
2392 Optional fifth argument BUFFER specifies the buffer to search in and
2393 defaults to the current buffer.
2395 See also the functions `match-beginning', `match-end' and `replace-match'.
2397 (string, limit, noerror, count, buffer))
2399 return search_command(wordify(buffer, string), limit, noerror, count,
2403 DEFUN("re-search-backward", Fre_search_backward, 1, 5, "sRE search backward: ", /*
2404 Search backward from point for match for regular expression REGEXP.
2405 Set point to the beginning of the match, and return point.
2406 The match found is the one starting last in the buffer
2407 and yet ending before the origin of the search.
2409 Optional second argument LIMIT bounds the search; it is a buffer
2410 position. The match found must not extend before that position.
2411 The value nil is equivalent to (point-min).
2413 Optional third argument NOERROR, if t, means just return nil (no
2414 error) if the search fails. If neither nil nor t, set point to LIMIT
2417 Optional fourth argument COUNT is a repeat count--search for
2418 successive occurrences.
2420 Optional fifth argument BUFFER specifies the buffer to search in and
2421 defaults to the current buffer.
2423 See also the functions `match-beginning', `match-end' and `replace-match'.
2425 (regexp, limit, noerror, count, buffer))
2427 return search_command(regexp, limit, noerror, count, buffer, -1, 1, 0);
2430 DEFUN("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2431 Search forward from point for regular expression REGEXP.
2432 Set point to the end of the occurrence found, and return point.
2434 Optional second argument LIMIT bounds the search; it is a buffer
2435 position. The match found must not extend after that position. The
2436 value nil is equivalent to (point-max).
2438 Optional third argument NOERROR, if t, means just return nil (no
2439 error) if the search fails. If neither nil nor t, set point to LIMIT
2442 Optional fourth argument COUNT is a repeat count--search for
2443 successive occurrences.
2445 Optional fifth argument BUFFER specifies the buffer to search in and
2446 defaults to the current buffer.
2448 See also the functions `match-beginning', `match-end' and `replace-match'.
2450 (regexp, limit, noerror, count, buffer))
2452 return search_command(regexp, limit, noerror, count, buffer, 1, 1, 0);
2455 DEFUN("posix-search-backward", Fposix_search_backward, 1, 5, "sPosix search backward: ", /*
2456 Search backward from point for match for regular expression REGEXP.
2457 Find the longest match in accord with Posix regular expression rules.
2458 Set point to the beginning of the match, and return point.
2459 The match found is the one starting last in the buffer
2460 and yet ending before the origin of the search.
2462 Optional second argument LIMIT bounds the search; it is a buffer
2463 position. The match found must not extend before that position.
2464 The value nil is equivalent to (point-min).
2466 Optional third argument NOERROR, if t, means just return nil (no
2467 error) if the search fails. If neither nil nor t, set point to LIMIT
2470 Optional fourth argument COUNT is a repeat count--search for
2471 successive occurrences.
2473 Optional fifth argument BUFFER specifies the buffer to search in and
2474 defaults to the current buffer.
2476 See also the functions `match-beginning', `match-end' and `replace-match'.
2478 (regexp, limit, noerror, count, buffer))
2480 return search_command(regexp, limit, noerror, count, buffer, -1, 1, 1);
2483 DEFUN("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2484 Search forward from point for regular expression REGEXP.
2485 Find the longest match in accord with Posix regular expression rules.
2486 Set point to the end of the occurrence found, and return point.
2488 Optional second argument LIMIT bounds the search; it is a buffer
2489 position. The match found must not extend after that position. The
2490 value nil is equivalent to (point-max).
2492 Optional third argument NOERROR, if t, means just return nil (no
2493 error) if the search fails. If neither nil nor t, set point to LIMIT
2496 Optional fourth argument COUNT is a repeat count--search for
2497 successive occurrences.
2499 Optional fifth argument BUFFER specifies the buffer to search in and
2500 defaults to the current buffer.
2502 See also the functions `match-beginning', `match-end' and `replace-match'.
2504 (regexp, limit, noerror, count, buffer))
2506 return search_command(regexp, limit, noerror, count, buffer, 1, 1, 1);
2509 static Lisp_Object free_created_dynarrs(Lisp_Object cons)
2511 Dynarr_free(get_opaque_ptr(XCAR(cons)));
2512 Dynarr_free(get_opaque_ptr(XCDR(cons)));
2513 free_opaque_ptr(XCAR(cons));
2514 free_opaque_ptr(XCDR(cons));
2515 free_cons(XCONS(cons));
2519 DEFUN("replace-match", Freplace_match, 1, 5, 0, /*
2520 Replace text matched by last search with REPLACEMENT.
2521 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2522 Otherwise maybe capitalize the whole text, or maybe just word initials,
2523 based on the replaced text.
2524 If the replaced text has only capital letters
2525 and has at least one multiletter word, convert REPLACEMENT to all caps.
2526 If the replaced text has at least one word starting with a capital letter,
2527 then capitalize each word in REPLACEMENT.
2529 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2530 Otherwise treat `\\' as special:
2531 `\\&' in REPLACEMENT means substitute original matched text.
2532 `\\N' means substitute what matched the Nth `\\(...\\)'.
2533 If Nth parens didn't match, substitute nothing.
2534 `\\\\' means insert one `\\'.
2535 `\\u' means upcase the next character.
2536 `\\l' means downcase the next character.
2537 `\\U' means begin upcasing all following characters.
2538 `\\L' means begin downcasing all following characters.
2539 `\\E' means terminate the effect of any `\\U' or `\\L'.
2540 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2541 all other case changes that may be made in the replaced text.
2542 FIXEDCASE and LITERAL are optional arguments.
2543 Leaves point at end of replacement text.
2545 The optional fourth argument STRING can be a string to modify.
2546 In that case, this function creates and returns a new string
2547 which is made by replacing the part of STRING that was matched.
2548 When fourth argument is a string, fifth argument STRBUFFER specifies
2549 the buffer to be used for syntax-table and case-table lookup and
2550 defaults to the current buffer. When fourth argument is not a string,
2551 the buffer that the match occurred in has automatically been remembered
2552 and you do not need to specify it.
2554 When fourth argument is nil, STRBUFFER specifies a subexpression of
2555 the match. It says to replace just that subexpression instead of the
2556 whole match. This is useful only after a regular expression search or
2557 match since only regular expressions have distinguished subexpressions.
2559 (replacement, fixedcase, literal, string, strbuffer))
2561 /* This function has been Mule-ized. */
2562 /* This function can GC */
2563 enum { nochange, all_caps, cap_initial } case_action;
2565 int some_multiletter_word;
2568 int some_nonuppercase_initial;
2572 Lisp_Char_Table *syntax_table;
2575 int_dynarr *ul_action_dynarr = 0;
2576 int_dynarr *ul_pos_dynarr = 0;
2580 CHECK_STRING(replacement);
2582 if (!NILP(string)) {
2583 CHECK_STRING(string);
2584 if (!EQ(last_thing_searched, Qt))
2585 error("last thing matched was not a string");
2586 /* If the match data
2587 were abstracted into a special "match data" type instead
2588 of the typical half-assed "let the implementation be
2589 visible" form it's in, we could extend it to include
2590 the last string matched and the buffer used for that
2591 matching. But of course we can't change it as it is. */
2592 buf = decode_buffer(strbuffer, 0);
2593 XSETBUFFER(buffer, buf);
2595 if (!NILP(strbuffer)) {
2596 CHECK_INT(strbuffer);
2597 sub = XINT(strbuffer);
2598 if (sub < 0 || sub >= (int)search_regs.num_regs)
2599 args_out_of_range(strbuffer,
2600 make_int(search_regs.
2603 if (!BUFFERP(last_thing_searched))
2604 error("last thing matched was not a buffer");
2605 buffer = last_thing_searched;
2606 buf = XBUFFER(buffer);
2609 syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2611 case_action = nochange; /* We tried an initialization */
2612 /* but some C compilers blew it */
2614 if (search_regs.num_regs == 0)
2615 error("replace-match called before any match found");
2618 if (search_regs.start[sub] < BUF_BEGV(buf)
2619 || search_regs.start[sub] > search_regs.end[sub]
2620 || search_regs.end[sub] > BUF_ZV(buf))
2621 args_out_of_range(make_int(search_regs.start[sub]),
2622 make_int(search_regs.end[sub]));
2624 if (search_regs.start[0] < 0
2625 || search_regs.start[0] > search_regs.end[0]
2626 || search_regs.end[0] > XSTRING_CHAR_LENGTH(string))
2627 args_out_of_range(make_int(search_regs.start[0]),
2628 make_int(search_regs.end[0]));
2631 if (NILP(fixedcase)) {
2632 /* Decide how to casify by examining the matched text. */
2634 last = search_regs.end[sub];
2636 case_action = all_caps;
2638 /* some_multiletter_word is set nonzero if any original word
2639 is more than one letter long. */
2640 some_multiletter_word = 0;
2642 some_nonuppercase_initial = 0;
2645 for (pos = search_regs.start[sub]; pos < last; pos++) {
2647 c = BUF_FETCH_CHAR(buf, pos);
2649 c = string_char(XSTRING(string), pos);
2651 if (LOWERCASEP(buf, c)) {
2652 /* Cannot be all caps if any original char is lower case */
2655 if (!WORD_SYNTAX_P(syntax_table, prevc))
2656 some_nonuppercase_initial = 1;
2658 some_multiletter_word = 1;
2659 } else if (!NOCASEP(buf, c)) {
2661 if (!WORD_SYNTAX_P(syntax_table, prevc)) ;
2663 some_multiletter_word = 1;
2665 /* If the initial is a caseless word constituent,
2666 treat that like a lowercase initial. */
2667 if (!WORD_SYNTAX_P(syntax_table, prevc))
2668 some_nonuppercase_initial = 1;
2674 /* Convert to all caps if the old text is all caps
2675 and has at least one multiletter word. */
2676 if (!some_lowercase && some_multiletter_word)
2677 case_action = all_caps;
2678 /* Capitalize each word, if the old text has all capitalized words. */
2679 else if (!some_nonuppercase_initial && some_multiletter_word)
2680 case_action = cap_initial;
2681 else if (!some_nonuppercase_initial && some_uppercase)
2682 /* Should x -> yz, operating on X, give Yz or YZ?
2683 We'll assume the latter. */
2684 case_action = all_caps;
2686 case_action = nochange;
2689 /* Do replacement in a string. */
2690 if (!NILP(string)) {
2691 Lisp_Object before, after;
2693 speccount = specpdl_depth();
2695 Fsubstring(string, Qzero, make_int(search_regs.start[0]));
2696 after = Fsubstring(string, make_int(search_regs.end[0]), Qnil);
2698 /* Do case substitution into REPLACEMENT if desired. */
2699 if (NILP(literal)) {
2700 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2702 /* XEmacs change: rewrote this loop somewhat to make it
2703 cleaner. Also added \U, \E, etc. */
2704 Charcount literal_start = 0;
2705 /* We build up the substituted string in ACCUM. */
2710 /* OK, the basic idea here is that we scan through the
2711 replacement string until we find a backslash, which
2712 represents a substring of the original string to be
2713 substituted. We then append onto ACCUM the literal
2714 text before the backslash (LASTPOS marks the
2715 beginning of this) followed by the substring of the
2716 original string that needs to be inserted. */
2717 for (strpos = 0; strpos < stlen; strpos++) {
2718 /* If LITERAL_END is set, we've encountered a backslash
2719 (the end of literal text to be inserted). */
2720 Charcount literal_end = -1;
2721 /* If SUBSTART is set, we need to also insert the
2722 text from SUBSTART to SUBEND in the original string. */
2723 Charcount substart = -1;
2724 Charcount subend = -1;
2726 c = string_char(XSTRING(replacement), strpos);
2727 if (c == '\\' && strpos < stlen - 1) {
2728 c = string_char(XSTRING(replacement),
2731 literal_end = strpos - 1;
2732 substart = search_regs.start[0];
2733 subend = search_regs.end[0];
2734 } else if (c >= '1' && c <= '9' &&
2736 search_regs.num_regs + '0') {
2738 start[c - '0'] >= 0) {
2748 } else if (c == 'U' || c == 'u'
2749 || c == 'L' || c == 'l'
2751 /* Keep track of all case changes requested, but don't
2752 make them now. Do them later so we override
2754 if (!ul_pos_dynarr) {
2759 record_unwind_protect
2760 (free_created_dynarrs,
2765 (ul_action_dynarr)));
2767 literal_end = strpos - 1;
2768 Dynarr_add(ul_pos_dynarr,
2776 Dynarr_add(ul_action_dynarr, c);
2777 } else if (c == '\\')
2778 /* So we get just one backslash. */
2779 literal_end = strpos;
2781 if (literal_end >= 0) {
2782 Lisp_Object literal_text = Qnil;
2783 Lisp_Object substring = Qnil;
2784 if (literal_end != literal_start)
2786 Fsubstring(replacement,
2791 if (substart >= 0 && subend != substart)
2792 substring = Fsubstring(string,
2797 if (!NILP(literal_text)
2798 || !NILP(substring))
2800 concat3(accum, literal_text,
2802 literal_start = strpos + 1;
2806 if (strpos != literal_start)
2807 /* some literal text at end to be inserted */
2810 Fsubstring(replacement,
2811 make_int(literal_start),
2814 replacement = accum;
2817 /* replacement can be nil. */
2818 if (NILP(replacement))
2819 replacement = build_string("");
2821 if (case_action == all_caps)
2822 replacement = Fupcase(replacement, buffer);
2823 else if (case_action == cap_initial)
2824 replacement = Fupcase_initials(replacement, buffer);
2826 /* Now finally, we need to process the \U's, \E's, etc. */
2827 if (ul_pos_dynarr) {
2829 int cur_action = 'E';
2830 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2833 for (strpos = 0; strpos < stlen; strpos++) {
2835 string_char(XSTRING(replacement), strpos);
2836 Emchar newchar = -1;
2837 if (i < Dynarr_length(ul_pos_dynarr) &&
2838 strpos == Dynarr_at(ul_pos_dynarr, i)) {
2840 Dynarr_at(ul_action_dynarr, i);
2842 if (new_action == 'u')
2843 newchar = UPCASE(buf, curchar);
2844 else if (new_action == 'l')
2846 DOWNCASE(buf, curchar);
2848 cur_action = new_action;
2850 if (newchar == -1) {
2851 if (cur_action == 'U')
2852 newchar = UPCASE(buf, curchar);
2853 else if (cur_action == 'L')
2855 DOWNCASE(buf, curchar);
2859 if (newchar != curchar)
2860 set_string_char(XSTRING(replacement),
2865 /* frees the Dynarrs if necessary. */
2866 unbind_to(speccount, Qnil);
2867 return concat3(before, replacement, after);
2870 mc_count = begin_multiple_change(buf, search_regs.start[sub],
2871 search_regs.end[sub]);
2873 /* begin_multiple_change() records an unwind-protect, so we need to
2874 record this value now. */
2875 speccount = specpdl_depth();
2877 /* We insert the replacement text before the old text, and then
2878 delete the original text. This means that markers at the
2879 beginning or end of the original will float to the corresponding
2880 position in the replacement. */
2881 BUF_SET_PT(buf, search_regs.start[sub]);
2883 Finsert(1, &replacement);
2885 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2887 struct gcpro gcpro1;
2888 GCPRO1(replacement);
2889 for (strpos = 0; strpos < stlen; strpos++) {
2890 /* on the first iteration assert(offset==0),
2891 exactly complementing BUF_SET_PT() above.
2892 During the loop, it keeps track of the amount inserted.
2894 Charcount offset = BUF_PT(buf) - search_regs.start[sub];
2896 c = string_char(XSTRING(replacement), strpos);
2897 if (c == '\\' && strpos < stlen - 1) {
2898 /* XXX FIXME: replacing just a substring non-literally
2899 using backslash refs to the match looks dangerous. But
2900 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2901 <duwe@caldera.de> claims Finsert_buffer_substring already
2902 handles this correctly.
2904 c = string_char(XSTRING(replacement), ++strpos);
2906 Finsert_buffer_substring
2908 make_int(search_regs.start[0] +
2910 make_int(search_regs.end[0] +
2912 else if (c >= '1' && c <= '9'
2913 && c <= search_regs.num_regs + '0') {
2914 if (search_regs.start[c - '0'] >= 1)
2915 Finsert_buffer_substring
2917 make_int(search_regs.
2920 make_int(search_regs.
2923 } else if (c == 'U' || c == 'u' || c == 'L'
2924 || c == 'l' || c == 'E') {
2925 /* Keep track of all case changes requested, but don't
2926 make them now. Do them later so we override
2928 if (!ul_pos_dynarr) {
2929 ul_pos_dynarr = Dynarr_new(int);
2932 record_unwind_protect
2933 (free_created_dynarrs,
2934 Fcons(make_opaque_ptr
2937 (ul_action_dynarr)));
2939 Dynarr_add(ul_pos_dynarr, BUF_PT(buf));
2940 Dynarr_add(ul_action_dynarr, c);
2942 buffer_insert_emacs_char(buf, c);
2944 buffer_insert_emacs_char(buf, c);
2949 inslen = BUF_PT(buf) - (search_regs.start[sub]);
2950 buffer_delete_range(buf, search_regs.start[sub] + inslen,
2951 search_regs.end[sub] + inslen, 0);
2953 if (case_action == all_caps)
2954 Fupcase_region(make_int(BUF_PT(buf) - inslen),
2955 make_int(BUF_PT(buf)), buffer);
2956 else if (case_action == cap_initial)
2957 Fupcase_initials_region(make_int(BUF_PT(buf) - inslen),
2958 make_int(BUF_PT(buf)), buffer);
2960 /* Now go through and make all the case changes that were requested
2961 in the replacement string. */
2962 if (ul_pos_dynarr) {
2963 Bufpos eend = BUF_PT(buf);
2965 int cur_action = 'E';
2967 for (pos = BUF_PT(buf) - inslen; pos < eend; pos++) {
2968 Emchar curchar = BUF_FETCH_CHAR(buf, pos);
2969 Emchar newchar = -1;
2970 if (i < Dynarr_length(ul_pos_dynarr) &&
2971 pos == Dynarr_at(ul_pos_dynarr, i)) {
2972 int new_action = Dynarr_at(ul_action_dynarr, i);
2974 if (new_action == 'u')
2975 newchar = UPCASE(buf, curchar);
2976 else if (new_action == 'l')
2977 newchar = DOWNCASE(buf, curchar);
2979 cur_action = new_action;
2981 if (newchar == -1) {
2982 if (cur_action == 'U')
2983 newchar = UPCASE(buf, curchar);
2984 else if (cur_action == 'L')
2985 newchar = DOWNCASE(buf, curchar);
2989 if (newchar != curchar)
2990 buffer_replace_char(buf, pos, newchar, 0, 0);
2994 /* frees the Dynarrs if necessary. */
2995 unbind_to(speccount, Qnil);
2996 end_multiple_change(buf, mc_count);
3001 static Lisp_Object match_limit(Lisp_Object num, int beginningp)
3003 /* This function has been Mule-ized. */
3008 if (n < 0 || search_regs.num_regs <= 0)
3009 args_out_of_range(num, make_int(search_regs.num_regs));
3010 if (n >= search_regs.num_regs || search_regs.start[n] < 0)
3012 return make_int(beginningp ? search_regs.start[n] : search_regs.end[n]);
3015 DEFUN("match-beginning", Fmatch_beginning, 1, 1, 0, /*
3016 Return position of start of text matched by last regexp search.
3017 NUM, specifies which parenthesized expression in the last regexp.
3018 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3019 Zero means the entire text matched by the whole regexp or whole string.
3023 return match_limit(num, 1);
3026 DEFUN("match-end", Fmatch_end, 1, 1, 0, /*
3027 Return position of end of text matched by last regexp search.
3028 NUM specifies which parenthesized expression in the last regexp.
3029 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3030 Zero means the entire text matched by the whole regexp or whole string.
3034 return match_limit(num, 0);
3037 DEFUN("match-data", Fmatch_data, 0, 2, 0, /*
3038 Return a list containing all info on what the last regexp search matched.
3039 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
3040 All the elements are markers or nil (nil if the Nth pair didn't match)
3041 if the last match was on a buffer; integers or nil if a string was matched.
3042 Use `store-match-data' to reinstate the data in this list.
3044 If INTEGERS (the optional first argument) is non-nil, always use integers
3045 \(rather than markers) to represent buffer positions.
3046 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
3047 to hold all the values, and if INTEGERS is non-nil, no consing is done.
3051 /* This function has been Mule-ized. */
3052 Lisp_Object tail, prev;
3057 if (NILP(last_thing_searched))
3058 /*error ("match-data called before any match found"); */
3061 data = alloca_array(Lisp_Object, 2 * search_regs.num_regs);
3064 for (i = 0; i < search_regs.num_regs; i++) {
3065 Bufpos start = search_regs.start[i];
3067 if (EQ(last_thing_searched, Qt)
3068 || !NILP(integers)) {
3069 data[2 * i] = make_int(start);
3070 data[2 * i + 1] = make_int(search_regs.end[i]);
3071 } else if (BUFFERP(last_thing_searched)) {
3072 data[2 * i] = Fmake_marker();
3073 Fset_marker(data[2 * i],
3075 last_thing_searched);
3076 data[2 * i + 1] = Fmake_marker();
3077 Fset_marker(data[2 * i + 1],
3078 make_int(search_regs.end[i]),
3079 last_thing_searched);
3081 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
3086 data[2 * i] = data[2 * i + 1] = Qnil;
3089 return Flist(2 * len + 2, data);
3091 /* If REUSE is a list, store as many value elements as will fit
3092 into the elements of REUSE. */
3093 for (prev = Qnil, i = 0, tail = reuse; CONSP(tail);
3094 i++, tail = XCDR(tail)) {
3095 if (i < 2 * len + 2)
3096 XCAR(tail) = data[i];
3102 /* If we couldn't fit all value elements into REUSE,
3103 cons up the rest of them and add them to the end of REUSE. */
3104 if (i < 2 * len + 2)
3105 XCDR(prev) = Flist(2 * len + 2 - i, data + i);
3110 DEFUN("store-match-data", Fstore_match_data, 1, 1, 0, /*
3111 Set internal data on last search match from elements of LIST.
3112 LIST should have been created by calling `match-data' previously.
3116 /* This function has been Mule-ized. */
3118 REGISTER Lisp_Object marker;
3122 if (running_asynch_code)
3125 CONCHECK_LIST(list);
3127 /* Unless we find a marker with a buffer in LIST, assume that this
3128 match data came from a string. */
3129 last_thing_searched = Qt;
3131 /* Allocate registers if they don't already exist. */
3132 length = XINT(Flength(list)) / 2;
3133 num_regs = search_regs.num_regs;
3135 if (length > num_regs) {
3136 if (search_regs.num_regs == 0) {
3137 search_regs.start = xnew_atomic_array(regoff_t, length);
3138 search_regs.end = xnew_atomic_array(regoff_t, length);
3140 XREALLOC_ARRAY(search_regs.start, regoff_t, length);
3141 XREALLOC_ARRAY(search_regs.end, regoff_t, length);
3144 search_regs.num_regs = length;
3147 for (i = 0; i < num_regs; i++) {
3148 marker = Fcar(list);
3150 search_regs.start[i] = -1;
3153 if (MARKERP(marker)) {
3154 if (XMARKER(marker)->buffer == 0)
3157 XSETBUFFER(last_thing_searched,
3158 XMARKER(marker)->buffer);
3161 CHECK_INT_COERCE_MARKER(marker);
3162 search_regs.start[i] = XINT(marker);
3165 marker = Fcar(list);
3166 if (MARKERP(marker) && XMARKER(marker)->buffer == 0)
3169 CHECK_INT_COERCE_MARKER(marker);
3170 search_regs.end[i] = XINT(marker);
3178 /* If non-zero the match data have been saved in saved_search_regs
3179 during the execution of a sentinel or filter. */
3180 static int search_regs_saved;
3181 static struct re_registers saved_search_regs;
3183 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3184 if asynchronous code (filter or sentinel) is running. */
3185 static void save_search_regs(void)
3187 if (!search_regs_saved) {
3188 saved_search_regs.num_regs = search_regs.num_regs;
3189 saved_search_regs.start = search_regs.start;
3190 saved_search_regs.end = search_regs.end;
3191 search_regs.num_regs = 0;
3192 search_regs.start = 0;
3193 search_regs.end = 0;
3195 search_regs_saved = 1;
3199 /* Called upon exit from filters and sentinels. */
3200 void restore_match_data(void)
3202 if (search_regs_saved) {
3203 if (search_regs.num_regs > 0) {
3204 xfree(search_regs.start);
3205 xfree(search_regs.end);
3207 search_regs.num_regs = saved_search_regs.num_regs;
3208 search_regs.start = saved_search_regs.start;
3209 search_regs.end = saved_search_regs.end;
3211 search_regs_saved = 0;
3215 /* Quote a string to inactivate reg-expr chars */
3217 DEFUN("regexp-quote", Fregexp_quote, 1, 1, 0, /*
3218 Return a regexp string which matches exactly STRING and nothing else.
3222 REGISTER Bufbyte *in, *out, *end;
3223 REGISTER Bufbyte *temp;
3225 CHECK_STRING(string);
3227 temp = (Bufbyte *) alloca(XSTRING_LENGTH(string) * 2);
3229 /* Now copy the data into the new string, inserting escapes. */
3231 in = XSTRING_DATA(string);
3232 end = in + XSTRING_LENGTH(string);
3236 Emchar c = charptr_emchar(in);
3238 if (c == '[' || c == ']'
3239 || c == '*' || c == '.' || c == '\\'
3240 || c == '?' || c == '+' || c == '^' || c == '$')
3242 out += set_charptr_emchar(out, c);
3246 return make_string(temp, out - temp);
3249 DEFUN("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3250 Set the regexp to be used to match a word in regular-expression searching.
3251 #### Not yet implemented. Currently does nothing.
3252 #### Do not use this yet. Its calling interface is likely to change.
3259 /************************************************************************/
3260 /* initialization */
3261 /************************************************************************/
3263 void syms_of_search(void)
3266 DEFERROR_STANDARD(Qsearch_failed, Qinvalid_operation);
3267 DEFERROR_STANDARD(Qinvalid_regexp, Qsyntax_error);
3269 #ifdef EF_USE_COMPRE
3270 defsymbol(&Qcompre, "compre");
3271 defsymbol(&Qcomprep, "comprep");
3273 DEFSUBR(Fcompile_regexp);
3274 DEFSUBR(Fdefregexp);
3277 DEFSUBR(Flooking_at);
3278 DEFSUBR(Fposix_looking_at);
3279 DEFSUBR(Fstring_match);
3280 DEFSUBR(Fposix_string_match);
3281 DEFSUBR(Fskip_chars_forward);
3282 DEFSUBR(Fskip_chars_backward);
3283 DEFSUBR(Fskip_syntax_forward);
3284 DEFSUBR(Fskip_syntax_backward);
3285 DEFSUBR(Fsearch_forward);
3286 DEFSUBR(Fsearch_backward);
3287 DEFSUBR(Fword_search_forward);
3288 DEFSUBR(Fword_search_backward);
3289 DEFSUBR(Fre_search_forward);
3290 DEFSUBR(Fre_search_backward);
3291 DEFSUBR(Fposix_search_forward);
3292 DEFSUBR(Fposix_search_backward);
3293 DEFSUBR(Freplace_match);
3294 DEFSUBR(Fmatch_beginning);
3295 DEFSUBR(Fmatch_end);
3296 DEFSUBR(Fmatch_data);
3297 DEFSUBR(Fstore_match_data);
3298 DEFSUBR(Fregexp_quote);
3299 DEFSUBR(Fset_word_regexp);
3302 void reinit_vars_of_search(void)
3306 last_thing_searched = Qnil;
3307 staticpro_nodump(&last_thing_searched);
3309 for (i = 0; i < REGEXP_CACHE_SIZE; ++i) {
3310 searchbufs[i].buf.allocated = 100;
3311 searchbufs[i].buf.buffer = (unsigned char *)xmalloc_atomic(100);
3312 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3313 searchbufs[i].regexp = Qnil;
3314 staticpro_nodump(&searchbufs[i].regexp);
3315 searchbufs[i].next =
3316 (i == REGEXP_CACHE_SIZE - 1 ? 0 : &searchbufs[i + 1]);
3318 searchbuf_head = &searchbufs[0];
3321 void vars_of_search(void)
3323 reinit_vars_of_search();
3325 DEFVAR_LISP("forward-word-regexp", &Vforward_word_regexp /*
3326 *Regular expression to be used in `forward-word'.
3327 #### Not yet implemented.
3329 Vforward_word_regexp = Qnil;
3331 DEFVAR_LISP("backward-word-regexp", &Vbackward_word_regexp /*
3332 *Regular expression to be used in `backward-word'.
3333 #### Not yet implemented.
3335 Vbackward_word_regexp = Qnil;
3338 void complex_vars_of_search(void)
3340 Vskip_chars_range_table = Fmake_range_table();
3341 staticpro(&Vskip_chars_range_table);