1 /* String search routines for SXEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of SXEmacs
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* Synched up with: FSF 19.29, except for region-cache stuff. */
23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
25 /* This file has been Mule-ized except for the TRT stuff. */
31 #include "ui/insdel.h"
33 #ifdef REGION_CACHE_NEEDS_WORK
34 #include "region-cache.h"
38 #include <sys/types.h>
45 #define TRANSLATE(table, pos) \
46 (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
49 /* Make sure these are ALWAYS powers of 2 */
50 #define REGEXP_CACHE_SIZE 0x80
51 #define REGEXP_CACHE_HASH_MASK (REGEXP_CACHE_SIZE-1)
52 #define REGEXP_FASTMAP_SIZE 0400
53 #define REGEXP_FASTMAP_MASK (REGEXP_FASTMAP_SIZE-1)
55 #define __REGEXP_DEBUG__(args...) fprintf(stderr, "REGEXP " args)
56 #ifndef REGEXP_DEBUG_FLAG
57 #define REGEXP_DEBUG(args...)
59 #define REGEXP_DEBUG(args...) __REGEXP_DEBUG__(args)
61 #define REGEXP_DEBUG_COMPRE(args...) REGEXP_DEBUG("[compre]: " args)
62 #define REGEXP_DEBUG_COMPRE_C(args...) REGEXP_DEBUG("[compre/cache]: " args)
63 #define REGEXP_DEBUG_COMPRE_H(args...) REGEXP_DEBUG("[compre/hash]: " args)
64 #define REGEXP_CRITICAL(args...) __REGEXP_DEBUG__("CRITICAL: " args)
67 /* If the regexp is non-nil, then the buffer contains the compiled form
68 of that regexp, suitable for searching. */
70 struct regexp_cache *next;
72 struct re_pattern_buffer buf;
73 char fastmap[REGEXP_FASTMAP_SIZE];
74 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
78 /* The instances of that struct. */
79 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
81 /* The head of the linked list; points to the most recently used buffer. */
82 static struct regexp_cache *searchbuf_head;
84 /* Every call to re_match, etc., must pass &search_regs as the regs
85 argument unless you can show it is unnecessary (i.e., if re_match
86 is certainly going to be called again before region-around-match
89 Since the registers are now dynamically allocated, we need to make
90 sure not to refer to the Nth register before checking that it has
91 been allocated by checking search_regs.num_regs.
93 The regex code keeps track of whether it has allocated the search
94 buffer using bits in the re_pattern_buffer. This means that whenever
95 you compile a new pattern, it completely forgets whether it has
96 allocated any registers, and will allocate new registers the next
97 time you call a searching or matching function. Therefore, we need
98 to call re_set_registers after compiling a new pattern or after
99 setting the match registers, so that the regex functions will be
100 able to free or re-allocate it properly. */
102 /* Note: things get trickier under Mule because the values returned from
103 the regexp routines are in Bytinds but we need them to be in Bufpos's.
104 We take the easy way out for the moment and just convert them immediately.
105 We could be more clever by not converting them until necessary, but
106 that gets real ugly real fast since the buffer might have changed and
107 the positions might be out of sync or out of range.
109 static struct re_registers search_regs;
111 /* The buffer in which the last search was performed, or
112 Qt if the last search was done in a string;
113 Qnil if no searching has been done yet. */
114 static Lisp_Object last_thing_searched;
116 /* error condition signalled when regexp compile_pattern fails */
118 Lisp_Object Qinvalid_regexp;
120 /* Regular expressions used in forward/backward-word */
121 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
123 /* range table for use with skip_chars. Only needed for Mule. */
124 Lisp_Object Vskip_chars_range_table;
126 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len);
127 static void clear_unused_search_regs(struct re_registers *regp, int no_sub);
128 static void save_search_regs(void);
129 static Bufpos simple_search(struct buffer *buf, Bufbyte * base_pat,
130 Bytecount len, Bytind pos, Bytind lim,
131 EMACS_INT n, Lisp_Object trt);
132 static Bufpos boyer_moore(struct buffer *buf, Bufbyte * base_pat,
133 Bytecount len, Bytind pos, Bytind lim,
134 EMACS_INT n, Lisp_Object trt,
135 Lisp_Object inverse_trt, int charset_base);
136 static Bufpos search_buffer(struct buffer *buf, Lisp_Object str,
137 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
138 Lisp_Object trt, Lisp_Object inverse_trt,
141 static void matcher_overflow(void)
143 error("Stack overflow in regexp matcher");
150 #define COMPRE_T struct re_pattern_buffer
151 #define COMPREP(obj) \
152 (DYNACATP(obj) && EQ(XDYNACAT_TYPE(obj), Qcompre))
153 #define COMPRE_GET(obj) (COMPRE_T*)XDYNACAT(obj)->ptr
154 #define COMPRE_PUT(obj, rec) do { \
155 obj = make_dynacat((void*)rec); \
156 set_dynacat_printer(obj, compre_prfun); \
157 set_dynacat_finaliser(obj, compre_finfun); \
158 XDYNACAT_TYPE(obj) = Qcompre; \
160 #define XCOMPRE_GET(obj) XSTRING(obj)->compre
161 #define XCOMPRE_PUT(obj, b) XSTRING(obj)->compre = b
162 #define XCOMPRE_REM(obj) XSTRING(obj)->compre = Qnil
164 /* idenitifier for hard-cached rexp */
165 Lisp_Object Qcompre, Qcomprep;
166 static Lisp_Object cache_regexp(Lisp_Object, struct re_pattern_buffer*);
167 static COMPRE_T* make_compre(void)
168 #if defined(__GNUC__)
169 __attribute__((unused))
172 static COMPRE_T* clone_compre(COMPRE_T*);
173 static inline void free_compre(COMPRE_T*);
176 /* Compile a regexp and signal a Lisp error if anything goes wrong.
177 PATTERN is the pattern to compile.
178 CP is the place to put the result.
179 TRANSLATE is a translation table for ignoring case, or NULL for none.
180 REGP is the structure that says where to store the "register"
181 values that will result from matching this pattern.
182 If it is 0, we should compile the pattern not to record any
183 subexpression bounds.
184 POSIX is nonzero if we want full backtracking (POSIX style)
185 for this pattern. 0 means backtrack only enough to get a valid match. */
188 compile_pattern_1(struct regexp_cache *cp, Lisp_Object pattern,
189 Lisp_Object translate, struct re_registers *regp, int posix,
196 cp->buf.translate = translate;
198 old = re_set_syntax(RE_SYNTAX_EMACS
199 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
201 re_compile_pattern((char *)XSTRING_DATA(pattern),
202 XSTRING_LENGTH(pattern), &cp->buf);
205 maybe_signal_error(Qinvalid_regexp, list1(build_string(val)),
210 cp->regexp = Fcopy_sequence(pattern);
215 /* extremely simple hash table, no collision management is performed */
216 static struct re_pattern_buffer *
217 try_pattern_from_cache(Lisp_Object pattern,
218 struct re_registers *regp,
219 Lisp_Object translate,
223 struct regexp_cache *cp;
227 pathash = internal_hash(pattern, 0);
228 place = (int)((pathash>>4) & REGEXP_CACHE_HASH_MASK);
229 cp = &(searchbufs[place]);
230 REGEXP_DEBUG_COMPRE_H("trying hash place: %d.\n", place);
232 if (!NILP(Fstring_equal(cp->regexp, pattern))
233 && EQ(cp->buf.translate, translate) && cp->posix == posix) {
235 REGEXP_DEBUG_COMPRE_H("match: %d.\n", place);
238 /* collision or empty slot, dont care just put the
239 * new regexp there */
240 if (!compile_pattern_1(
241 cp, pattern, translate, regp, posix, errb))
243 REGEXP_DEBUG_COMPRE_H("collision: %d.\n", place);
248 #else /* !EF_USE_COMPRE */
249 static struct re_pattern_buffer *
250 try_pattern_from_cache(Lisp_Object pattern,
251 struct re_registers *regp,
252 Lisp_Object translate,
256 struct regexp_cache *cp, **cpp;
258 for (cpp = &searchbuf_head;; cpp = &cp->next) {
260 if (!NILP(Fstring_equal(cp->regexp, pattern))
261 && EQ(cp->buf.translate, translate)
262 && cp->posix == posix)
265 /* If we're at the end of the cache, compile
266 * into the last cell. */
268 if (!compile_pattern_1(
269 cp, pattern, translate, regp, posix, errb))
275 /* When we get here, cp (aka *cpp) contains the compiled pattern,
276 either because we found it in the cache or because we just compiled it.
277 Move it to the front of the queue to mark it as most recently used. */
279 cp->next = searchbuf_head;
284 #endif /* EF_USE_COMPRE */
286 /* Compile a regexp if necessary, but first check to see if there's one in
288 PATTERN is the pattern to compile.
289 TRANSLATE is a translation table for ignoring case, or NULL for none.
290 REGP is the structure that says where to store the "register"
291 values that will result from matching this pattern.
292 If it is 0, we should compile the pattern not to record any
293 subexpression bounds.
294 POSIX is nonzero if we want full backtracking (POSIX style)
295 for this pattern. 0 means backtrack only enough to get a valid match. */
297 struct re_pattern_buffer *
298 compile_pattern(Lisp_Object pattern,
299 struct re_registers *regp,
300 Lisp_Object translate,
304 struct re_pattern_buffer *result = NULL;
307 Lisp_Object rc = XCOMPRE_GET(pattern);
309 if (!NILP(rc) && COMPREP(rc) &&
310 (result = COMPRE_GET(rc)) &&
311 result->re_ngroups >= 0 && result->re_ngroups < 256) {
312 REGEXP_DEBUG_COMPRE_C("using cache: 0x%x.\n",
313 (unsigned int)result);
317 result = try_pattern_from_cache(
318 pattern, regp, translate, posix, errb);
320 cache_regexp(pattern, result);
325 /* Advise the searching functions about the space we have allocated
326 for register data. */
328 re_set_registers(result, regp, regp->num_regs,
329 regp->start, regp->end);
336 compre_prfun(Lisp_Object obj, Lisp_Object pcfun, int escflag)
338 write_fmt_str(pcfun, "#<compiled regexp %lx",
339 (long unsigned int)((COMPRE_GET(obj))->buffer));
344 compre_finfun(Lisp_Object obj, int unused)
346 REGEXP_DEBUG_COMPRE_C("0x%lx@0x%lx will pass away\n",
347 (long unsigned int)COMPRE_GET(obj),
348 (long unsigned int)obj);
349 free_compre(COMPRE_GET(obj));
356 COMPRE_T *result = xnew_and_zero(COMPRE_T);
358 result->fastmap = xmalloc_atomic(REGEXP_FASTMAP_SIZE);
365 clone_compre(COMPRE_T *src)
367 COMPRE_T *result = xnew_and_zero(COMPRE_T);
369 /* alloc and clone fastmap */
370 result->fastmap = (char*)xmalloc_atomic(REGEXP_FASTMAP_SIZE);
371 memcpy(result->fastmap, src->fastmap, REGEXP_FASTMAP_SIZE);
373 /* alloc and clone buffer */
374 result->buffer = (unsigned char *)xmalloc_atomic(src->allocated);
375 memcpy(result->buffer, src->buffer, src->allocated);
376 result->allocated = src->allocated;
377 result->used = src->used;
379 result->syntax = src->syntax;
380 result->translate = src->translate;
381 result->re_nsub = src->re_nsub;
382 result->re_ngroups = src->re_ngroups;
384 result->can_be_null = src->can_be_null;
385 result->regs_allocated = src->regs_allocated;
386 result->fastmap_accurate = src->fastmap_accurate;
388 result->no_sub = src->no_sub;
389 result->not_bol = src->not_bol;
390 result->not_eol = src->not_eol;
391 result->newline_anchor = src->newline_anchor;
393 /* alloc and clone ext_to_int_register */
394 result->external_to_internal_register =
395 (int*)xmalloc_atomic(
396 sizeof(int) * src->external_to_internal_register_size);
397 memcpy(result->external_to_internal_register,
398 src->external_to_internal_register,
399 sizeof(int)*src->external_to_internal_register_size);
400 result->external_to_internal_register_size =
401 src->external_to_internal_register_size;
407 free_compre(COMPRE_T *buf)
417 if (buf->external_to_internal_register) {
418 xfree(buf->external_to_internal_register);
419 buf->external_to_internal_register = NULL;
425 cache_regexp(Lisp_Object regexp, COMPRE_T *buf)
433 resbuf = clone_compre(buf);
434 COMPRE_PUT(rc, resbuf);
435 XCOMPRE_PUT(regexp, rc);
437 REGEXP_DEBUG_COMPRE_C("caching 0x%08x into 0x%08x\n",
438 (unsigned int)resbuf, (unsigned int)rc);
442 DEFUN("compile-regexp", Fcompile_regexp, 1, 1, 0, /*
443 Forcibly compile REGEXP and store the result in object-plist.
447 CHECK_STRING(regexp);
450 compile_pattern(regexp, &search_regs, Qnil, 0, ERROR_ME);
455 DEFUN("defregexp", Fdefregexp, 2, UNEVALLED, 0, /*
456 \(defregexp SYMBOL REGEXP DOCSTRING\)
457 Like `defconst' but for forcing compiled regexps.
459 The same restrictions that apply to `defconst' apply here in regard
460 to user variables. You shouldn't use this for regular expressions
461 that a user might want to customise. Instead, use `defcustom' with
466 /* This function can GC */
467 Lisp_Object sym = XCAR(args);
468 Lisp_Object pat = Feval(XCAR(args = XCDR(args)));
474 pat = Fcompile_regexp(pat);
475 Fset_default(sym, pat);
478 if (!NILP(args = XCDR(args))) {
479 Lisp_Object doc = XCAR(args);
480 Fput(sym, Qvariable_documentation, doc);
481 if (!NILP(args = XCDR(args)))
482 error("too many arguments");
485 if (!NILP(Vfile_domain))
486 Fput(sym, Qvariable_domain, Vfile_domain);
489 LOADHIST_ATTACH(sym);
492 #endif /* EF_USE_COMPRE */
494 /* Error condition used for failing searches */
495 Lisp_Object Qsearch_failed;
497 static Lisp_Object signal_failure(Lisp_Object arg)
500 Fsignal(Qsearch_failed, list1(arg));
501 return Qnil; /* Not reached. */
504 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
505 done after each regexp match that uses the search regs.
507 We could get a potential speedup by not converting the search registers
508 until it's really necessary, e.g. when match-data or replace-match is
509 called. However, this complexifies the code a lot (e.g. the buffer
510 could have changed and the Bytinds stored might be invalid) and is
511 probably not a great time-saver. */
513 static void fixup_search_regs_for_buffer(struct buffer *buf)
516 int num_regs = search_regs.num_regs;
518 for (i = 0; i < num_regs; i++) {
519 if (search_regs.start[i] >= 0)
520 search_regs.start[i] =
521 bytind_to_bufpos(buf, search_regs.start[i]);
522 if (search_regs.end[i] >= 0)
524 bytind_to_bufpos(buf, search_regs.end[i]);
528 /* Similar but for strings. */
529 static void fixup_search_regs_for_string(Lisp_Object string)
532 int num_regs = search_regs.num_regs;
534 /* #### bytecount_to_charcount() is not that efficient. This function
535 could be faster if it did its own conversion (using INC_CHARPTR()
536 and such), because the register ends are likely to be somewhat ordered.
537 (Even if not, you could sort them.)
539 Think about this if this function is a time hog, which it's probably
541 for (i = 0; i < num_regs; i++) {
542 if (search_regs.start[i] > 0) {
543 search_regs.start[i] =
544 bytecount_to_charcount(XSTRING_DATA(string),
545 search_regs.start[i]);
547 if (search_regs.end[i] > 0) {
549 bytecount_to_charcount(XSTRING_DATA(string),
556 looking_at_1(Lisp_Object string, struct buffer *buf, int posix)
558 /* This function has been Mule-ized, except for the trt table handling. */
563 struct re_pattern_buffer *bufp;
565 if (running_asynch_code)
568 CHECK_STRING(string);
569 bufp = compile_pattern(string, &search_regs,
570 (!NILP(buf->case_fold_search)
571 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
576 /* Get pointers and sizes of the two strings
577 that make up the visible portion of the buffer. */
579 p1 = BI_BUF_BEGV(buf);
580 p2 = BI_BUF_CEILING_OF(buf, p1);
582 s2 = BI_BUF_ZV(buf) - p2;
584 regex_match_object = Qnil;
585 regex_emacs_buffer = buf;
586 i = re_match_2(bufp, (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
587 s1, (char *)BI_BUF_BYTE_ADDRESS(buf, p2), s2,
588 BI_BUF_PT(buf) - BI_BUF_BEGV(buf), &search_regs,
589 BI_BUF_ZV(buf) - BI_BUF_BEGV(buf));
594 val = (0 <= i ? Qt : Qnil);
598 int num_regs = search_regs.num_regs;
599 for (i = 0; i < num_regs; i++)
600 if (search_regs.start[i] >= 0) {
601 search_regs.start[i] += BI_BUF_BEGV(buf);
602 search_regs.end[i] += BI_BUF_BEGV(buf);
605 XSETBUFFER(last_thing_searched, buf);
606 fixup_search_regs_for_buffer(buf);
610 DEFUN("looking-at", Flooking_at, 1, 2, 0, /*
611 Return t if text after point matches regular expression REGEXP.
612 This function modifies the match data that `match-beginning',
613 `match-end' and `match-data' access; save and restore the match
614 data if you want to preserve them.
616 Optional argument BUFFER defaults to the current buffer.
620 return looking_at_1(regexp, decode_buffer(buffer, 0), 0);
623 DEFUN("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
624 Return t if text after point matches regular expression REGEXP.
625 Find the longest match, in accord with Posix regular expression rules.
626 This function modifies the match data that `match-beginning',
627 `match-end' and `match-data' access; save and restore the match
628 data if you want to preserve them.
630 Optional argument BUFFER defaults to the current buffer.
634 return looking_at_1(regexp, decode_buffer(buffer, 0), 1);
638 string_match_1(Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
639 struct buffer *buf, int posix)
641 /* This function has been Mule-ized, except for the trt table handling. */
644 struct re_pattern_buffer *bufp;
646 if (running_asynch_code)
649 CHECK_STRING(regexp);
650 CHECK_STRING(string);
655 Charcount len = XSTRING_CHAR_LENGTH(string);
659 if (s < 0 && -s <= len)
661 else if (0 > s || s > len)
662 args_out_of_range(string, start);
665 bufp = compile_pattern(regexp, &search_regs,
666 (!NILP(buf->case_fold_search)
667 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
671 Bytecount bis = charcount_to_bytecount(XSTRING_DATA(string), s);
672 regex_match_object = string;
673 regex_emacs_buffer = buf;
674 val = re_search(bufp, (char *)XSTRING_DATA(string),
675 XSTRING_LENGTH(string), bis,
676 XSTRING_LENGTH(string) - bis, &search_regs);
682 last_thing_searched = Qt;
683 fixup_search_regs_for_string(string);
684 return make_int(bytecount_to_charcount(XSTRING_DATA(string), val));
687 DEFUN("string-match", Fstring_match, 2, 4, 0, /*
688 Return index of start of first match for REGEXP in STRING, or nil.
689 If third arg START is non-nil, start search at that index in STRING.
690 For index of first char beyond the match, do (match-end 0).
691 `match-end' and `match-beginning' also give indices of substrings
692 matched by parenthesis constructs in the pattern.
694 Optional arg BUFFER controls how case folding is done (according to
695 the value of `case-fold-search' in that buffer and that buffer's case
696 tables) and defaults to the current buffer.
698 (regexp, string, start, buffer))
700 return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
704 DEFUN("posix-string-match", Fposix_string_match, 2, 4, 0, /*
705 Return index of start of first match for REGEXP in STRING, or nil.
706 Find the longest match, in accord with Posix regular expression rules.
707 If third arg START is non-nil, start search at that index in STRING.
708 For index of first char beyond the match, do (match-end 0).
709 `match-end' and `match-beginning' also give indices of substrings
710 matched by parenthesis constructs in the pattern.
712 Optional arg BUFFER controls how case folding is done (according to
713 the value of `case-fold-search' in that buffer and that buffer's case
714 tables) and defaults to the current buffer.
716 (regexp, string, start, buffer))
718 return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
722 /* Match REGEXP against STRING, searching all of STRING,
723 and return the index of the match, or negative on failure.
724 This does not clobber the match data. */
727 fast_string_match(Lisp_Object regexp, const Bufbyte * nonreloc,
728 Lisp_Object reloc, Bytecount offset,
729 Bytecount length, int case_fold_search,
730 Error_behavior errb, int no_quit)
732 /* This function has been Mule-ized, except for the trt table handling. */
734 const Bufbyte *newnonreloc = (const Bufbyte*)nonreloc;
735 struct re_pattern_buffer *bufp;
737 bufp = compile_pattern(regexp, 0,
739 ? XCASE_TABLE_DOWNCASE(current_buffer->
743 return -1; /* will only do this when errb != ERROR_ME */
747 no_quit_in_re_search = 1;
749 fixup_internal_substring(nonreloc, reloc, offset, &length);
755 newnonreloc = XSTRING_DATA(reloc);
757 /* QUIT could relocate RELOC. Therefore we must
758 alloca() and copy. No way around this except some
759 serious rewriting of re_search(). */
760 /* yeah, let's rewrite this bugger, the warning
761 hereafter is inevitable too */
764 fixup_internal_substring should
765 have updated length, if it didn't
766 return with failure...
769 newnonreloc = alloca(length);
770 memcpy((void*)newnonreloc, (void*)XSTRING_DATA(reloc), length);
774 /* #### evil current-buffer dependency */
775 regex_match_object = reloc;
776 regex_emacs_buffer = current_buffer;
777 val = re_search(bufp, (const char*)newnonreloc + offset, length, 0,
780 no_quit_in_re_search = 0;
784 Bytecount fast_lisp_string_match(Lisp_Object regex, Lisp_Object string)
786 return fast_string_match(regex, 0, string, 0, -1, 0, ERROR_ME, 0);
789 #ifdef REGION_CACHE_NEEDS_WORK
790 /* The newline cache: remembering which sections of text have no newlines. */
792 /* If the user has requested newline caching, make sure it's on.
793 Otherwise, make sure it's off.
794 This is our cheezy way of associating an action with the change of
795 state of a buffer-local variable. */
796 static void newline_cache_on_off(struct buffer *buf)
798 if (NILP(buf->cache_long_line_scans)) {
799 /* It should be off. */
800 if (buf->newline_cache) {
801 free_region_cache(buf->newline_cache);
802 buf->newline_cache = 0;
805 /* It should be on. */
806 if (buf->newline_cache == 0)
807 buf->newline_cache = new_region_cache();
812 /* Search in BUF for COUNT instances of the character TARGET between
815 If COUNT is positive, search forwards; END must be >= START.
816 If COUNT is negative, search backwards for the -COUNTth instance;
817 END must be <= START.
818 If COUNT is zero, do anything you please; run rogue, for all I care.
820 If END is zero, use BEGV or ZV instead, as appropriate for the
821 direction indicated by COUNT.
823 If we find COUNT instances, set *SHORTAGE to zero, and return the
824 position after the COUNTth match. Note that for reverse motion
825 this is not the same as the usual convention for Emacs motion commands.
827 If we don't find COUNT instances before reaching END, set *SHORTAGE
828 to the number of TARGETs left unfound, and return END.
830 If ALLOW_QUIT is non-zero, call QUIT periodically. */
833 bi_scan_buffer(struct buffer *buf, Emchar target, Bytind st, Bytind en,
834 EMACS_INT count, EMACS_INT * shortage, int allow_quit)
836 /* This function has been Mule-ized. */
837 Bytind lim = en > 0 ? en :
838 ((count > 0) ? BI_BUF_ZV(buf) : BI_BUF_BEGV(buf));
840 /* #### newline cache stuff in this function not yet ported */
849 /* Due to the Mule representation of characters in a buffer,
850 we can simply search for characters in the range 0 - 127
851 directly. For other characters, we do it the "hard" way.
852 Note that this way works for all characters but the other
854 if (target >= 0200) {
855 while (st < lim && count > 0) {
856 if (BI_BUF_FETCH_CHAR(buf, st) == target)
863 while (st < lim && count > 0) {
867 _ceil_ = BI_BUF_CEILING_OF(buf, st);
868 _ceil_ = min(lim, _ceil_);
871 memchr(BI_BUF_BYTE_ADDRESS(buf, st),
872 (int)target, _ceil_ - st);
875 st = BI_BUF_PTR_BYTE_POS(buf,
889 if (target >= 0200) {
890 while (st > lim && count < 0) {
892 if (BI_BUF_FETCH_CHAR(buf, st) == target)
898 while (st > lim && count < 0) {
903 _floor_ = BI_BUF_FLOOR_OF(buf, st);
904 _floor_ = max(lim, _floor_);
905 /* No memrchr() ... */
906 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE(buf, st);
907 floorptr = BI_BUF_BYTE_ADDRESS(buf, _floor_);
908 while (bufptr >= floorptr) {
910 /* At this point, both ST and BUFPTR
911 refer to the same character. When
912 the loop terminates, ST will always
913 point to the last character we
915 if (*(unsigned char *)bufptr ==
916 (unsigned char)target) {
932 /* We found the character we were looking for; we have to return
933 the position *after* it due to the strange way that the return
942 scan_buffer(struct buffer * buf, Emchar target, Bufpos start, Bufpos end,
943 EMACS_INT count, EMACS_INT * shortage, int allow_quit)
946 Bytind bi_start, bi_end;
948 bi_start = bufpos_to_bytind(buf, start);
950 bi_end = bufpos_to_bytind(buf, end);
953 bi_retval = bi_scan_buffer(buf, target, bi_start, bi_end, count,
954 shortage, allow_quit);
955 return bytind_to_bufpos(buf, bi_retval);
958 Bytind bi_find_next_newline_no_quit(struct buffer * buf, Bytind from, int count)
960 return bi_scan_buffer(buf, '\n', from, 0, count, 0, 0);
963 Bufpos find_next_newline_no_quit(struct buffer * buf, Bufpos from, int count)
965 return scan_buffer(buf, '\n', from, 0, count, 0, 0);
968 Bufpos find_next_newline(struct buffer * buf, Bufpos from, int count)
970 return scan_buffer(buf, '\n', from, 0, count, 0, 1);
974 bi_find_next_emchar_in_string(Lisp_String * str, Emchar target, Bytind st,
977 /* This function has been Mule-ized. */
978 Bytind lim = string_length(str) - 1;
979 Bufbyte *s = string_data(str);
984 /* Due to the Mule representation of characters in a buffer,
985 we can simply search for characters in the range 0 - 127
986 directly. For other characters, we do it the "hard" way.
987 Note that this way works for all characters but the other
989 if (target >= 0200) {
990 while (st < lim && count > 0) {
991 if (string_char(str, st) == target)
993 INC_CHARBYTIND(s, st);
998 while (st < lim && count > 0) {
1000 (Bufbyte *) memchr(charptr_n_addr(s, st),
1001 (int)target, lim - st);
1004 st = (Bytind) (bufptr - s) + 1;
1012 /* Like find_next_newline, but returns position before the newline,
1013 not after, and only search up to TO. This isn't just
1014 find_next_newline (...)-1, because you might hit TO. */
1016 find_before_next_newline(struct buffer * buf, Bufpos from, Bufpos to, int count)
1019 Bufpos pos = scan_buffer(buf, '\n', from, to, count, &shortage, 1);
1027 /* This function synched with FSF 21.1 */
1029 skip_chars(struct buffer *buf, int forwardp, int syntaxp,
1030 Lisp_Object string, Lisp_Object lim)
1032 /* This function has been Mule-ized. */
1033 REGISTER Bufbyte *p, *pend;
1035 /* We store the first 256 chars in an array here and the rest in
1037 unsigned char fastmap[REGEXP_FASTMAP_SIZE];
1041 Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
1046 limit = forwardp ? BUF_ZV(buf) : BUF_BEGV(buf);
1048 CHECK_INT_COERCE_MARKER(lim);
1051 /* In any case, don't allow scan outside bounds of buffer. */
1052 if (limit > BUF_ZV(buf))
1053 limit = BUF_ZV(buf);
1054 if (limit < BUF_BEGV(buf))
1055 limit = BUF_BEGV(buf);
1058 CHECK_STRING(string);
1059 p = XSTRING_DATA(string);
1060 pend = p + XSTRING_LENGTH(string);
1061 memset(fastmap, 0, sizeof(fastmap));
1063 Fclear_range_table(Vskip_chars_range_table);
1065 if (p != pend && *p == '^') {
1070 /* Find the characters specified and set their elements of fastmap.
1071 If syntaxp, each character counts as itself.
1072 Otherwise, handle backslashes and ranges specially */
1075 c = charptr_emchar(p);
1078 if (c < REGEXP_FASTMAP_SIZE
1079 && syntax_spec_code[c] < (unsigned char)Smax)
1082 signal_simple_error("Invalid syntax designator",
1088 c = charptr_emchar(p);
1091 if (p != pend && *p == '-') {
1094 /* Skip over the dash. */
1098 cend = charptr_emchar(p);
1099 while (c <= cend && c < REGEXP_FASTMAP_SIZE) {
1104 Fput_range_table(make_int(c),
1106 Vskip_chars_range_table);
1109 if (c < REGEXP_FASTMAP_SIZE)
1112 Fput_range_table(make_int(c),
1114 Vskip_chars_range_table);
1119 /* #### Not in FSF 21.1 */
1120 if (syntaxp && fastmap['-'] != 0)
1123 /* If ^ was the first character, complement the fastmap.
1124 We don't complement the range table, however; we just use negate
1125 in the comparisons below. */
1128 for (i = 0; i < (int)(sizeof fastmap); i++)
1132 Bufpos start_point = BUF_PT(buf);
1133 Bufpos pos = start_point;
1134 Bytind pos_byte = BI_BUF_PT(buf);
1137 SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos,
1139 /* All syntax designators are normal chars so nothing strange
1143 while (fastmap[(unsigned char)
1145 [(int)SYNTAX_FROM_CACHE
1147 BI_BUF_FETCH_CHAR(buf,
1151 INC_BYTIND(buf, pos_byte);
1154 UPDATE_SYNTAX_CACHE_FORWARD
1158 while (pos > limit) {
1159 Bufpos savepos = pos_byte;
1161 DEC_BYTIND(buf, pos_byte);
1162 UPDATE_SYNTAX_CACHE_BACKWARD(pos);
1163 if (!fastmap[(unsigned char)
1165 [(int)SYNTAX_FROM_CACHE
1167 BI_BUF_FETCH_CHAR(buf,
1178 while (pos < limit) {
1180 BI_BUF_FETCH_CHAR(buf, pos_byte);
1182 REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1186 Vskip_chars_range_table, Qnil))
1189 INC_BYTIND(buf, pos_byte);
1194 while (pos > limit) {
1195 Bufpos prev_pos_byte = pos_byte;
1198 DEC_BYTIND(buf, prev_pos_byte);
1199 ch = BI_BUF_FETCH_CHAR(buf,
1202 REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1206 Vskip_chars_range_table, Qnil))
1209 pos_byte = prev_pos_byte;
1216 BOTH_BUF_SET_PT(buf, pos, pos_byte);
1217 return make_int(BUF_PT(buf) - start_point);
1221 DEFUN("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /*
1222 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
1223 STRING is like the inside of a `[...]' in a regular expression
1224 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
1225 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1226 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1227 Returns the distance traveled, either zero or positive.
1229 Optional argument BUFFER defaults to the current buffer.
1231 (string, limit, buffer))
1233 return skip_chars(decode_buffer(buffer, 0), 1, 0, string, limit);
1236 DEFUN("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /*
1237 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1238 See `skip-chars-forward' for details.
1239 Returns the distance traveled, either zero or negative.
1241 Optional argument BUFFER defaults to the current buffer.
1243 (string, limit, buffer))
1245 return skip_chars(decode_buffer(buffer, 0), 0, 0, string, limit);
1248 DEFUN("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /*
1249 Move point forward across chars in specified syntax classes.
1250 SYNTAX is a string of syntax code characters.
1251 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1252 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1253 This function returns the distance traveled, either zero or positive.
1255 Optional argument BUFFER defaults to the current buffer.
1257 (syntax, limit, buffer))
1259 return skip_chars(decode_buffer(buffer, 0), 1, 1, syntax, limit);
1262 DEFUN("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /*
1263 Move point backward across chars in specified syntax classes.
1264 SYNTAX is a string of syntax code characters.
1265 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1266 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1267 This function returns the distance traveled, either zero or negative.
1269 Optional argument BUFFER defaults to the current buffer.
1271 (syntax, limit, buffer))
1273 return skip_chars(decode_buffer(buffer, 0), 0, 1, syntax, limit);
1276 /* Subroutines of Lisp buffer search functions. */
1279 search_command(Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1280 Lisp_Object count, Lisp_Object buffer, int direction,
1283 /* This function has been Mule-ized, except for the trt table handling. */
1286 EMACS_INT n = direction;
1294 buf = decode_buffer(buffer, 0);
1295 CHECK_STRING(string);
1297 lim = n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
1299 CHECK_INT_COERCE_MARKER(limit);
1301 if (n > 0 ? lim < BUF_PT(buf) : lim > BUF_PT(buf))
1302 error("Invalid search limit (wrong side of point)");
1303 if (lim > BUF_ZV(buf))
1305 if (lim < BUF_BEGV(buf))
1306 lim = BUF_BEGV(buf);
1309 np = search_buffer(buf, string, BUF_PT(buf), lim, n, RE,
1310 (!NILP(buf->case_fold_search)
1311 ? XCASE_TABLE_CANON(buf->case_table)
1312 : Qnil), (!NILP(buf->case_fold_search)
1313 ? XCASE_TABLE_EQV(buf->case_table)
1318 return signal_failure(string);
1319 if (!EQ(noerror, Qt)) {
1320 if (lim < BUF_BEGV(buf) || lim > BUF_ZV(buf))
1322 BUF_SET_PT(buf, lim);
1324 #if 0 /* This would be clean, but maybe programs depend on
1325 a value of nil here. */
1332 if (np < BUF_BEGV(buf) || np > BUF_ZV(buf))
1335 BUF_SET_PT(buf, np);
1337 return make_int(np);
1340 static int trivial_regexp_p(Lisp_Object regexp)
1342 /* This function has been Mule-ized. */
1343 Bytecount len = XSTRING_LENGTH(regexp);
1344 Bufbyte *s = XSTRING_DATA(regexp);
1345 while (--len >= 0) {
1347 /* ']' doesn't appear here because it's only special after ] */
1377 /* 97/2/25 jhod Added for category matches */
1401 /* Search for the n'th occurrence of STRING in BUF,
1402 starting at position BUFPOS and stopping at position BUFLIM,
1403 treating PAT as a literal string if RE is false or as
1404 a regular expression if RE is true.
1406 If N is positive, searching is forward and BUFLIM must be greater
1408 If N is negative, searching is backward and BUFLIM must be less
1411 Returns -x if only N-x occurrences found (x > 0),
1412 or else the position at the beginning of the Nth occurrence
1413 (if searching backward) or the end (if searching forward).
1415 POSIX is nonzero if we want full backtracking (POSIX style)
1416 for this pattern. 0 means backtrack only enough to get a valid match. */
1418 search_buffer(struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1419 Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1420 Lisp_Object inverse_trt, int posix)
1422 /* This function has been Mule-ized, except for the trt table handling. */
1423 Bytecount len = XSTRING_LENGTH(string);
1424 Bufbyte *base_pat = XSTRING_DATA(string);
1425 REGISTER EMACS_INT i, j;
1430 if (running_asynch_code)
1433 /* Null string is found at starting position. */
1435 set_search_regs(buf, bufpos, 0);
1436 clear_unused_search_regs(&search_regs, 0);
1440 /* Searching 0 times means noop---don't move, don't touch registers. */
1444 pos = bufpos_to_bytind(buf, bufpos);
1445 lim = bufpos_to_bytind(buf, buflim);
1446 if (RE && !trivial_regexp_p(string)) {
1447 struct re_pattern_buffer *bufp;
1449 bufp = compile_pattern(string, &search_regs, trt, posix,
1452 /* Get pointers and sizes of the two strings
1453 that make up the visible portion of the buffer. */
1455 p1 = BI_BUF_BEGV(buf);
1456 p2 = BI_BUF_CEILING_OF(buf, p1);
1458 s2 = BI_BUF_ZV(buf) - p2;
1459 regex_match_object = Qnil;
1464 regex_emacs_buffer = buf;
1465 val = re_search_2(bufp,
1466 (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1467 s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1469 s2, pos - BI_BUF_BEGV(buf), lim - pos,
1470 &search_regs, pos - BI_BUF_BEGV(buf));
1476 int num_regs = search_regs.num_regs;
1477 j = BI_BUF_BEGV(buf);
1478 for (i = 0; i < num_regs; i++)
1479 if (search_regs.start[i] >= 0) {
1480 search_regs.start[i] += j;
1481 search_regs.end[i] += j;
1483 /* re_match (called from re_search et al) does this for us */
1484 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1485 XSETBUFFER(last_thing_searched, buf);
1486 /* Set pos to the new position. */
1487 pos = search_regs.start[0];
1488 fixup_search_regs_for_buffer(buf);
1489 /* And bufpos too. */
1490 bufpos = search_regs.start[0];
1499 regex_emacs_buffer = buf;
1500 val = re_search_2(bufp,
1501 (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1502 s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1504 s2, pos - BI_BUF_BEGV(buf), lim - pos,
1505 &search_regs, lim - BI_BUF_BEGV(buf));
1510 int num_regs = search_regs.num_regs;
1511 j = BI_BUF_BEGV(buf);
1512 for (i = 0; i < num_regs; i++)
1513 if (search_regs.start[i] >= 0) {
1514 search_regs.start[i] += j;
1515 search_regs.end[i] += j;
1517 /* re_match (called from re_search et al) does this for us */
1518 /* clear_unused_search_regs (search_regs, bufp->no_sub); */
1519 XSETBUFFER(last_thing_searched, buf);
1520 /* Set pos to the new position. */
1521 pos = search_regs.end[0];
1522 fixup_search_regs_for_buffer(buf);
1523 /* And bufpos too. */
1524 bufpos = search_regs.end[0];
1531 } else { /* non-RE case */
1533 int charset_base = -1;
1534 int boyer_moore_ok = 1;
1536 Bufbyte *patbuf = alloca_array(Bufbyte, len * MAX_EMCHAR_LEN);
1540 Bufbyte tmp_str[MAX_EMCHAR_LEN];
1541 Emchar c, translated, inverse;
1542 Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1544 /* If we got here and the RE flag is set, it's because
1545 we're dealing with a regexp known to be trivial, so the
1546 backslash just quotes the next character. */
1547 if (RE && *base_pat == '\\') {
1551 c = charptr_emchar(base_pat);
1552 translated = TRANSLATE(trt, c);
1553 inverse = TRANSLATE(inverse_trt, c);
1555 orig_bytelen = charcount_to_bytecount(base_pat, 1);
1556 inv_bytelen = set_charptr_emchar(tmp_str, inverse);
1557 new_bytelen = set_charptr_emchar(tmp_str, translated);
1559 if (new_bytelen != orig_bytelen
1560 || inv_bytelen != orig_bytelen)
1562 if (translated != c || inverse != c) {
1563 /* Keep track of which character set row
1564 contains the characters that need translation. */
1565 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1566 if (charset_base == -1)
1567 charset_base = charset_base_code;
1568 else if (charset_base != charset_base_code)
1569 /* If two different rows appear, needing translation,
1570 then we cannot use boyer_moore search. */
1573 memcpy(pat, tmp_str, new_bytelen);
1575 base_pat += orig_bytelen;
1576 len -= orig_bytelen;
1578 #else /* not MULE */
1579 while (--len >= 0) {
1580 /* If we got here and the RE flag is set, it's because
1581 we're dealing with a regexp known to be trivial, so the
1582 backslash just quotes the next character. */
1583 if (RE && *base_pat == '\\') {
1587 *pat++ = TRANSLATE(trt, *base_pat++);
1591 pat = base_pat = patbuf;
1593 return boyer_moore(buf, base_pat, len, pos, lim, n,
1594 trt, inverse_trt, charset_base);
1596 return simple_search(buf, base_pat, len, pos, lim, n,
1601 /* Do a simple string search N times for the string PAT,
1602 whose length is LEN/LEN_BYTE,
1603 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1604 TRT is the translation table.
1606 Return the character position where the match is found.
1607 Otherwise, if M matches remained to be found, return -M.
1609 This kind of search works regardless of what is in PAT and
1610 regardless of what is in TRT. It is used in cases where
1611 boyer_moore cannot work. */
1614 simple_search(struct buffer *buf, Bufbyte * base_pat, Bytecount len_byte,
1615 Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1617 int forward = n > 0;
1618 Bytecount buf_len = 0; /* Shut up compiler. */
1623 Bytecount this_len = len_byte;
1624 Bytind this_idx = idx;
1625 const Bufbyte *p = base_pat;
1629 while (this_len > 0) {
1630 Emchar pat_ch, buf_ch;
1633 pat_ch = charptr_emchar(p);
1635 BI_BUF_FETCH_CHAR(buf, this_idx);
1637 buf_ch = TRANSLATE(trt, buf_ch);
1639 if (buf_ch != pat_ch)
1642 pat_len = charcount_to_bytecount(p, 1);
1644 this_len -= pat_len;
1645 INC_BYTIND(buf, this_idx);
1647 if (this_len == 0) {
1648 buf_len = this_idx - idx;
1652 INC_BYTIND(buf, idx);
1659 Bytecount this_len = len_byte;
1660 Bytind this_idx = idx;
1661 const Bufbyte *p = base_pat + len_byte;
1667 while (this_len > 0) {
1668 Emchar pat_ch, buf_ch;
1671 DEC_BYTIND(buf, this_idx);
1672 pat_ch = charptr_emchar(p);
1674 BI_BUF_FETCH_CHAR(buf, this_idx);
1676 buf_ch = TRANSLATE(trt, buf_ch);
1678 if (buf_ch != pat_ch)
1682 charcount_to_bytecount(p, 1);
1684 if (this_len == 0) {
1685 buf_len = idx - this_idx;
1689 DEC_BYTIND(buf, idx);
1696 Bufpos beg, end, retval;
1698 beg = bytind_to_bufpos(buf, idx - buf_len);
1699 retval = end = bytind_to_bufpos(buf, idx);
1701 retval = beg = bytind_to_bufpos(buf, idx);
1702 end = bytind_to_bufpos(buf, idx + buf_len);
1704 set_search_regs(buf, beg, end - beg);
1705 clear_unused_search_regs(&search_regs, 0);
1715 /* Do Boyer-Moore search N times for the string PAT,
1716 whose length is LEN/LEN_BYTE,
1717 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1718 DIRECTION says which direction we search in.
1719 TRT and INVERSE_TRT are translation tables.
1721 This kind of search works if all the characters in PAT that have
1722 nontrivial translation are the same aside from the last byte. This
1723 makes it possible to translate just the last byte of a character,
1724 and do so after just a simple test of the context.
1726 If that criterion is not satisfied, do not call this function. */
1729 boyer_moore(struct buffer *buf, Bufbyte * base_pat, Bytecount len,
1730 Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1731 Lisp_Object inverse_trt, int charset_base)
1733 /* #### Someone really really really needs to comment the workings
1734 of this junk somewhat better.
1736 BTW "BM" stands for Boyer-Moore, which is one of the standard
1737 string-searching algorithms. It's the best string-searching
1738 algorithm out there, provided that:
1740 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1741 uses hashing, is much much easier to code but not as fast.)
1742 b) You can freely move backwards in the string that you're
1745 As the comment below tries to explain (but garbles in typical
1746 programmer-ese), the idea is that you don't have to do a
1747 string match at every successive position in the text. For
1748 example, let's say the pattern is "a very long string". We
1749 compare the last character in the string (`g') with the
1750 corresponding character in the text. If it mismatches, and
1751 it is, say, `z', then we can skip forward by the entire
1752 length of the pattern because `z' does not occur anywhere
1753 in the pattern. If the mismatching character does occur
1754 in the pattern, we can usually still skip forward by more
1755 than one: e.g. if it is `l', then we can skip forward
1756 by the length of the substring "ong string" -- i.e. the
1757 largest end section of the pattern that does not contain
1758 the mismatched character. So what we do is compute, for
1759 each possible character, the distance we can skip forward
1760 (the "stride") and use it in the string matching. This
1761 is what the BM_tab holds. */
1762 REGISTER EMACS_INT *BM_tab;
1763 EMACS_INT *BM_tab_base;
1764 REGISTER Bytecount dirlen;
1767 Bytecount stride_for_teases = 0;
1768 REGISTER EMACS_INT i, j;
1769 Bufbyte *pat, *pat_end;
1770 REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1771 Bufbyte simple_translate[REGEXP_FASTMAP_SIZE];
1772 REGISTER int direction = ((n > 0) ? 1 : -1);
1774 Bufbyte translate_prev_byte = 0;
1775 Bufbyte translate_anteprev_byte = 0;
1778 EMACS_INT BM_tab_space[REGEXP_FASTMAP_SIZE];
1779 BM_tab = &BM_tab_space[0];
1781 BM_tab = alloca_array(EMACS_INT, 256);
1784 /* The general approach is that we are going to maintain that we
1785 know the first (closest to the present position, in whatever
1786 direction we're searching) character that could possibly be
1787 the last (furthest from present position) character of a
1788 valid match. We advance the state of our knowledge by
1789 looking at that character and seeing whether it indeed
1790 matches the last character of the pattern. If it does, we
1791 take a closer look. If it does not, we move our pointer (to
1792 putative last characters) as far as is logically possible.
1793 This amount of movement, which I call a stride, will be the
1794 length of the pattern if the actual character appears nowhere
1795 in the pattern, otherwise it will be the distance from the
1796 last occurrence of that character to the end of the pattern.
1797 As a coding trick, an enormous stride is coded into the table
1798 for characters that match the last character. This allows
1799 use of only a single test, a test for having gone past the
1800 end of the permissible match region, to test for both
1801 possible matches (when the stride goes past the end
1802 immediately) and failure to match (where you get nudged past
1803 the end one stride at a time).
1805 Here we make a "mickey mouse" BM table. The stride of the
1806 search is determined only by the last character of the
1807 putative match. If that character does not match, we will
1808 stride the proper distance to propose a match that
1809 superimposes it on the last instance of a character that
1810 matches it (per trt), or misses it entirely if there is
1813 dirlen = len * direction;
1814 infinity = dirlen - (lim + pos + len + len) * direction;
1815 /* Record position after the end of the pattern. */
1816 pat_end = base_pat + len;
1818 base_pat = pat_end - 1;
1819 BM_tab_base = BM_tab;
1820 BM_tab += REGEXP_FASTMAP_SIZE;
1821 j = dirlen; /* to get it in a register */
1822 /* A character that does not appear in the pattern induces a
1823 stride equal to the pattern length. */
1824 while (BM_tab_base != BM_tab) {
1830 /* We use this for translation, instead of TRT itself. We
1831 fill this in to handle the characters that actually occur
1832 in the pattern. Others don't matter anyway! */
1833 xzero(simple_translate);
1834 for (i = 0; i < REGEXP_FASTMAP_SIZE; i++)
1835 simple_translate[i] = (Bufbyte) i;
1837 while (i != infinity) {
1838 Bufbyte *ptr = base_pat + i;
1844 Emchar ch, untranslated;
1845 int this_translated = 1;
1847 /* Is *PTR the last byte of a character? */
1848 if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P(ptr[1])) {
1849 Bufbyte *charstart = ptr;
1850 while (!BUFBYTE_FIRST_BYTE_P(*charstart))
1852 untranslated = charptr_emchar(charstart);
1854 (untranslated & ~CHAR_FIELD3_MASK)) {
1855 ch = TRANSLATE(trt, untranslated);
1856 if (!BUFBYTE_FIRST_BYTE_P(*ptr)) {
1857 translate_prev_byte = ptr[-1];
1858 if (!BUFBYTE_FIRST_BYTE_P
1859 (translate_prev_byte))
1860 translate_anteprev_byte
1864 this_translated = 0;
1869 this_translated = 0;
1871 if (ch > REGEXP_FASTMAP_SIZE)
1872 j = ((unsigned char)(ch & REGEXP_FASTMAP_SIZE)| 0200);
1874 j = (unsigned char)(ch & REGEXP_FASTMAP_SIZE);
1877 stride_for_teases = BM_tab[j];
1878 BM_tab[j] = dirlen - i;
1879 /* A translation table is accompanied by its inverse --
1880 see comment following downcase_table for details */
1881 if (this_translated) {
1882 Emchar starting_ch = ch;
1883 EMACS_INT starting_j = j;
1885 ch = TRANSLATE(inverse_trt, ch);
1886 if (ch > REGEXP_FASTMAP_SIZE)
1887 j = ((unsigned char)(ch & REGEXP_FASTMAP_SIZE) | 0200);
1889 j = (unsigned char)(ch & REGEXP_FASTMAP_SIZE);
1891 /* For all the characters that map into CH,
1892 set up simple_translate to map the last byte
1894 simple_translate[j] = starting_j;
1895 if (ch == starting_ch)
1897 BM_tab[j] = dirlen - i;
1903 k = (j = TRANSLATE(trt, j));
1905 stride_for_teases = BM_tab[j];
1906 BM_tab[j] = dirlen - i;
1907 /* A translation table is accompanied by its inverse --
1908 see comment following downcase_table for details */
1910 while ((j = TRANSLATE(inverse_trt, j)) != k) {
1911 simple_translate[j] = (Bufbyte) k;
1912 BM_tab[j] = dirlen - i;
1919 stride_for_teases = BM_tab[j];
1920 BM_tab[j] = dirlen - i;
1922 /* stride_for_teases tells how much to stride if we get a
1923 match on the far character but are subsequently
1924 disappointed, by recording what the stride would have been
1925 for that character if the last character had been
1928 infinity = dirlen - infinity;
1929 pos += dirlen - ((direction > 0) ? direction : 0);
1930 /* loop invariant - pos points at where last char (first char if
1931 reverse) of pattern would align in a possible match. */
1934 Bufbyte *tail_end_ptr;
1935 /* It's been reported that some (broken) compiler thinks
1936 that Boolean expressions in an arithmetic context are
1937 unsigned. Using an explicit ?1:0 prevents this. */
1938 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1939 return n * (0 - direction);
1940 /* First we do the part we can by pointers (maybe
1944 limit = pos - dirlen + direction;
1945 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1946 have changed. See buffer.h. */
1947 limit = ((direction > 0)
1948 ? BI_BUF_CEILING_OF(buf, limit) - 1
1949 : BI_BUF_FLOOR_OF(buf, limit + 1));
1950 /* LIMIT is now the last (not beyond-last!) value POS can
1951 take on without hitting edge of buffer or the gap. */
1952 limit = ((direction > 0)
1953 ? min(lim - 1, min(limit, pos + 20000))
1954 : max(lim, max(limit, pos - 20000)));
1955 tail_end = BI_BUF_CEILING_OF(buf, pos);
1956 tail_end_ptr = BI_BUF_BYTE_ADDRESS(buf, tail_end);
1958 if ((limit - pos) * direction > 20) {
1959 p_limit = BI_BUF_BYTE_ADDRESS(buf, limit);
1960 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS(buf, pos));
1961 /* In this loop, pos + cursor - ptr2 is the surrogate
1963 while (1) { /* use one cursor setting as long as i can */
1964 if (direction > 0) { /* worth duplicating */
1965 /* Use signed comparison if appropriate to make
1966 cursor+infinity sure to be > p_limit.
1967 Assuming that the buffer lies in a range of
1968 addresses that are all "positive" (as ints)
1969 or all "negative", either kind of comparison
1970 will work as long as we don't step by
1971 infinity. So pick the kind that works when
1972 we do step by infinity. */
1973 if ((EMACS_INT) (p_limit + infinity) >
1974 (EMACS_INT) p_limit)
1975 while ((EMACS_INT) cursor <=
1976 (EMACS_INT) p_limit)
1980 while ((EMACS_UINT) cursor <=
1981 (EMACS_UINT) p_limit)
1985 if ((EMACS_INT) (p_limit + infinity) <
1986 (EMACS_INT) p_limit)
1987 while ((EMACS_INT) cursor >=
1988 (EMACS_INT) p_limit)
1992 while ((EMACS_UINT) cursor >=
1993 (EMACS_UINT) p_limit)
1997 /* If you are here, cursor is beyond the end of the
1998 searched region. This can happen if you match on
1999 the far character of the pattern, because the
2000 "stride" of that character is infinity, a number
2001 able to throw you well beyond the end of the
2002 search. It can also happen if you fail to match
2003 within the permitted region and would otherwise
2004 try a character beyond that region */
2005 if ((cursor - p_limit) * direction <= len)
2006 break; /* a small overrun is genuine */
2007 cursor -= infinity; /* large overrun = hit */
2008 i = dirlen - direction;
2011 direction) + direction != 0) {
2014 cursor -= direction;
2015 /* Translate only the last byte of a character. */
2016 if ((cursor == tail_end_ptr
2018 BUFBYTE_FIRST_BYTE_P(cursor
2021 (BUFBYTE_FIRST_BYTE_P
2023 || (translate_prev_byte ==
2026 (BUFBYTE_FIRST_BYTE_P
2027 (translate_prev_byte)
2029 translate_anteprev_byte
2031 ch = simple_translate
2047 direction) + direction != 0)
2049 *(cursor -= direction))
2052 cursor += dirlen - i - direction; /* fix cursor */
2053 if (i + direction == 0) {
2054 cursor -= direction;
2058 (pos + cursor - ptr2 +
2062 bytind_to_bufpos(buf,
2065 bytind_to_bufpos(buf,
2069 set_search_regs(buf, bufstart,
2072 clear_unused_search_regs
2076 if ((n -= direction) != 0)
2077 cursor += dirlen; /* to resume search */
2079 return ((direction > 0)
2081 end[0] : search_regs.
2084 cursor += stride_for_teases; /* <sigh> we lose - */
2086 pos += cursor - ptr2;
2088 /* Now we'll pick up a clump that has to be done the hard
2089 way because it covers a discontinuity */
2091 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
2092 have changed. See buffer.h. */
2093 limit = ((direction > 0)
2094 ? BI_BUF_CEILING_OF(buf, pos - dirlen + 1) - 1
2095 : BI_BUF_FLOOR_OF(buf, pos - dirlen));
2096 limit = ((direction > 0)
2097 ? min(limit + len, lim - 1)
2098 : max(limit - len, lim));
2099 /* LIMIT is now the last value POS can have
2100 and still be valid for a possible match. */
2102 /* This loop can be coded for space rather than
2103 speed because it will usually run only once.
2104 (the reach is at most len + 21, and typically
2105 does not exceed len) */
2106 while ((limit - pos) * direction >= 0)
2107 /* *not* BI_BUF_FETCH_CHAR. We are working here
2108 with bytes, not characters. */
2110 BM_tab[*BI_BUF_BYTE_ADDRESS
2112 /* now run the same tests to distinguish going off
2113 the end, a match or a phony match. */
2114 if ((pos - limit) * direction <= len)
2115 break; /* ran off the end */
2116 /* Found what might be a match.
2117 Set POS back to last (first if reverse) char pos. */
2119 i = dirlen - direction;
2120 while ((i -= direction) + direction != 0) {
2127 ptr = BI_BUF_BYTE_ADDRESS(buf, pos);
2128 if ((ptr == tail_end_ptr
2129 || BUFBYTE_FIRST_BYTE_P(ptr[1]))
2130 && (BUFBYTE_FIRST_BYTE_P(ptr[0])
2131 || (translate_prev_byte ==
2134 (BUFBYTE_FIRST_BYTE_P
2135 (translate_prev_byte)
2136 || translate_anteprev_byte
2138 ch = simple_translate[*ptr];
2145 if (pat[i] != TRANSLATE(trt,
2146 *BI_BUF_BYTE_ADDRESS
2151 /* Above loop has moved POS part or all the way back
2152 to the first char pos (last char pos if reverse).
2153 Set it once again at the last (first if reverse)
2155 pos += dirlen - i - direction;
2156 if (i + direction == 0) {
2160 Bytind bytstart = (pos +
2166 bytind_to_bufpos(buf,
2169 bytind_to_bufpos(buf,
2173 set_search_regs(buf, bufstart,
2176 clear_unused_search_regs
2180 if ((n -= direction) != 0)
2181 pos += dirlen; /* to resume search */
2183 return ((direction > 0)
2185 end[0] : search_regs.
2188 pos += stride_for_teases;
2191 /* We have done one clump. Can we continue? */
2192 if ((lim - pos) * direction < 0)
2193 return (0 - n) * direction;
2195 return bytind_to_bufpos(buf, pos);
2198 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
2199 buffer for a match just found. */
2201 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len)
2203 /* This function has been Mule-ized. */
2204 /* Make sure we have registers in which to store
2205 the match position. */
2206 if (search_regs.num_regs == 0) {
2207 search_regs.start = xnew_atomic(regoff_t);
2208 search_regs.end = xnew_atomic(regoff_t);
2209 search_regs.num_regs = 1;
2212 search_regs.start[0] = beg;
2213 search_regs.end[0] = beg + len;
2214 XSETBUFFER(last_thing_searched, buf);
2217 /* Clear unused search registers so match data will be null.
2218 REGP is a pointer to the register structure to clear, usually the global
2220 NO_SUB is the number of subexpressions to allow for. (Does not count
2221 the whole match, ie, for a string search NO_SUB == 0.)
2222 It is an error if NO_SUB > REGP.num_regs - 1. */
2224 static void clear_unused_search_regs(struct re_registers *regp, int no_sub)
2226 /* This function has been Mule-ized. */
2229 assert(no_sub >= 0 && no_sub < regp->num_regs);
2230 for (i = no_sub + 1; i < regp->num_regs; i++)
2231 regp->start[i] = regp->end[i] = -1;
2234 /* Given a string of words separated by word delimiters,
2235 compute a regexp that matches those exact words
2236 separated by arbitrary punctuation. */
2238 static Lisp_Object wordify(Lisp_Object buffer, Lisp_Object string)
2241 EMACS_INT punct_count = 0, word_count = 0;
2242 struct buffer *buf = decode_buffer(buffer, 0);
2243 Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2245 CHECK_STRING(string);
2246 len = XSTRING_CHAR_LENGTH(string);
2248 for (i = 0; i < len; i++)
2250 (syntax_table, string_char(XSTRING(string), i))) {
2252 if (i > 0 && WORD_SYNTAX_P(syntax_table,
2253 string_char(XSTRING(string),
2257 if (WORD_SYNTAX_P(syntax_table, string_char(XSTRING(string), len - 1)))
2260 return build_string("");
2263 /* The following value is an upper bound on the amount of storage we
2264 need. In non-Mule, it is exact. */
2266 (Bufbyte *) alloca(XSTRING_LENGTH(string) - punct_count +
2267 5 * (word_count - 1) + 4);
2268 Bufbyte *o = storage;
2273 for (i = 0; i < len; i++) {
2274 Emchar ch = string_char(XSTRING(string), i);
2276 if (WORD_SYNTAX_P(syntax_table, ch))
2277 o += set_charptr_emchar(o, ch);
2279 && WORD_SYNTAX_P(syntax_table,
2280 string_char(XSTRING(string),
2294 return make_string(storage, o - storage);
2298 DEFUN("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
2299 Search backward from point for STRING.
2300 Set point to the beginning of the occurrence found, and return point.
2302 Optional second argument LIMIT bounds the search; it is a buffer
2303 position. The match found must not extend before that position.
2304 The value nil is equivalent to (point-min).
2306 Optional third argument NOERROR, if t, means just return nil (no
2307 error) if the search fails. If neither nil nor t, set point to LIMIT
2310 Optional fourth argument COUNT is a repeat count--search for
2311 successive occurrences.
2313 Optional fifth argument BUFFER specifies the buffer to search in and
2314 defaults to the current buffer.
2316 See also the functions `match-beginning', `match-end' and `replace-match'.
2318 (string, limit, noerror, count, buffer))
2320 return search_command(string, limit, noerror, count, buffer, -1, 0, 0);
2323 DEFUN("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
2324 Search forward from point for STRING.
2325 Set point to the end of the occurrence found, and return point.
2327 Optional second argument LIMIT bounds the search; it is a buffer
2328 position. The match found must not extend after that position. The
2329 value nil is equivalent to (point-max).
2331 Optional third argument NOERROR, if t, means just return nil (no
2332 error) if the search fails. If neither nil nor t, set point to LIMIT
2335 Optional fourth argument COUNT is a repeat count--search for
2336 successive occurrences.
2338 Optional fifth argument BUFFER specifies the buffer to search in and
2339 defaults to the current buffer.
2341 See also the functions `match-beginning', `match-end' and `replace-match'.
2343 (string, limit, noerror, count, buffer))
2345 return search_command(string, limit, noerror, count, buffer, 1, 0, 0);
2348 DEFUN("word-search-backward", Fword_search_backward, 1, 5, "sWord search backward: ", /*
2349 Search backward from point for STRING, ignoring differences in punctuation.
2350 Set point to the beginning of the occurrence found, and return point.
2352 Optional second argument LIMIT bounds the search; it is a buffer
2353 position. The match found must not extend before that position.
2354 The value nil is equivalent to (point-min).
2356 Optional third argument NOERROR, if t, means just return nil (no
2357 error) if the search fails. If neither nil nor t, set point to LIMIT
2360 Optional fourth argument COUNT is a repeat count--search for
2361 successive occurrences.
2363 Optional fifth argument BUFFER specifies the buffer to search in and
2364 defaults to the current buffer.
2366 See also the functions `match-beginning', `match-end' and `replace-match'.
2368 (string, limit, noerror, count, buffer))
2370 return search_command(wordify(buffer, string), limit, noerror, count,
2374 DEFUN("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
2375 Search forward from point for STRING, ignoring differences in punctuation.
2376 Set point to the end of the occurrence found, and return point.
2378 Optional second argument LIMIT bounds the search; it is a buffer
2379 position. The match found must not extend after that position. The
2380 value nil is equivalent to (point-max).
2382 Optional third argument NOERROR, if t, means just return nil (no
2383 error) if the search fails. If neither nil nor t, set point to LIMIT
2386 Optional fourth argument COUNT is a repeat count--search for
2387 successive occurrences.
2389 Optional fifth argument BUFFER specifies the buffer to search in and
2390 defaults to the current buffer.
2392 See also the functions `match-beginning', `match-end' and `replace-match'.
2394 (string, limit, noerror, count, buffer))
2396 return search_command(wordify(buffer, string), limit, noerror, count,
2400 DEFUN("re-search-backward", Fre_search_backward, 1, 5, "sRE search backward: ", /*
2401 Search backward from point for match for regular expression REGEXP.
2402 Set point to the beginning of the match, and return point.
2403 The match found is the one starting last in the buffer
2404 and yet ending before the origin of the search.
2406 Optional second argument LIMIT bounds the search; it is a buffer
2407 position. The match found must not extend before that position.
2408 The value nil is equivalent to (point-min).
2410 Optional third argument NOERROR, if t, means just return nil (no
2411 error) if the search fails. If neither nil nor t, set point to LIMIT
2414 Optional fourth argument COUNT is a repeat count--search for
2415 successive occurrences.
2417 Optional fifth argument BUFFER specifies the buffer to search in and
2418 defaults to the current buffer.
2420 See also the functions `match-beginning', `match-end' and `replace-match'.
2422 (regexp, limit, noerror, count, buffer))
2424 return search_command(regexp, limit, noerror, count, buffer, -1, 1, 0);
2427 DEFUN("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
2428 Search forward from point for regular expression REGEXP.
2429 Set point to the end of the occurrence found, and return point.
2431 Optional second argument LIMIT bounds the search; it is a buffer
2432 position. The match found must not extend after that position. The
2433 value nil is equivalent to (point-max).
2435 Optional third argument NOERROR, if t, means just return nil (no
2436 error) if the search fails. If neither nil nor t, set point to LIMIT
2439 Optional fourth argument COUNT is a repeat count--search for
2440 successive occurrences.
2442 Optional fifth argument BUFFER specifies the buffer to search in and
2443 defaults to the current buffer.
2445 See also the functions `match-beginning', `match-end' and `replace-match'.
2447 (regexp, limit, noerror, count, buffer))
2449 return search_command(regexp, limit, noerror, count, buffer, 1, 1, 0);
2452 DEFUN("posix-search-backward", Fposix_search_backward, 1, 5, "sPosix search backward: ", /*
2453 Search backward from point for match for regular expression REGEXP.
2454 Find the longest match in accord with Posix regular expression rules.
2455 Set point to the beginning of the match, and return point.
2456 The match found is the one starting last in the buffer
2457 and yet ending before the origin of the search.
2459 Optional second argument LIMIT bounds the search; it is a buffer
2460 position. The match found must not extend before that position.
2461 The value nil is equivalent to (point-min).
2463 Optional third argument NOERROR, if t, means just return nil (no
2464 error) if the search fails. If neither nil nor t, set point to LIMIT
2467 Optional fourth argument COUNT is a repeat count--search for
2468 successive occurrences.
2470 Optional fifth argument BUFFER specifies the buffer to search in and
2471 defaults to the current buffer.
2473 See also the functions `match-beginning', `match-end' and `replace-match'.
2475 (regexp, limit, noerror, count, buffer))
2477 return search_command(regexp, limit, noerror, count, buffer, -1, 1, 1);
2480 DEFUN("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
2481 Search forward from point for regular expression REGEXP.
2482 Find the longest match in accord with Posix regular expression rules.
2483 Set point to the end of the occurrence found, and return point.
2485 Optional second argument LIMIT bounds the search; it is a buffer
2486 position. The match found must not extend after that position. The
2487 value nil is equivalent to (point-max).
2489 Optional third argument NOERROR, if t, means just return nil (no
2490 error) if the search fails. If neither nil nor t, set point to LIMIT
2493 Optional fourth argument COUNT is a repeat count--search for
2494 successive occurrences.
2496 Optional fifth argument BUFFER specifies the buffer to search in and
2497 defaults to the current buffer.
2499 See also the functions `match-beginning', `match-end' and `replace-match'.
2501 (regexp, limit, noerror, count, buffer))
2503 return search_command(regexp, limit, noerror, count, buffer, 1, 1, 1);
2506 static Lisp_Object free_created_dynarrs(Lisp_Object cons)
2508 Dynarr_free(get_opaque_ptr(XCAR(cons)));
2509 Dynarr_free(get_opaque_ptr(XCDR(cons)));
2510 free_opaque_ptr(XCAR(cons));
2511 free_opaque_ptr(XCDR(cons));
2512 free_cons(XCONS(cons));
2516 DEFUN("replace-match", Freplace_match, 1, 5, 0, /*
2517 Replace text matched by last search with REPLACEMENT.
2518 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2519 Otherwise maybe capitalize the whole text, or maybe just word initials,
2520 based on the replaced text.
2521 If the replaced text has only capital letters
2522 and has at least one multiletter word, convert REPLACEMENT to all caps.
2523 If the replaced text has at least one word starting with a capital letter,
2524 then capitalize each word in REPLACEMENT.
2526 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2527 Otherwise treat `\\' as special:
2528 `\\&' in REPLACEMENT means substitute original matched text.
2529 `\\N' means substitute what matched the Nth `\\(...\\)'.
2530 If Nth parens didn't match, substitute nothing.
2531 `\\\\' means insert one `\\'.
2532 `\\u' means upcase the next character.
2533 `\\l' means downcase the next character.
2534 `\\U' means begin upcasing all following characters.
2535 `\\L' means begin downcasing all following characters.
2536 `\\E' means terminate the effect of any `\\U' or `\\L'.
2537 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2538 all other case changes that may be made in the replaced text.
2539 FIXEDCASE and LITERAL are optional arguments.
2540 Leaves point at end of replacement text.
2542 The optional fourth argument STRING can be a string to modify.
2543 In that case, this function creates and returns a new string
2544 which is made by replacing the part of STRING that was matched.
2545 When fourth argument is a string, fifth argument STRBUFFER specifies
2546 the buffer to be used for syntax-table and case-table lookup and
2547 defaults to the current buffer. When fourth argument is not a string,
2548 the buffer that the match occurred in has automatically been remembered
2549 and you do not need to specify it.
2551 When fourth argument is nil, STRBUFFER specifies a subexpression of
2552 the match. It says to replace just that subexpression instead of the
2553 whole match. This is useful only after a regular expression search or
2554 match since only regular expressions have distinguished subexpressions.
2556 (replacement, fixedcase, literal, string, strbuffer))
2558 /* This function has been Mule-ized. */
2559 /* This function can GC */
2560 enum { nochange, all_caps, cap_initial } case_action;
2562 int some_multiletter_word;
2565 int some_nonuppercase_initial;
2569 Lisp_Char_Table *syntax_table;
2572 int_dynarr *ul_action_dynarr = 0;
2573 int_dynarr *ul_pos_dynarr = 0;
2577 CHECK_STRING(replacement);
2579 if (!NILP(string)) {
2580 CHECK_STRING(string);
2581 if (!EQ(last_thing_searched, Qt))
2582 error("last thing matched was not a string");
2583 /* If the match data
2584 were abstracted into a special "match data" type instead
2585 of the typical half-assed "let the implementation be
2586 visible" form it's in, we could extend it to include
2587 the last string matched and the buffer used for that
2588 matching. But of course we can't change it as it is. */
2589 buf = decode_buffer(strbuffer, 0);
2590 XSETBUFFER(buffer, buf);
2592 if (!NILP(strbuffer)) {
2593 CHECK_INT(strbuffer);
2594 sub = XINT(strbuffer);
2595 if (sub < 0 || sub >= (int)search_regs.num_regs)
2596 args_out_of_range(strbuffer,
2597 make_int(search_regs.
2600 if (!BUFFERP(last_thing_searched))
2601 error("last thing matched was not a buffer");
2602 buffer = last_thing_searched;
2603 buf = XBUFFER(buffer);
2606 syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2608 case_action = nochange; /* We tried an initialization */
2609 /* but some C compilers blew it */
2611 if (search_regs.num_regs == 0)
2612 error("replace-match called before any match found");
2615 if (search_regs.start[sub] < BUF_BEGV(buf)
2616 || search_regs.start[sub] > search_regs.end[sub]
2617 || search_regs.end[sub] > BUF_ZV(buf))
2618 args_out_of_range(make_int(search_regs.start[sub]),
2619 make_int(search_regs.end[sub]));
2621 if (search_regs.start[0] < 0
2622 || search_regs.start[0] > search_regs.end[0]
2623 || search_regs.end[0] > XSTRING_CHAR_LENGTH(string))
2624 args_out_of_range(make_int(search_regs.start[0]),
2625 make_int(search_regs.end[0]));
2628 if (NILP(fixedcase)) {
2629 /* Decide how to casify by examining the matched text. */
2631 last = search_regs.end[sub];
2633 case_action = all_caps;
2635 /* some_multiletter_word is set nonzero if any original word
2636 is more than one letter long. */
2637 some_multiletter_word = 0;
2639 some_nonuppercase_initial = 0;
2642 for (pos = search_regs.start[sub]; pos < last; pos++) {
2644 c = BUF_FETCH_CHAR(buf, pos);
2646 c = string_char(XSTRING(string), pos);
2648 if (LOWERCASEP(buf, c)) {
2649 /* Cannot be all caps if any original char is lower case */
2652 if (!WORD_SYNTAX_P(syntax_table, prevc))
2653 some_nonuppercase_initial = 1;
2655 some_multiletter_word = 1;
2656 } else if (!NOCASEP(buf, c)) {
2658 if (!WORD_SYNTAX_P(syntax_table, prevc)) ;
2660 some_multiletter_word = 1;
2662 /* If the initial is a caseless word constituent,
2663 treat that like a lowercase initial. */
2664 if (!WORD_SYNTAX_P(syntax_table, prevc))
2665 some_nonuppercase_initial = 1;
2671 /* Convert to all caps if the old text is all caps
2672 and has at least one multiletter word. */
2673 if (!some_lowercase && some_multiletter_word)
2674 case_action = all_caps;
2675 /* Capitalize each word, if the old text has all capitalized words. */
2676 else if (!some_nonuppercase_initial && some_multiletter_word)
2677 case_action = cap_initial;
2678 else if (!some_nonuppercase_initial && some_uppercase)
2679 /* Should x -> yz, operating on X, give Yz or YZ?
2680 We'll assume the latter. */
2681 case_action = all_caps;
2683 case_action = nochange;
2686 /* Do replacement in a string. */
2687 if (!NILP(string)) {
2688 Lisp_Object before, after;
2690 speccount = specpdl_depth();
2692 Fsubstring(string, Qzero, make_int(search_regs.start[0]));
2693 after = Fsubstring(string, make_int(search_regs.end[0]), Qnil);
2695 /* Do case substitution into REPLACEMENT if desired. */
2696 if (NILP(literal)) {
2697 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2699 /* XEmacs change: rewrote this loop somewhat to make it
2700 cleaner. Also added \U, \E, etc. */
2701 Charcount literal_start = 0;
2702 /* We build up the substituted string in ACCUM. */
2707 /* OK, the basic idea here is that we scan through the
2708 replacement string until we find a backslash, which
2709 represents a substring of the original string to be
2710 substituted. We then append onto ACCUM the literal
2711 text before the backslash (LASTPOS marks the
2712 beginning of this) followed by the substring of the
2713 original string that needs to be inserted. */
2714 for (strpos = 0; strpos < stlen; strpos++) {
2715 /* If LITERAL_END is set, we've encountered a backslash
2716 (the end of literal text to be inserted). */
2717 Charcount literal_end = -1;
2718 /* If SUBSTART is set, we need to also insert the
2719 text from SUBSTART to SUBEND in the original string. */
2720 Charcount substart = -1;
2721 Charcount subend = -1;
2723 c = string_char(XSTRING(replacement), strpos);
2724 if (c == '\\' && strpos < stlen - 1) {
2725 c = string_char(XSTRING(replacement),
2728 literal_end = strpos - 1;
2729 substart = search_regs.start[0];
2730 subend = search_regs.end[0];
2731 } else if (c >= '1' && c <= '9' &&
2733 search_regs.num_regs + '0') {
2735 start[c - '0'] >= 0) {
2745 } else if (c == 'U' || c == 'u'
2746 || c == 'L' || c == 'l'
2748 /* Keep track of all case changes requested, but don't
2749 make them now. Do them later so we override
2751 if (!ul_pos_dynarr) {
2756 record_unwind_protect
2757 (free_created_dynarrs,
2762 (ul_action_dynarr)));
2764 literal_end = strpos - 1;
2765 Dynarr_add(ul_pos_dynarr,
2773 Dynarr_add(ul_action_dynarr, c);
2774 } else if (c == '\\')
2775 /* So we get just one backslash. */
2776 literal_end = strpos;
2778 if (literal_end >= 0) {
2779 Lisp_Object literal_text = Qnil;
2780 Lisp_Object substring = Qnil;
2781 if (literal_end != literal_start)
2783 Fsubstring(replacement,
2788 if (substart >= 0 && subend != substart)
2789 substring = Fsubstring(string,
2794 if (!NILP(literal_text)
2795 || !NILP(substring))
2797 concat3(accum, literal_text,
2799 literal_start = strpos + 1;
2803 if (strpos != literal_start)
2804 /* some literal text at end to be inserted */
2807 Fsubstring(replacement,
2808 make_int(literal_start),
2811 replacement = accum;
2814 /* replacement can be nil. */
2815 if (NILP(replacement))
2816 replacement = build_string("");
2818 if (case_action == all_caps)
2819 replacement = Fupcase(replacement, buffer);
2820 else if (case_action == cap_initial)
2821 replacement = Fupcase_initials(replacement, buffer);
2823 /* Now finally, we need to process the \U's, \E's, etc. */
2824 if (ul_pos_dynarr) {
2826 int cur_action = 'E';
2827 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2830 for (strpos = 0; strpos < stlen; strpos++) {
2832 string_char(XSTRING(replacement), strpos);
2833 Emchar newchar = -1;
2834 if (i < Dynarr_length(ul_pos_dynarr) &&
2835 strpos == Dynarr_at(ul_pos_dynarr, i)) {
2837 Dynarr_at(ul_action_dynarr, i);
2839 if (new_action == 'u')
2840 newchar = UPCASE(buf, curchar);
2841 else if (new_action == 'l')
2843 DOWNCASE(buf, curchar);
2845 cur_action = new_action;
2847 if (newchar == -1) {
2848 if (cur_action == 'U')
2849 newchar = UPCASE(buf, curchar);
2850 else if (cur_action == 'L')
2852 DOWNCASE(buf, curchar);
2856 if (newchar != curchar)
2857 set_string_char(XSTRING(replacement),
2862 /* frees the Dynarrs if necessary. */
2863 unbind_to(speccount, Qnil);
2864 return concat3(before, replacement, after);
2867 mc_count = begin_multiple_change(buf, search_regs.start[sub],
2868 search_regs.end[sub]);
2870 /* begin_multiple_change() records an unwind-protect, so we need to
2871 record this value now. */
2872 speccount = specpdl_depth();
2874 /* We insert the replacement text before the old text, and then
2875 delete the original text. This means that markers at the
2876 beginning or end of the original will float to the corresponding
2877 position in the replacement. */
2878 BUF_SET_PT(buf, search_regs.start[sub]);
2880 Finsert(1, &replacement);
2882 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2884 struct gcpro gcpro1;
2885 GCPRO1(replacement);
2886 for (strpos = 0; strpos < stlen; strpos++) {
2887 /* on the first iteration assert(offset==0),
2888 exactly complementing BUF_SET_PT() above.
2889 During the loop, it keeps track of the amount inserted.
2891 Charcount offset = BUF_PT(buf) - search_regs.start[sub];
2893 c = string_char(XSTRING(replacement), strpos);
2894 if (c == '\\' && strpos < stlen - 1) {
2895 /* XXX FIXME: replacing just a substring non-literally
2896 using backslash refs to the match looks dangerous. But
2897 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2898 <duwe@caldera.de> claims Finsert_buffer_substring already
2899 handles this correctly.
2901 c = string_char(XSTRING(replacement), ++strpos);
2903 Finsert_buffer_substring
2905 make_int(search_regs.start[0] +
2907 make_int(search_regs.end[0] +
2909 else if (c >= '1' && c <= '9'
2910 && c <= search_regs.num_regs + '0') {
2911 if (search_regs.start[c - '0'] >= 1)
2912 Finsert_buffer_substring
2914 make_int(search_regs.
2917 make_int(search_regs.
2920 } else if (c == 'U' || c == 'u' || c == 'L'
2921 || c == 'l' || c == 'E') {
2922 /* Keep track of all case changes requested, but don't
2923 make them now. Do them later so we override
2925 if (!ul_pos_dynarr) {
2926 ul_pos_dynarr = Dynarr_new(int);
2929 record_unwind_protect
2930 (free_created_dynarrs,
2931 Fcons(make_opaque_ptr
2934 (ul_action_dynarr)));
2936 Dynarr_add(ul_pos_dynarr, BUF_PT(buf));
2937 Dynarr_add(ul_action_dynarr, c);
2939 buffer_insert_emacs_char(buf, c);
2941 buffer_insert_emacs_char(buf, c);
2946 inslen = BUF_PT(buf) - (search_regs.start[sub]);
2947 buffer_delete_range(buf, search_regs.start[sub] + inslen,
2948 search_regs.end[sub] + inslen, 0);
2950 if (case_action == all_caps)
2951 Fupcase_region(make_int(BUF_PT(buf) - inslen),
2952 make_int(BUF_PT(buf)), buffer);
2953 else if (case_action == cap_initial)
2954 Fupcase_initials_region(make_int(BUF_PT(buf) - inslen),
2955 make_int(BUF_PT(buf)), buffer);
2957 /* Now go through and make all the case changes that were requested
2958 in the replacement string. */
2959 if (ul_pos_dynarr) {
2960 Bufpos eend = BUF_PT(buf);
2962 int cur_action = 'E';
2964 for (pos = BUF_PT(buf) - inslen; pos < eend; pos++) {
2965 Emchar curchar = BUF_FETCH_CHAR(buf, pos);
2966 Emchar newchar = -1;
2967 if (i < Dynarr_length(ul_pos_dynarr) &&
2968 pos == Dynarr_at(ul_pos_dynarr, i)) {
2969 int new_action = Dynarr_at(ul_action_dynarr, i);
2971 if (new_action == 'u')
2972 newchar = UPCASE(buf, curchar);
2973 else if (new_action == 'l')
2974 newchar = DOWNCASE(buf, curchar);
2976 cur_action = new_action;
2978 if (newchar == -1) {
2979 if (cur_action == 'U')
2980 newchar = UPCASE(buf, curchar);
2981 else if (cur_action == 'L')
2982 newchar = DOWNCASE(buf, curchar);
2986 if (newchar != curchar)
2987 buffer_replace_char(buf, pos, newchar, 0, 0);
2991 /* frees the Dynarrs if necessary. */
2992 unbind_to(speccount, Qnil);
2993 end_multiple_change(buf, mc_count);
2998 static Lisp_Object match_limit(Lisp_Object num, int beginningp)
3000 /* This function has been Mule-ized. */
3005 if (n < 0 || search_regs.num_regs <= 0)
3006 args_out_of_range(num, make_int(search_regs.num_regs));
3007 if (n >= search_regs.num_regs || search_regs.start[n] < 0)
3009 return make_int(beginningp ? search_regs.start[n] : search_regs.end[n]);
3012 DEFUN("match-beginning", Fmatch_beginning, 1, 1, 0, /*
3013 Return position of start of text matched by last regexp search.
3014 NUM, specifies which parenthesized expression in the last regexp.
3015 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3016 Zero means the entire text matched by the whole regexp or whole string.
3020 return match_limit(num, 1);
3023 DEFUN("match-end", Fmatch_end, 1, 1, 0, /*
3024 Return position of end of text matched by last regexp search.
3025 NUM specifies which parenthesized expression in the last regexp.
3026 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3027 Zero means the entire text matched by the whole regexp or whole string.
3031 return match_limit(num, 0);
3034 DEFUN("match-data", Fmatch_data, 0, 2, 0, /*
3035 Return a list containing all info on what the last regexp search matched.
3036 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
3037 All the elements are markers or nil (nil if the Nth pair didn't match)
3038 if the last match was on a buffer; integers or nil if a string was matched.
3039 Use `store-match-data' to reinstate the data in this list.
3041 If INTEGERS (the optional first argument) is non-nil, always use integers
3042 \(rather than markers) to represent buffer positions.
3043 If REUSE is a list, reuse it as part of the value. If REUSE is long enough
3044 to hold all the values, and if INTEGERS is non-nil, no consing is done.
3048 /* This function has been Mule-ized. */
3049 Lisp_Object tail, prev;
3054 if (NILP(last_thing_searched))
3055 /*error ("match-data called before any match found"); */
3058 data = alloca_array(Lisp_Object, 2 * search_regs.num_regs);
3061 for (i = 0; i < search_regs.num_regs; i++) {
3062 Bufpos start = search_regs.start[i];
3064 if (EQ(last_thing_searched, Qt)
3065 || !NILP(integers)) {
3066 data[2 * i] = make_int(start);
3067 data[2 * i + 1] = make_int(search_regs.end[i]);
3068 } else if (BUFFERP(last_thing_searched)) {
3069 data[2 * i] = Fmake_marker();
3070 Fset_marker(data[2 * i],
3072 last_thing_searched);
3073 data[2 * i + 1] = Fmake_marker();
3074 Fset_marker(data[2 * i + 1],
3075 make_int(search_regs.end[i]),
3076 last_thing_searched);
3078 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
3083 data[2 * i] = data[2 * i + 1] = Qnil;
3086 return Flist(2 * len + 2, data);
3088 /* If REUSE is a list, store as many value elements as will fit
3089 into the elements of REUSE. */
3090 for (prev = Qnil, i = 0, tail = reuse; CONSP(tail);
3091 i++, tail = XCDR(tail)) {
3092 if (i < 2 * len + 2)
3093 XCAR(tail) = data[i];
3099 /* If we couldn't fit all value elements into REUSE,
3100 cons up the rest of them and add them to the end of REUSE. */
3101 if (i < 2 * len + 2)
3102 XCDR(prev) = Flist(2 * len + 2 - i, data + i);
3107 DEFUN("store-match-data", Fstore_match_data, 1, 1, 0, /*
3108 Set internal data on last search match from elements of LIST.
3109 LIST should have been created by calling `match-data' previously.
3113 /* This function has been Mule-ized. */
3115 REGISTER Lisp_Object marker;
3119 if (running_asynch_code)
3122 CONCHECK_LIST(list);
3124 /* Unless we find a marker with a buffer in LIST, assume that this
3125 match data came from a string. */
3126 last_thing_searched = Qt;
3128 /* Allocate registers if they don't already exist. */
3129 length = XINT(Flength(list)) / 2;
3130 num_regs = search_regs.num_regs;
3132 if (length > num_regs) {
3133 if (search_regs.num_regs == 0) {
3134 search_regs.start = xnew_atomic_array(regoff_t, length);
3135 search_regs.end = xnew_atomic_array(regoff_t, length);
3137 XREALLOC_ARRAY(search_regs.start, regoff_t, length);
3138 XREALLOC_ARRAY(search_regs.end, regoff_t, length);
3141 search_regs.num_regs = length;
3144 for (i = 0; i < num_regs; i++) {
3145 marker = Fcar(list);
3147 search_regs.start[i] = -1;
3150 if (MARKERP(marker)) {
3151 if (XMARKER(marker)->buffer == 0)
3154 XSETBUFFER(last_thing_searched,
3155 XMARKER(marker)->buffer);
3158 CHECK_INT_COERCE_MARKER(marker);
3159 search_regs.start[i] = XINT(marker);
3162 marker = Fcar(list);
3163 if (MARKERP(marker) && XMARKER(marker)->buffer == 0)
3166 CHECK_INT_COERCE_MARKER(marker);
3167 search_regs.end[i] = XINT(marker);
3175 /* If non-zero the match data have been saved in saved_search_regs
3176 during the execution of a sentinel or filter. */
3177 static int search_regs_saved;
3178 static struct re_registers saved_search_regs;
3180 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3181 if asynchronous code (filter or sentinel) is running. */
3182 static void save_search_regs(void)
3184 if (!search_regs_saved) {
3185 saved_search_regs.num_regs = search_regs.num_regs;
3186 saved_search_regs.start = search_regs.start;
3187 saved_search_regs.end = search_regs.end;
3188 search_regs.num_regs = 0;
3189 search_regs.start = 0;
3190 search_regs.end = 0;
3192 search_regs_saved = 1;
3196 /* Called upon exit from filters and sentinels. */
3197 void restore_match_data(void)
3199 if (search_regs_saved) {
3200 if (search_regs.num_regs > 0) {
3201 xfree(search_regs.start);
3202 xfree(search_regs.end);
3204 search_regs.num_regs = saved_search_regs.num_regs;
3205 search_regs.start = saved_search_regs.start;
3206 search_regs.end = saved_search_regs.end;
3208 search_regs_saved = 0;
3212 /* Quote a string to inactivate reg-expr chars */
3214 DEFUN("regexp-quote", Fregexp_quote, 1, 1, 0, /*
3215 Return a regexp string which matches exactly STRING and nothing else.
3219 REGISTER Bufbyte *in, *out, *end;
3220 REGISTER Bufbyte *temp;
3222 CHECK_STRING(string);
3224 temp = (Bufbyte *) alloca(XSTRING_LENGTH(string) * 2);
3226 /* Now copy the data into the new string, inserting escapes. */
3228 in = XSTRING_DATA(string);
3229 end = in + XSTRING_LENGTH(string);
3233 Emchar c = charptr_emchar(in);
3235 if (c == '[' || c == ']'
3236 || c == '*' || c == '.' || c == '\\'
3237 || c == '?' || c == '+' || c == '^' || c == '$')
3239 out += set_charptr_emchar(out, c);
3243 return make_string(temp, out - temp);
3246 DEFUN("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
3247 Set the regexp to be used to match a word in regular-expression searching.
3248 #### Not yet implemented. Currently does nothing.
3249 #### Do not use this yet. Its calling interface is likely to change.
3256 /************************************************************************/
3257 /* initialization */
3258 /************************************************************************/
3260 void syms_of_search(void)
3263 DEFERROR_STANDARD(Qsearch_failed, Qinvalid_operation);
3264 DEFERROR_STANDARD(Qinvalid_regexp, Qsyntax_error);
3266 #ifdef EF_USE_COMPRE
3267 defsymbol(&Qcompre, "compre");
3268 defsymbol(&Qcomprep, "comprep");
3270 DEFSUBR(Fcompile_regexp);
3271 DEFSUBR(Fdefregexp);
3274 DEFSUBR(Flooking_at);
3275 DEFSUBR(Fposix_looking_at);
3276 DEFSUBR(Fstring_match);
3277 DEFSUBR(Fposix_string_match);
3278 DEFSUBR(Fskip_chars_forward);
3279 DEFSUBR(Fskip_chars_backward);
3280 DEFSUBR(Fskip_syntax_forward);
3281 DEFSUBR(Fskip_syntax_backward);
3282 DEFSUBR(Fsearch_forward);
3283 DEFSUBR(Fsearch_backward);
3284 DEFSUBR(Fword_search_forward);
3285 DEFSUBR(Fword_search_backward);
3286 DEFSUBR(Fre_search_forward);
3287 DEFSUBR(Fre_search_backward);
3288 DEFSUBR(Fposix_search_forward);
3289 DEFSUBR(Fposix_search_backward);
3290 DEFSUBR(Freplace_match);
3291 DEFSUBR(Fmatch_beginning);
3292 DEFSUBR(Fmatch_end);
3293 DEFSUBR(Fmatch_data);
3294 DEFSUBR(Fstore_match_data);
3295 DEFSUBR(Fregexp_quote);
3296 DEFSUBR(Fset_word_regexp);
3299 void reinit_vars_of_search(void)
3303 last_thing_searched = Qnil;
3304 staticpro_nodump(&last_thing_searched);
3306 for (i = 0; i < REGEXP_CACHE_SIZE; ++i) {
3307 searchbufs[i].buf.allocated = 100;
3308 searchbufs[i].buf.buffer = (unsigned char *)xmalloc_atomic(100);
3309 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3310 searchbufs[i].regexp = Qnil;
3311 staticpro_nodump(&searchbufs[i].regexp);
3312 searchbufs[i].next =
3313 (i == REGEXP_CACHE_SIZE - 1 ? 0 : &searchbufs[i + 1]);
3315 searchbuf_head = &searchbufs[0];
3318 void vars_of_search(void)
3320 reinit_vars_of_search();
3322 DEFVAR_LISP("forward-word-regexp", &Vforward_word_regexp /*
3323 *Regular expression to be used in `forward-word'.
3324 #### Not yet implemented.
3326 Vforward_word_regexp = Qnil;
3328 DEFVAR_LISP("backward-word-regexp", &Vbackward_word_regexp /*
3329 *Regular expression to be used in `backward-word'.
3330 #### Not yet implemented.
3332 Vbackward_word_regexp = Qnil;
3335 void complex_vars_of_search(void)
3337 Vskip_chars_range_table = Fmake_range_table();
3338 staticpro(&Vskip_chars_range_table);