Merge remote-tracking branch 'njsf/for-steve' into for-steve
[sxemacs] / src / search.c
1 /* String search routines for SXEmacs.
2    Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3    Copyright (C) 1995 Sun Microsystems, Inc.
4
5 This file is part of SXEmacs
6
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
19
20
21 /* Synched up with: FSF 19.29, except for region-cache stuff. */
22
23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
24
25 /* This file has been Mule-ized except for the TRT stuff. */
26
27 #include <config.h>
28 #include "lisp.h"
29
30 #include "buffer.h"
31 #include "ui/insdel.h"
32 #include "opaque.h"
33 #ifdef REGION_CACHE_NEEDS_WORK
34 #include "region-cache.h"
35 #endif
36 #include "syntax.h"
37
38 #include <sys/types.h>
39 #include "regex.h"
40 #include "casetab.h"
41 #include "chartab.h"
42
43 #include "dynacat.h"
44
45 #define TRANSLATE(table, pos)   \
46  (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
47 \f
48 #include "elhash.h"
49 /* Make sure these are ALWAYS powers of 2 */
50 #define REGEXP_CACHE_SIZE 0x80
51 #define REGEXP_CACHE_HASH_MASK (REGEXP_CACHE_SIZE-1)
52 #define REGEXP_FASTMAP_SIZE 0400
53 #define REGEXP_FASTMAP_MASK (REGEXP_FASTMAP_SIZE-1)
54
55 #define __REGEXP_DEBUG__(args...)       fprintf(stderr, "REGEXP " args)
56 #ifndef REGEXP_DEBUG_FLAG
57 #define REGEXP_DEBUG(args...)
58 #else
59 #define REGEXP_DEBUG(args...)           __REGEXP_DEBUG__(args)
60 #endif
61 #define REGEXP_DEBUG_COMPRE(args...)    REGEXP_DEBUG("[compre]: " args)
62 #define REGEXP_DEBUG_COMPRE_C(args...)  REGEXP_DEBUG("[compre/cache]: " args)
63 #define REGEXP_DEBUG_COMPRE_H(args...)  REGEXP_DEBUG("[compre/hash]: " args)
64 #define REGEXP_CRITICAL(args...)        __REGEXP_DEBUG__("CRITICAL: " args)
65
66
67 /* If the regexp is non-nil, then the buffer contains the compiled form
68    of that regexp, suitable for searching.  */
69 struct regexp_cache {
70         struct regexp_cache *next;
71         Lisp_Object regexp;
72         struct re_pattern_buffer buf;
73         char fastmap[REGEXP_FASTMAP_SIZE];
74         /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
75         char posix;
76 };
77
78 /* The instances of that struct.  */
79 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
80
81 /* The head of the linked list; points to the most recently used buffer.  */
82 static struct regexp_cache *searchbuf_head;
83
84 /* Every call to re_match, etc., must pass &search_regs as the regs
85    argument unless you can show it is unnecessary (i.e., if re_match
86    is certainly going to be called again before region-around-match
87    can be called).
88
89    Since the registers are now dynamically allocated, we need to make
90    sure not to refer to the Nth register before checking that it has
91    been allocated by checking search_regs.num_regs.
92
93    The regex code keeps track of whether it has allocated the search
94    buffer using bits in the re_pattern_buffer.  This means that whenever
95    you compile a new pattern, it completely forgets whether it has
96    allocated any registers, and will allocate new registers the next
97    time you call a searching or matching function.  Therefore, we need
98    to call re_set_registers after compiling a new pattern or after
99    setting the match registers, so that the regex functions will be
100    able to free or re-allocate it properly.  */
101
102 /* Note: things get trickier under Mule because the values returned from
103    the regexp routines are in Bytinds but we need them to be in Bufpos's.
104    We take the easy way out for the moment and just convert them immediately.
105    We could be more clever by not converting them until necessary, but
106    that gets real ugly real fast since the buffer might have changed and
107    the positions might be out of sync or out of range.
108    */
109 static struct re_registers search_regs;
110
111 /* The buffer in which the last search was performed, or
112    Qt if the last search was done in a string;
113    Qnil if no searching has been done yet.  */
114 static Lisp_Object last_thing_searched;
115
116 /* error condition signalled when regexp compile_pattern fails */
117
118 Lisp_Object Qinvalid_regexp;
119
120 /* Regular expressions used in forward/backward-word */
121 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
122
123 /* range table for use with skip_chars.  Only needed for Mule. */
124 Lisp_Object Vskip_chars_range_table;
125
126 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len);
127 static void clear_unused_search_regs(struct re_registers *regp, int no_sub);
128 static void save_search_regs(void);
129 static Bufpos simple_search(struct buffer *buf, Bufbyte * base_pat,
130                             Bytecount len, Bytind pos, Bytind lim,
131                             EMACS_INT n, Lisp_Object trt);
132 static Bufpos boyer_moore(struct buffer *buf, Bufbyte * base_pat,
133                           Bytecount len, Bytind pos, Bytind lim,
134                           EMACS_INT n, Lisp_Object trt,
135                           Lisp_Object inverse_trt, int charset_base);
136 static Bufpos search_buffer(struct buffer *buf, Lisp_Object str,
137                             Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
138                             Lisp_Object trt, Lisp_Object inverse_trt,
139                             int posix);
140
141 static void matcher_overflow(void)
142 {
143         error("Stack overflow in regexp matcher");
144 }
145
146 #ifdef EF_USE_COMPRE
147 EXFUN(Fget, 3);
148 EXFUN(Fput, 3);
149 EXFUN(Fremprop, 2);
150 #define COMPRE_T                struct re_pattern_buffer
151 #define COMPREP(obj)                                    \
152         (DYNACATP(obj) && EQ(XDYNACAT_TYPE(obj), Qcompre))
153 #define COMPRE_GET(obj) (COMPRE_T*)XDYNACAT(obj)->ptr
154 #define COMPRE_PUT(obj, rec)    do {                            \
155                 obj = make_dynacat((void*)rec);                 \
156                 set_dynacat_printer(obj, compre_prfun);         \
157                 set_dynacat_finaliser(obj, compre_finfun);      \
158                 XDYNACAT_TYPE(obj) = Qcompre;                   \
159         } while (0)
160 #define XCOMPRE_GET(obj)        XSTRING(obj)->compre
161 #define XCOMPRE_PUT(obj, b)     XSTRING(obj)->compre = b
162 #define XCOMPRE_REM(obj)        XSTRING(obj)->compre = Qnil
163
164 /* idenitifier for hard-cached rexp */
165 Lisp_Object Qcompre, Qcomprep;
166 static Lisp_Object cache_regexp(Lisp_Object, struct re_pattern_buffer*);
167 static COMPRE_T* make_compre(void)
168 #if defined(__GNUC__)
169         __attribute__((unused))
170 #endif
171         ;
172 static COMPRE_T* clone_compre(COMPRE_T*);
173 static inline void free_compre(COMPRE_T*);
174 #endif
175
176 /* Compile a regexp and signal a Lisp error if anything goes wrong.
177    PATTERN is the pattern to compile.
178    CP is the place to put the result.
179    TRANSLATE is a translation table for ignoring case, or NULL for none.
180    REGP is the structure that says where to store the "register"
181    values that will result from matching this pattern.
182    If it is 0, we should compile the pattern not to record any
183    subexpression bounds.
184    POSIX is nonzero if we want full backtracking (POSIX style)
185    for this pattern.  0 means backtrack only enough to get a valid match.  */
186
187 static int
188 compile_pattern_1(struct regexp_cache *cp, Lisp_Object pattern,
189                   Lisp_Object translate, struct re_registers *regp, int posix,
190                   Error_behavior errb)
191 {
192         const char *val;
193         reg_syntax_t old;
194
195         cp->regexp = Qnil;
196         cp->buf.translate = translate;
197         cp->posix = posix;
198         old = re_set_syntax(RE_SYNTAX_EMACS
199                             | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
200         val = (const char *)
201             re_compile_pattern((char *)XSTRING_DATA(pattern),
202                                XSTRING_LENGTH(pattern), &cp->buf);
203         re_set_syntax(old);
204         if (val) {
205                 maybe_signal_error(Qinvalid_regexp, list1(build_string(val)),
206                                    Qsearch, errb);
207                 return 0;
208         }
209
210         cp->regexp = Fcopy_sequence(pattern);
211         return 1;
212 }
213
214 #ifdef EF_USE_COMPRE
215 /* extremely simple hash table, no collision management is performed */
216 static struct re_pattern_buffer *
217 try_pattern_from_cache(Lisp_Object pattern,
218                        struct re_registers *regp,
219                        Lisp_Object translate,
220                        int posix,
221                        Error_behavior errb)
222 {
223         struct regexp_cache *cp;
224         hcode_t pathash;
225         int place;
226
227         pathash = internal_hash(pattern, 0);
228         place = (int)((pathash>>4) & REGEXP_CACHE_HASH_MASK);
229         cp = &(searchbufs[place]);
230         REGEXP_DEBUG_COMPRE_H("trying hash place: %d.\n", place);
231
232         if (!NILP(Fstring_equal(cp->regexp, pattern))
233             && EQ(cp->buf.translate, translate) && cp->posix == posix) {
234                 /* found the pig */
235                 REGEXP_DEBUG_COMPRE_H("match: %d.\n", place);
236                 ;
237         } else {
238                 /* collision or empty slot, dont care just put the
239                  * new regexp there */
240                 if (!compile_pattern_1(
241                             cp, pattern, translate, regp, posix, errb))
242                         return 0;
243                 REGEXP_DEBUG_COMPRE_H("collision: %d.\n", place);
244         }
245
246         return &cp->buf;
247 }
248 #else  /* !EF_USE_COMPRE */
249 static struct re_pattern_buffer *
250 try_pattern_from_cache(Lisp_Object pattern,
251                        struct re_registers *regp,
252                        Lisp_Object translate,
253                        int posix,
254                        Error_behavior errb)
255 {
256         struct regexp_cache *cp, **cpp;
257
258         for (cpp = &searchbuf_head;; cpp = &cp->next) {
259                 cp = *cpp;
260                 if (!NILP(Fstring_equal(cp->regexp, pattern))
261                     && EQ(cp->buf.translate, translate)
262                     && cp->posix == posix)
263                         break;
264
265                 /* If we're at the end of the cache, compile
266                  * into the last cell.  */
267                 if (cp->next == 0) {
268                         if (!compile_pattern_1(
269                                     cp, pattern, translate, regp, posix, errb))
270                                 return 0;
271                         break;
272                 }
273         }
274
275         /* When we get here, cp (aka *cpp) contains the compiled pattern,
276            either because we found it in the cache or because we just compiled it.
277            Move it to the front of the queue to mark it as most recently used.  */
278         *cpp = cp->next;
279         cp->next = searchbuf_head;
280         searchbuf_head = cp;
281
282         return &cp->buf;
283 }
284 #endif  /* EF_USE_COMPRE */
285
286 /* Compile a regexp if necessary, but first check to see if there's one in
287    the cache.
288    PATTERN is the pattern to compile.
289    TRANSLATE is a translation table for ignoring case, or NULL for none.
290    REGP is the structure that says where to store the "register"
291    values that will result from matching this pattern.
292    If it is 0, we should compile the pattern not to record any
293    subexpression bounds.
294    POSIX is nonzero if we want full backtracking (POSIX style)
295    for this pattern.  0 means backtrack only enough to get a valid match.  */
296
297 struct re_pattern_buffer *
298 compile_pattern(Lisp_Object pattern,
299                 struct re_registers *regp,
300                 Lisp_Object translate,
301                 int posix,
302                 Error_behavior errb)
303 {
304         struct re_pattern_buffer *result = NULL;
305
306 #ifdef EF_USE_COMPRE
307         Lisp_Object rc = XCOMPRE_GET(pattern);
308
309         if (!NILP(rc) && COMPREP(rc) &&
310             (result = COMPRE_GET(rc)) &&
311             result->re_ngroups >= 0 && result->re_ngroups < 256) {
312                 REGEXP_DEBUG_COMPRE_C("using cache: 0x%x.\n",
313                                       (unsigned int)result);
314                 ;
315         } else {
316 #endif
317                 result = try_pattern_from_cache(
318                         pattern, regp, translate, posix, errb);
319 #ifdef EF_USE_COMPRE
320                 cache_regexp(pattern, result);
321         }
322 #endif
323
324
325         /* Advise the searching functions about the space we have allocated
326            for register data.  */
327         if (regp)
328                 re_set_registers(result, regp, regp->num_regs,
329                                  regp->start, regp->end);
330
331         return result;
332 }
333
334 #ifdef EF_USE_COMPRE
335 static void
336 compre_prfun(Lisp_Object obj, Lisp_Object pcfun, int escflag)
337 {
338         write_fmt_str(pcfun, "#<compiled regexp %lx",
339                       (long unsigned int)((COMPRE_GET(obj))->buffer));
340         if (escflag);
341 }
342
343 static void
344 compre_finfun(Lisp_Object obj, int unused)
345 {
346         REGEXP_DEBUG_COMPRE_C("0x%lx@0x%lx will pass away\n",
347                               (long unsigned int)COMPRE_GET(obj),
348                               (long unsigned int)obj);
349         free_compre(COMPRE_GET(obj));
350 }
351
352 #if 1
353 static COMPRE_T *
354 make_compre(void)
355 {
356         COMPRE_T *result = xnew_and_zero(COMPRE_T);
357
358         result->fastmap = xmalloc_atomic(REGEXP_FASTMAP_SIZE);
359
360         return result;
361 }
362 #endif  /* 0 */
363
364 static COMPRE_T *
365 clone_compre(COMPRE_T *src)
366 {
367         COMPRE_T *result = xnew_and_zero(COMPRE_T);
368
369         /* alloc and clone fastmap */
370         result->fastmap = (char*)xmalloc_atomic(REGEXP_FASTMAP_SIZE);
371         memcpy(result->fastmap, src->fastmap, REGEXP_FASTMAP_SIZE);
372
373         /* alloc and clone buffer */
374         result->buffer = (unsigned char *)xmalloc_atomic(src->allocated);
375         memcpy(result->buffer, src->buffer, src->allocated);
376         result->allocated = src->allocated;
377         result->used = src->used;
378
379         result->syntax = src->syntax;
380         result->translate = src->translate;
381         result->re_nsub = src->re_nsub;
382         result->re_ngroups = src->re_ngroups;
383
384         result->can_be_null = src->can_be_null;
385         result->regs_allocated = src->regs_allocated;
386         result->fastmap_accurate = src->fastmap_accurate;
387
388         result->no_sub = src->no_sub;
389         result->not_bol = src->not_bol;
390         result->not_eol = src->not_eol;
391         result->newline_anchor = src->newline_anchor;
392
393         /* alloc and clone ext_to_int_register */
394         result->external_to_internal_register =
395                 (int*)xmalloc_atomic(
396                         sizeof(int) * src->external_to_internal_register_size);
397         memcpy(result->external_to_internal_register,
398                src->external_to_internal_register,
399                sizeof(int)*src->external_to_internal_register_size);
400         result->external_to_internal_register_size =
401                 src->external_to_internal_register_size;
402
403         return result;
404 }
405
406 static inline void
407 free_compre(COMPRE_T *buf)
408 {
409         if (buf->buffer) {
410                 xfree(buf->buffer);
411                 buf->buffer = NULL;
412         }
413         if (buf->fastmap) {
414                 xfree(buf->fastmap);
415                 buf->fastmap = NULL;
416         }
417         if (buf->external_to_internal_register) {
418                 xfree(buf->external_to_internal_register);
419                 buf->external_to_internal_register = NULL;
420         }
421         xfree(buf);
422 }
423
424 static Lisp_Object
425 cache_regexp(Lisp_Object regexp, COMPRE_T *buf)
426 {
427         Lisp_Object rc;
428         COMPRE_T *resbuf;
429
430         if (buf == NULL)
431                 return Qnil;
432
433         resbuf = clone_compre(buf);
434         COMPRE_PUT(rc, resbuf);
435         XCOMPRE_PUT(regexp, rc);
436
437         REGEXP_DEBUG_COMPRE_C("caching 0x%08x into 0x%08x\n",
438                               (unsigned int)resbuf, (unsigned int)rc);
439         return rc;
440 }
441
442 DEFUN("compile-regexp", Fcompile_regexp, 1, 1, 0, /*
443 Forcibly compile REGEXP and store the result in object-plist.
444 */
445       (regexp))
446 {
447         CHECK_STRING(regexp);
448
449         XCOMPRE_REM(regexp);
450         compile_pattern(regexp, &search_regs, Qnil, 0, ERROR_ME);
451
452         return regexp;
453 }
454
455 DEFUN("defregexp", Fdefregexp, 2, UNEVALLED, 0, /*
456 \(defregexp SYMBOL REGEXP DOCSTRING\)
457 Like `defconst' but for forcing compiled regexps.
458
459 The same restrictions that apply to `defconst' apply here in regard
460 to user variables.  You shouldn't use this for regular expressions
461 that a user might want to customise.  Instead, use `defcustom' with
462 :type 'regexp.
463 */
464       (args))
465 {
466         /* This function can GC */
467         Lisp_Object sym = XCAR(args);
468         Lisp_Object pat = Feval(XCAR(args = XCDR(args)));
469         struct gcpro gcpro1;
470
471         CHECK_STRING(pat);
472
473         GCPRO1(pat);
474         pat = Fcompile_regexp(pat);
475         Fset_default(sym, pat);
476         UNGCPRO;
477
478         if (!NILP(args = XCDR(args))) {
479                 Lisp_Object doc = XCAR(args);
480                 Fput(sym, Qvariable_documentation, doc);
481                 if (!NILP(args = XCDR(args)))
482                         error("too many arguments");
483         }
484 #ifdef I18N3
485         if (!NILP(Vfile_domain))
486                 Fput(sym, Qvariable_domain, Vfile_domain);
487 #endif  /* I18N3 */
488
489         LOADHIST_ATTACH(sym);
490         return sym;
491 }
492 #endif  /* EF_USE_COMPRE */
493
494 /* Error condition used for failing searches */
495 Lisp_Object Qsearch_failed;
496
497 static Lisp_Object signal_failure(Lisp_Object arg)
498 {
499         for (;;)
500                 Fsignal(Qsearch_failed, list1(arg));
501         return Qnil;            /* Not reached. */
502 }
503
504 /* Convert the search registers from Bytinds to Bufpos's.  Needs to be
505    done after each regexp match that uses the search regs.
506
507    We could get a potential speedup by not converting the search registers
508    until it's really necessary, e.g. when match-data or replace-match is
509    called.  However, this complexifies the code a lot (e.g. the buffer
510    could have changed and the Bytinds stored might be invalid) and is
511    probably not a great time-saver. */
512
513 static void fixup_search_regs_for_buffer(struct buffer *buf)
514 {
515         int i;
516         int num_regs = search_regs.num_regs;
517
518         for (i = 0; i < num_regs; i++) {
519                 if (search_regs.start[i] >= 0)
520                         search_regs.start[i] =
521                             bytind_to_bufpos(buf, search_regs.start[i]);
522                 if (search_regs.end[i] >= 0)
523                         search_regs.end[i] =
524                             bytind_to_bufpos(buf, search_regs.end[i]);
525         }
526 }
527
528 /* Similar but for strings. */
529 static void fixup_search_regs_for_string(Lisp_Object string)
530 {
531         int i;
532         int num_regs = search_regs.num_regs;
533
534         /* #### bytecount_to_charcount() is not that efficient.  This function
535            could be faster if it did its own conversion (using INC_CHARPTR()
536            and such), because the register ends are likely to be somewhat ordered.
537            (Even if not, you could sort them.)
538
539            Think about this if this function is a time hog, which it's probably
540            not. */
541         for (i = 0; i < num_regs; i++) {
542                 if (search_regs.start[i] > 0) {
543                         search_regs.start[i] =
544                             bytecount_to_charcount(XSTRING_DATA(string),
545                                                    search_regs.start[i]);
546                 }
547                 if (search_regs.end[i] > 0) {
548                         search_regs.end[i] =
549                             bytecount_to_charcount(XSTRING_DATA(string),
550                                                    search_regs.end[i]);
551                 }
552         }
553 }
554 \f
555 static Lisp_Object
556 looking_at_1(Lisp_Object string, struct buffer *buf, int posix)
557 {
558         /* This function has been Mule-ized, except for the trt table handling. */
559         Lisp_Object val;
560         Bytind p1, p2;
561         Bytecount s1, s2;
562         REGISTER int i;
563         struct re_pattern_buffer *bufp;
564
565         if (running_asynch_code)
566                 save_search_regs();
567
568         CHECK_STRING(string);
569         bufp = compile_pattern(string, &search_regs,
570                                (!NILP(buf->case_fold_search)
571                                 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
572                                posix, ERROR_ME);
573
574         QUIT;
575
576         /* Get pointers and sizes of the two strings
577            that make up the visible portion of the buffer. */
578
579         p1 = BI_BUF_BEGV(buf);
580         p2 = BI_BUF_CEILING_OF(buf, p1);
581         s1 = p2 - p1;
582         s2 = BI_BUF_ZV(buf) - p2;
583
584         regex_match_object = Qnil;
585         regex_emacs_buffer = buf;
586         i = re_match_2(bufp, (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
587                        s1, (char *)BI_BUF_BYTE_ADDRESS(buf, p2), s2,
588                        BI_BUF_PT(buf) - BI_BUF_BEGV(buf), &search_regs,
589                        BI_BUF_ZV(buf) - BI_BUF_BEGV(buf));
590
591         if (i == -2)
592                 matcher_overflow();
593
594         val = (0 <= i ? Qt : Qnil);
595         if (NILP(val))
596                 return Qnil;
597         {
598                 int num_regs = search_regs.num_regs;
599                 for (i = 0; i < num_regs; i++)
600                         if (search_regs.start[i] >= 0) {
601                                 search_regs.start[i] += BI_BUF_BEGV(buf);
602                                 search_regs.end[i] += BI_BUF_BEGV(buf);
603                         }
604         }
605         XSETBUFFER(last_thing_searched, buf);
606         fixup_search_regs_for_buffer(buf);
607         return val;
608 }
609
610 DEFUN("looking-at", Flooking_at, 1, 2, 0,       /*
611 Return t if text after point matches regular expression REGEXP.
612 This function modifies the match data that `match-beginning',
613 `match-end' and `match-data' access; save and restore the match
614 data if you want to preserve them.
615
616 Optional argument BUFFER defaults to the current buffer.
617 */
618       (regexp, buffer))
619 {
620         return looking_at_1(regexp, decode_buffer(buffer, 0), 0);
621 }
622
623 DEFUN("posix-looking-at", Fposix_looking_at, 1, 2, 0,   /*
624 Return t if text after point matches regular expression REGEXP.
625 Find the longest match, in accord with Posix regular expression rules.
626 This function modifies the match data that `match-beginning',
627 `match-end' and `match-data' access; save and restore the match
628 data if you want to preserve them.
629
630 Optional argument BUFFER defaults to the current buffer.
631 */
632       (regexp, buffer))
633 {
634         return looking_at_1(regexp, decode_buffer(buffer, 0), 1);
635 }
636 \f
637 static Lisp_Object
638 string_match_1(Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
639                struct buffer *buf, int posix)
640 {
641         /* This function has been Mule-ized, except for the trt table handling. */
642         Bytecount val;
643         Charcount s;
644         struct re_pattern_buffer *bufp;
645
646         if (running_asynch_code)
647                 save_search_regs();
648
649         CHECK_STRING(regexp);
650         CHECK_STRING(string);
651
652         if (NILP(start))
653                 s = 0;
654         else {
655                 Charcount len = XSTRING_CHAR_LENGTH(string);
656
657                 CHECK_INT(start);
658                 s = XINT(start);
659                 if (s < 0 && -s <= len)
660                         s = len + s;
661                 else if (0 > s || s > len)
662                         args_out_of_range(string, start);
663         }
664
665         bufp = compile_pattern(regexp, &search_regs,
666                                (!NILP(buf->case_fold_search)
667                                 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
668                                0, ERROR_ME);
669         QUIT;
670         {
671                 Bytecount bis = charcount_to_bytecount(XSTRING_DATA(string), s);
672                 regex_match_object = string;
673                 regex_emacs_buffer = buf;
674                 val = re_search(bufp, (char *)XSTRING_DATA(string),
675                                 XSTRING_LENGTH(string), bis,
676                                 XSTRING_LENGTH(string) - bis, &search_regs);
677         }
678         if (val == -2)
679                 matcher_overflow();
680         if (val < 0)
681                 return Qnil;
682         last_thing_searched = Qt;
683         fixup_search_regs_for_string(string);
684         return make_int(bytecount_to_charcount(XSTRING_DATA(string), val));
685 }
686
687 DEFUN("string-match", Fstring_match, 2, 4, 0,   /*
688 Return index of start of first match for REGEXP in STRING, or nil.
689 If third arg START is non-nil, start search at that index in STRING.
690 For index of first char beyond the match, do (match-end 0).
691 `match-end' and `match-beginning' also give indices of substrings
692 matched by parenthesis constructs in the pattern.
693
694 Optional arg BUFFER controls how case folding is done (according to
695 the value of `case-fold-search' in that buffer and that buffer's case
696 tables) and defaults to the current buffer.
697 */
698       (regexp, string, start, buffer))
699 {
700         return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
701                               0);
702 }
703
704 DEFUN("posix-string-match", Fposix_string_match, 2, 4, 0,       /*
705 Return index of start of first match for REGEXP in STRING, or nil.
706 Find the longest match, in accord with Posix regular expression rules.
707 If third arg START is non-nil, start search at that index in STRING.
708 For index of first char beyond the match, do (match-end 0).
709 `match-end' and `match-beginning' also give indices of substrings
710 matched by parenthesis constructs in the pattern.
711
712 Optional arg BUFFER controls how case folding is done (according to
713 the value of `case-fold-search' in that buffer and that buffer's case
714 tables) and defaults to the current buffer.
715 */
716       (regexp, string, start, buffer))
717 {
718         return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
719                               1);
720 }
721
722 /* Match REGEXP against STRING, searching all of STRING,
723    and return the index of the match, or negative on failure.
724    This does not clobber the match data. */
725
726 Bytecount
727 fast_string_match(Lisp_Object regexp, const Bufbyte * nonreloc,
728                   Lisp_Object reloc, Bytecount offset,
729                   Bytecount length, int case_fold_search,
730                   Error_behavior errb, int no_quit)
731 {
732         /* This function has been Mule-ized, except for the trt table handling. */
733         Bytecount val;
734         const Bufbyte *newnonreloc = (const Bufbyte*)nonreloc;
735         struct re_pattern_buffer *bufp;
736
737         bufp = compile_pattern(regexp, 0,
738                                (case_fold_search
739                                 ? XCASE_TABLE_DOWNCASE(current_buffer->
740                                                        case_table)
741                                 : Qnil), 0, errb);
742         if (!bufp)
743                 return -1;      /* will only do this when errb != ERROR_ME */
744         if (!no_quit)
745                 QUIT;
746         else
747                 no_quit_in_re_search = 1;
748
749         fixup_internal_substring(nonreloc, reloc, offset, &length);
750
751
752
753         if (!NILP(reloc)) {
754                 if (no_quit) {
755                         newnonreloc = XSTRING_DATA(reloc);
756                 } else {
757                         /* QUIT could relocate RELOC.  Therefore we must
758                            alloca() and copy.  No way around this except some
759                            serious rewriting of re_search(). */
760                         /* yeah, let's rewrite this bugger, the warning
761                            hereafter is inevitable too */
762                         if ( length < 0)
763                                 /* By this point
764                                    fixup_internal_substring should
765                                    have updated length, if it didn't
766                                    return with failure...
767                                 */
768                                 return -1;
769                         newnonreloc = alloca(length);
770                         memcpy((void*)newnonreloc, (void*)XSTRING_DATA(reloc), length);
771                 }
772         }
773
774         /* #### evil current-buffer dependency */
775         regex_match_object = reloc;
776         regex_emacs_buffer = current_buffer;
777         val = re_search(bufp, (const char*)newnonreloc + offset, length, 0,
778                         length, 0);
779
780         no_quit_in_re_search = 0;
781         return val;
782 }
783
784 Bytecount fast_lisp_string_match(Lisp_Object regex, Lisp_Object string)
785 {
786         return fast_string_match(regex, 0, string, 0, -1, 0, ERROR_ME, 0);
787 }
788 \f
789 #ifdef REGION_CACHE_NEEDS_WORK
790 /* The newline cache: remembering which sections of text have no newlines.  */
791
792 /* If the user has requested newline caching, make sure it's on.
793    Otherwise, make sure it's off.
794    This is our cheezy way of associating an action with the change of
795    state of a buffer-local variable.  */
796 static void newline_cache_on_off(struct buffer *buf)
797 {
798         if (NILP(buf->cache_long_line_scans)) {
799                 /* It should be off.  */
800                 if (buf->newline_cache) {
801                         free_region_cache(buf->newline_cache);
802                         buf->newline_cache = 0;
803                 }
804         } else {
805                 /* It should be on.  */
806                 if (buf->newline_cache == 0)
807                         buf->newline_cache = new_region_cache();
808         }
809 }
810 #endif
811 \f
812 /* Search in BUF for COUNT instances of the character TARGET between
813    START and END.
814
815    If COUNT is positive, search forwards; END must be >= START.
816    If COUNT is negative, search backwards for the -COUNTth instance;
817       END must be <= START.
818    If COUNT is zero, do anything you please; run rogue, for all I care.
819
820    If END is zero, use BEGV or ZV instead, as appropriate for the
821    direction indicated by COUNT.
822
823    If we find COUNT instances, set *SHORTAGE to zero, and return the
824    position after the COUNTth match.  Note that for reverse motion
825    this is not the same as the usual convention for Emacs motion commands.
826
827    If we don't find COUNT instances before reaching END, set *SHORTAGE
828    to the number of TARGETs left unfound, and return END.
829
830    If ALLOW_QUIT is non-zero, call QUIT periodically. */
831
832 static Bytind
833 bi_scan_buffer(struct buffer *buf, Emchar target, Bytind st, Bytind en,
834                EMACS_INT count, EMACS_INT * shortage, int allow_quit)
835 {
836         /* This function has been Mule-ized. */
837         Bytind lim = en > 0 ? en :
838             ((count > 0) ? BI_BUF_ZV(buf) : BI_BUF_BEGV(buf));
839
840         /* #### newline cache stuff in this function not yet ported */
841
842         assert(count != 0);
843
844         if (shortage)
845                 *shortage = 0;
846
847         if (count > 0) {
848 #ifdef MULE
849                 /* Due to the Mule representation of characters in a buffer,
850                    we can simply search for characters in the range 0 - 127
851                    directly.  For other characters, we do it the "hard" way.
852                    Note that this way works for all characters but the other
853                    way is faster. */
854                 if (target >= 0200) {
855                         while (st < lim && count > 0) {
856                                 if (BI_BUF_FETCH_CHAR(buf, st) == target)
857                                         count--;
858                                 INC_BYTIND(buf, st);
859                         }
860                 } else
861 #endif
862                 {
863                         while (st < lim && count > 0) {
864                                 Bytind _ceil_;
865                                 Bufbyte *bufptr;
866
867                                 _ceil_ = BI_BUF_CEILING_OF(buf, st);
868                                 _ceil_ = min(lim, _ceil_);
869                                 bufptr =
870                                     (Bufbyte *)
871                                     memchr(BI_BUF_BYTE_ADDRESS(buf, st),
872                                            (int)target, _ceil_ - st);
873                                 if (bufptr) {
874                                         count--;
875                                         st = BI_BUF_PTR_BYTE_POS(buf,
876                                                                  bufptr) + 1;
877                                 } else
878                                         st = _ceil_;
879                         }
880                 }
881
882                 if (shortage)
883                         *shortage = count;
884                 if (allow_quit)
885                         QUIT;
886                 return st;
887         } else {
888 #ifdef MULE
889                 if (target >= 0200) {
890                         while (st > lim && count < 0) {
891                                 DEC_BYTIND(buf, st);
892                                 if (BI_BUF_FETCH_CHAR(buf, st) == target)
893                                         count++;
894                         }
895                 } else
896 #endif
897                 {
898                         while (st > lim && count < 0) {
899                                 Bytind _floor_;
900                                 Bufbyte *bufptr;
901                                 Bufbyte *floorptr;
902
903                                 _floor_ = BI_BUF_FLOOR_OF(buf, st);
904                                 _floor_ = max(lim, _floor_);
905                                 /* No memrchr() ... */
906                                 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE(buf, st);
907                                 floorptr = BI_BUF_BYTE_ADDRESS(buf, _floor_);
908                                 while (bufptr >= floorptr) {
909                                         st--;
910                                         /* At this point, both ST and BUFPTR
911                                            refer to the same character.  When
912                                            the loop terminates, ST will always
913                                            point to the last character we
914                                            tried. */
915                                         if (*(unsigned char *)bufptr ==
916                                             (unsigned char)target) {
917                                                 count++;
918                                                 break;
919                                         }
920                                         bufptr--;
921                                 }
922                         }
923                 }
924
925                 if (shortage)
926                         *shortage = -count;
927                 if (allow_quit)
928                         QUIT;
929                 if (count)
930                         return st;
931                 else {
932                         /* We found the character we were looking for; we have to return
933                            the position *after* it due to the strange way that the return
934                            value is defined. */
935                         INC_BYTIND(buf, st);
936                         return st;
937                 }
938         }
939 }
940
941 Bufpos
942 scan_buffer(struct buffer * buf, Emchar target, Bufpos start, Bufpos end,
943             EMACS_INT count, EMACS_INT * shortage, int allow_quit)
944 {
945         Bytind bi_retval;
946         Bytind bi_start, bi_end;
947
948         bi_start = bufpos_to_bytind(buf, start);
949         if (end)
950                 bi_end = bufpos_to_bytind(buf, end);
951         else
952                 bi_end = 0;
953         bi_retval = bi_scan_buffer(buf, target, bi_start, bi_end, count,
954                                    shortage, allow_quit);
955         return bytind_to_bufpos(buf, bi_retval);
956 }
957
958 Bytind bi_find_next_newline_no_quit(struct buffer * buf, Bytind from, int count)
959 {
960         return bi_scan_buffer(buf, '\n', from, 0, count, 0, 0);
961 }
962
963 Bufpos find_next_newline_no_quit(struct buffer * buf, Bufpos from, int count)
964 {
965         return scan_buffer(buf, '\n', from, 0, count, 0, 0);
966 }
967
968 Bufpos find_next_newline(struct buffer * buf, Bufpos from, int count)
969 {
970         return scan_buffer(buf, '\n', from, 0, count, 0, 1);
971 }
972
973 Bytind
974 bi_find_next_emchar_in_string(Lisp_String * str, Emchar target, Bytind st,
975                               EMACS_INT count)
976 {
977         /* This function has been Mule-ized. */
978         Bytind lim = string_length(str) - 1;
979         Bufbyte *s = string_data(str);
980
981         assert(count >= 0);
982
983 #ifdef MULE
984         /* Due to the Mule representation of characters in a buffer,
985            we can simply search for characters in the range 0 - 127
986            directly.  For other characters, we do it the "hard" way.
987            Note that this way works for all characters but the other
988            way is faster. */
989         if (target >= 0200) {
990                 while (st < lim && count > 0) {
991                         if (string_char(str, st) == target)
992                                 count--;
993                         INC_CHARBYTIND(s, st);
994                 }
995         } else
996 #endif
997         {
998                 while (st < lim && count > 0) {
999                         Bufbyte *bufptr =
1000                             (Bufbyte *) memchr(charptr_n_addr(s, st),
1001                                                (int)target, lim - st);
1002                         if (bufptr) {
1003                                 count--;
1004                                 st = (Bytind) (bufptr - s) + 1;
1005                         } else
1006                                 st = lim;
1007                 }
1008         }
1009         return st;
1010 }
1011
1012 /* Like find_next_newline, but returns position before the newline,
1013    not after, and only search up to TO.  This isn't just
1014    find_next_newline (...)-1, because you might hit TO.  */
1015 Bufpos
1016 find_before_next_newline(struct buffer * buf, Bufpos from, Bufpos to, int count)
1017 {
1018         EMACS_INT shortage;
1019         Bufpos pos = scan_buffer(buf, '\n', from, to, count, &shortage, 1);
1020
1021         if (shortage == 0)
1022                 pos--;
1023
1024         return pos;
1025 }
1026 \f
1027 /* This function synched with FSF 21.1 */
1028 static Lisp_Object
1029 skip_chars(struct buffer *buf, int forwardp, int syntaxp,
1030            Lisp_Object string, Lisp_Object lim)
1031 {
1032         /* This function has been Mule-ized. */
1033         REGISTER Bufbyte *p, *pend;
1034         REGISTER Emchar c;
1035         /* We store the first 256 chars in an array here and the rest in
1036            a range table. */
1037         unsigned char fastmap[REGEXP_FASTMAP_SIZE];
1038         int negate = 0;
1039         REGISTER int i;
1040 #ifndef emacs
1041         Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
1042 #endif
1043         Bufpos limit;
1044
1045         if (NILP(lim))
1046                 limit = forwardp ? BUF_ZV(buf) : BUF_BEGV(buf);
1047         else {
1048                 CHECK_INT_COERCE_MARKER(lim);
1049                 limit = XINT(lim);
1050
1051                 /* In any case, don't allow scan outside bounds of buffer.  */
1052                 if (limit > BUF_ZV(buf))
1053                         limit = BUF_ZV(buf);
1054                 if (limit < BUF_BEGV(buf))
1055                         limit = BUF_BEGV(buf);
1056         }
1057
1058         CHECK_STRING(string);
1059         p = XSTRING_DATA(string);
1060         pend = p + XSTRING_LENGTH(string);
1061         memset(fastmap, 0, sizeof(fastmap));
1062
1063         Fclear_range_table(Vskip_chars_range_table);
1064
1065         if (p != pend && *p == '^') {
1066                 negate = 1;
1067                 p++;
1068         }
1069
1070         /* Find the characters specified and set their elements of fastmap.
1071            If syntaxp, each character counts as itself.
1072            Otherwise, handle backslashes and ranges specially  */
1073
1074         while (p != pend) {
1075                 c = charptr_emchar(p);
1076                 INC_CHARPTR(p);
1077                 if (syntaxp) {
1078                         if (c < REGEXP_FASTMAP_SIZE
1079                             && syntax_spec_code[c] < (unsigned char)Smax)
1080                                 fastmap[c] = 1;
1081                         else
1082                                 signal_simple_error("Invalid syntax designator",
1083                                                     make_char(c));
1084                 } else {
1085                         if (c == '\\') {
1086                                 if (p == pend)
1087                                         break;
1088                                 c = charptr_emchar(p);
1089                                 INC_CHARPTR(p);
1090                         }
1091                         if (p != pend && *p == '-') {
1092                                 Emchar cend;
1093
1094                                 /* Skip over the dash.  */
1095                                 p++;
1096                                 if (p == pend)
1097                                         break;
1098                                 cend = charptr_emchar(p);
1099                                 while (c <= cend && c < REGEXP_FASTMAP_SIZE) {
1100                                         fastmap[c] = 1;
1101                                         c++;
1102                                 }
1103                                 if (c <= cend)
1104                                         Fput_range_table(make_int(c),
1105                                                          make_int(cend), Qt,
1106                                                          Vskip_chars_range_table);
1107                                 INC_CHARPTR(p);
1108                         } else {
1109                                 if (c < REGEXP_FASTMAP_SIZE)
1110                                         fastmap[c] = 1;
1111                                 else
1112                                         Fput_range_table(make_int(c),
1113                                                          make_int(c), Qt,
1114                                                          Vskip_chars_range_table);
1115                         }
1116                 }
1117         }
1118
1119         /* #### Not in FSF 21.1 */
1120         if (syntaxp && fastmap['-'] != 0)
1121                 fastmap[' '] = 1;
1122
1123         /* If ^ was the first character, complement the fastmap.
1124            We don't complement the range table, however; we just use negate
1125            in the comparisons below. */
1126
1127         if (negate)
1128                 for (i = 0; i < (int)(sizeof fastmap); i++)
1129                         fastmap[i] ^= 1;
1130
1131         {
1132                 Bufpos start_point = BUF_PT(buf);
1133                 Bufpos pos = start_point;
1134                 Bytind pos_byte = BI_BUF_PT(buf);
1135
1136                 if (syntaxp) {
1137                         SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos,
1138                                                       forwardp ? 1 : -1);
1139                         /* All syntax designators are normal chars so nothing strange
1140                            to worry about */
1141                         if (forwardp) {
1142                                 if (pos < limit)
1143                                         while (fastmap[(unsigned char)
1144                                                        syntax_code_spec
1145                                                        [(int)SYNTAX_FROM_CACHE
1146                                                         (syntax_table,
1147                                                          BI_BUF_FETCH_CHAR(buf,
1148                                                                            pos_byte))]])
1149                                         {
1150                                                 pos++;
1151                                                 INC_BYTIND(buf, pos_byte);
1152                                                 if (pos >= limit)
1153                                                         break;
1154                                                 UPDATE_SYNTAX_CACHE_FORWARD
1155                                                     (pos);
1156                                         }
1157                         } else {
1158                                 while (pos > limit) {
1159                                         Bufpos savepos = pos_byte;
1160                                         pos--;
1161                                         DEC_BYTIND(buf, pos_byte);
1162                                         UPDATE_SYNTAX_CACHE_BACKWARD(pos);
1163                                         if (!fastmap[(unsigned char)
1164                                                      syntax_code_spec
1165                                                      [(int)SYNTAX_FROM_CACHE
1166                                                       (syntax_table,
1167                                                        BI_BUF_FETCH_CHAR(buf,
1168                                                                          pos_byte))]])
1169                                         {
1170                                                 pos++;
1171                                                 pos_byte = savepos;
1172                                                 break;
1173                                         }
1174                                 }
1175                         }
1176                 } else {
1177                         if (forwardp) {
1178                                 while (pos < limit) {
1179                                         Emchar ch =
1180                                             BI_BUF_FETCH_CHAR(buf, pos_byte);
1181                                         if ((ch <
1182                                              REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1183                                             : (NILP
1184                                                (Fget_range_table
1185                                                 (make_int(ch),
1186                                                  Vskip_chars_range_table, Qnil))
1187                                                == negate)) {
1188                                                 pos++;
1189                                                 INC_BYTIND(buf, pos_byte);
1190                                         } else
1191                                                 break;
1192                                 }
1193                         } else {
1194                                 while (pos > limit) {
1195                                         Bufpos prev_pos_byte = pos_byte;
1196                                         Emchar ch;
1197
1198                                         DEC_BYTIND(buf, prev_pos_byte);
1199                                         ch = BI_BUF_FETCH_CHAR(buf,
1200                                                                prev_pos_byte);
1201                                         if ((ch <
1202                                              REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1203                                             : (NILP
1204                                                (Fget_range_table
1205                                                 (make_int(ch),
1206                                                  Vskip_chars_range_table, Qnil))
1207                                                == negate)) {
1208                                                 pos--;
1209                                                 pos_byte = prev_pos_byte;
1210                                         } else
1211                                                 break;
1212                                 }
1213                         }
1214                 }
1215                 QUIT;
1216                 BOTH_BUF_SET_PT(buf, pos, pos_byte);
1217                 return make_int(BUF_PT(buf) - start_point);
1218         }
1219 }
1220
1221 DEFUN("skip-chars-forward", Fskip_chars_forward, 1, 3, 0,       /*
1222 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
1223 STRING is like the inside of a `[...]' in a regular expression
1224 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
1225 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1226 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1227 Returns the distance traveled, either zero or positive.
1228
1229 Optional argument BUFFER defaults to the current buffer.
1230 */
1231       (string, limit, buffer))
1232 {
1233         return skip_chars(decode_buffer(buffer, 0), 1, 0, string, limit);
1234 }
1235
1236 DEFUN("skip-chars-backward", Fskip_chars_backward, 1, 3, 0,     /*
1237 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1238 See `skip-chars-forward' for details.
1239 Returns the distance traveled, either zero or negative.
1240
1241 Optional argument BUFFER defaults to the current buffer.
1242 */
1243       (string, limit, buffer))
1244 {
1245         return skip_chars(decode_buffer(buffer, 0), 0, 0, string, limit);
1246 }
1247
1248 DEFUN("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0,     /*
1249 Move point forward across chars in specified syntax classes.
1250 SYNTAX is a string of syntax code characters.
1251 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1252 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1253 This function returns the distance traveled, either zero or positive.
1254
1255 Optional argument BUFFER defaults to the current buffer.
1256 */
1257       (syntax, limit, buffer))
1258 {
1259         return skip_chars(decode_buffer(buffer, 0), 1, 1, syntax, limit);
1260 }
1261
1262 DEFUN("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0,   /*
1263 Move point backward across chars in specified syntax classes.
1264 SYNTAX is a string of syntax code characters.
1265 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1266 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1267 This function returns the distance traveled, either zero or negative.
1268
1269 Optional argument BUFFER defaults to the current buffer.
1270 */
1271       (syntax, limit, buffer))
1272 {
1273         return skip_chars(decode_buffer(buffer, 0), 0, 1, syntax, limit);
1274 }
1275 \f
1276 /* Subroutines of Lisp buffer search functions. */
1277
1278 static Lisp_Object
1279 search_command(Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1280                Lisp_Object count, Lisp_Object buffer, int direction,
1281                int RE, int posix)
1282 {
1283         /* This function has been Mule-ized, except for the trt table handling. */
1284         REGISTER Bufpos np;
1285         Bufpos lim;
1286         EMACS_INT n = direction;
1287         struct buffer *buf;
1288
1289         if (!NILP(count)) {
1290                 CHECK_INT(count);
1291                 n *= XINT(count);
1292         }
1293
1294         buf = decode_buffer(buffer, 0);
1295         CHECK_STRING(string);
1296         if (NILP(limit))
1297                 lim = n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
1298         else {
1299                 CHECK_INT_COERCE_MARKER(limit);
1300                 lim = XINT(limit);
1301                 if (n > 0 ? lim < BUF_PT(buf) : lim > BUF_PT(buf))
1302                         error("Invalid search limit (wrong side of point)");
1303                 if (lim > BUF_ZV(buf))
1304                         lim = BUF_ZV(buf);
1305                 if (lim < BUF_BEGV(buf))
1306                         lim = BUF_BEGV(buf);
1307         }
1308
1309         np = search_buffer(buf, string, BUF_PT(buf), lim, n, RE,
1310                            (!NILP(buf->case_fold_search)
1311                             ? XCASE_TABLE_CANON(buf->case_table)
1312                             : Qnil), (!NILP(buf->case_fold_search)
1313                                       ? XCASE_TABLE_EQV(buf->case_table)
1314                                       : Qnil), posix);
1315
1316         if (np <= 0) {
1317                 if (NILP(noerror))
1318                         return signal_failure(string);
1319                 if (!EQ(noerror, Qt)) {
1320                         if (lim < BUF_BEGV(buf) || lim > BUF_ZV(buf))
1321                                 abort();
1322                         BUF_SET_PT(buf, lim);
1323                         return Qnil;
1324 #if 0                           /* This would be clean, but maybe programs depend on
1325                                    a value of nil here.  */
1326                         np = lim;
1327 #endif
1328                 } else
1329                         return Qnil;
1330         }
1331
1332         if (np < BUF_BEGV(buf) || np > BUF_ZV(buf))
1333                 abort();
1334
1335         BUF_SET_PT(buf, np);
1336
1337         return make_int(np);
1338 }
1339 \f
1340 static int trivial_regexp_p(Lisp_Object regexp)
1341 {
1342         /* This function has been Mule-ized. */
1343         Bytecount len = XSTRING_LENGTH(regexp);
1344         Bufbyte *s = XSTRING_DATA(regexp);
1345         while (--len >= 0) {
1346                 switch (*s++) {
1347                         /* ']' doesn't appear here because it's only special after ] */
1348                 case '.':
1349                 case '*':
1350                 case '+':
1351                 case '?':
1352                 case '[':
1353                 case '^':
1354                 case '$':
1355                         return 0;
1356                 case '\\':
1357                         if (--len < 0)
1358                                 return 0;
1359                         switch (*s++) {
1360                         case '|':
1361                         case '(':
1362                         case ')':
1363                         case '`':
1364                         case '\'':
1365                         case 'b':
1366                         case 'B':
1367                         case '<':
1368                         case '>':
1369                         case 'w':
1370                         case 'W':
1371                         case 's':
1372                         case 'S':
1373                         case '=':
1374                         case '{':
1375                         case '}':
1376 #ifdef MULE
1377                                 /* 97/2/25 jhod Added for category matches */
1378                         case 'c':
1379                         case 'C':
1380 #endif                          /* MULE */
1381                         case '1':
1382                         case '2':
1383                         case '3':
1384                         case '4':
1385                         case '5':
1386                         case '6':
1387                         case '7':
1388                         case '8':
1389                         case '9':
1390                                 return 0;
1391                         default:
1392                                 break;
1393                         }
1394                 default:
1395                         break;
1396                 }
1397         }
1398         return 1;
1399 }
1400
1401 /* Search for the n'th occurrence of STRING in BUF,
1402    starting at position BUFPOS and stopping at position BUFLIM,
1403    treating PAT as a literal string if RE is false or as
1404    a regular expression if RE is true.
1405
1406    If N is positive, searching is forward and BUFLIM must be greater
1407    than BUFPOS.
1408    If N is negative, searching is backward and BUFLIM must be less
1409    than BUFPOS.
1410
1411    Returns -x if only N-x occurrences found (x > 0),
1412    or else the position at the beginning of the Nth occurrence
1413    (if searching backward) or the end (if searching forward).
1414
1415    POSIX is nonzero if we want full backtracking (POSIX style)
1416    for this pattern.  0 means backtrack only enough to get a valid match.  */
1417 static Bufpos
1418 search_buffer(struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1419               Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1420               Lisp_Object inverse_trt, int posix)
1421 {
1422         /* This function has been Mule-ized, except for the trt table handling. */
1423         Bytecount len = XSTRING_LENGTH(string);
1424         Bufbyte *base_pat = XSTRING_DATA(string);
1425         REGISTER EMACS_INT i, j;
1426         Bytind p1, p2;
1427         Bytecount s1, s2;
1428         Bytind pos, lim;
1429
1430         if (running_asynch_code)
1431                 save_search_regs();
1432
1433         /* Null string is found at starting position.  */
1434         if (len == 0) {
1435                 set_search_regs(buf, bufpos, 0);
1436                 clear_unused_search_regs(&search_regs, 0);
1437                 return bufpos;
1438         }
1439
1440         /* Searching 0 times means noop---don't move, don't touch registers.  */
1441         if (n == 0)
1442                 return bufpos;
1443
1444         pos = bufpos_to_bytind(buf, bufpos);
1445         lim = bufpos_to_bytind(buf, buflim);
1446         if (RE && !trivial_regexp_p(string)) {
1447                 struct re_pattern_buffer *bufp;
1448
1449                 bufp = compile_pattern(string, &search_regs, trt, posix,
1450                                        ERROR_ME);
1451
1452                 /* Get pointers and sizes of the two strings
1453                    that make up the visible portion of the buffer. */
1454
1455                 p1 = BI_BUF_BEGV(buf);
1456                 p2 = BI_BUF_CEILING_OF(buf, p1);
1457                 s1 = p2 - p1;
1458                 s2 = BI_BUF_ZV(buf) - p2;
1459                 regex_match_object = Qnil;
1460
1461                 while (n < 0) {
1462                         Bytecount val;
1463                         QUIT;
1464                         regex_emacs_buffer = buf;
1465                         val = re_search_2(bufp,
1466                                           (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1467                                           s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1468                                                                           p2),
1469                                           s2, pos - BI_BUF_BEGV(buf), lim - pos,
1470                                           &search_regs, pos - BI_BUF_BEGV(buf));
1471
1472                         if (val == -2) {
1473                                 matcher_overflow();
1474                         }
1475                         if (val >= 0) {
1476                                 int num_regs = search_regs.num_regs;
1477                                 j = BI_BUF_BEGV(buf);
1478                                 for (i = 0; i < num_regs; i++)
1479                                         if (search_regs.start[i] >= 0) {
1480                                                 search_regs.start[i] += j;
1481                                                 search_regs.end[i] += j;
1482                                         }
1483                                 /* re_match (called from re_search et al) does this for us */
1484                                 /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1485                                 XSETBUFFER(last_thing_searched, buf);
1486                                 /* Set pos to the new position. */
1487                                 pos = search_regs.start[0];
1488                                 fixup_search_regs_for_buffer(buf);
1489                                 /* And bufpos too. */
1490                                 bufpos = search_regs.start[0];
1491                         } else {
1492                                 return n;
1493                         }
1494                         n++;
1495                 }
1496                 while (n > 0) {
1497                         Bytecount val;
1498                         QUIT;
1499                         regex_emacs_buffer = buf;
1500                         val = re_search_2(bufp,
1501                                           (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1502                                           s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1503                                                                           p2),
1504                                           s2, pos - BI_BUF_BEGV(buf), lim - pos,
1505                                           &search_regs, lim - BI_BUF_BEGV(buf));
1506                         if (val == -2) {
1507                                 matcher_overflow();
1508                         }
1509                         if (val >= 0) {
1510                                 int num_regs = search_regs.num_regs;
1511                                 j = BI_BUF_BEGV(buf);
1512                                 for (i = 0; i < num_regs; i++)
1513                                         if (search_regs.start[i] >= 0) {
1514                                                 search_regs.start[i] += j;
1515                                                 search_regs.end[i] += j;
1516                                         }
1517                                 /* re_match (called from re_search et al) does this for us */
1518                                 /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1519                                 XSETBUFFER(last_thing_searched, buf);
1520                                 /* Set pos to the new position. */
1521                                 pos = search_regs.end[0];
1522                                 fixup_search_regs_for_buffer(buf);
1523                                 /* And bufpos too. */
1524                                 bufpos = search_regs.end[0];
1525                         } else {
1526                                 return 0 - n;
1527                         }
1528                         n--;
1529                 }
1530                 return bufpos;
1531         } else {                /* non-RE case */
1532
1533                 int charset_base = -1;
1534                 int boyer_moore_ok = 1;
1535                 Bufbyte *pat = 0;
1536                 Bufbyte *patbuf = alloca_array(Bufbyte, len * MAX_EMCHAR_LEN);
1537                 pat = patbuf;
1538 #ifdef MULE
1539                 while (len > 0) {
1540                         Bufbyte tmp_str[MAX_EMCHAR_LEN];
1541                         Emchar c, translated, inverse;
1542                         Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1543
1544                         /* If we got here and the RE flag is set, it's because
1545                            we're dealing with a regexp known to be trivial, so the
1546                            backslash just quotes the next character.  */
1547                         if (RE && *base_pat == '\\') {
1548                                 len--;
1549                                 base_pat++;
1550                         }
1551                         c = charptr_emchar(base_pat);
1552                         translated = TRANSLATE(trt, c);
1553                         inverse = TRANSLATE(inverse_trt, c);
1554
1555                         orig_bytelen = charcount_to_bytecount(base_pat, 1);
1556                         inv_bytelen = set_charptr_emchar(tmp_str, inverse);
1557                         new_bytelen = set_charptr_emchar(tmp_str, translated);
1558
1559                         if (new_bytelen != orig_bytelen
1560                             || inv_bytelen != orig_bytelen)
1561                                 boyer_moore_ok = 0;
1562                         if (translated != c || inverse != c) {
1563                                 /* Keep track of which character set row
1564                                    contains the characters that need translation.  */
1565                                 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1566                                 if (charset_base == -1)
1567                                         charset_base = charset_base_code;
1568                                 else if (charset_base != charset_base_code)
1569                                         /* If two different rows appear, needing translation,
1570                                            then we cannot use boyer_moore search.  */
1571                                         boyer_moore_ok = 0;
1572                         }
1573                         memcpy(pat, tmp_str, new_bytelen);
1574                         pat += new_bytelen;
1575                         base_pat += orig_bytelen;
1576                         len -= orig_bytelen;
1577                 }
1578 #else                           /* not MULE */
1579                 while (--len >= 0) {
1580                         /* If we got here and the RE flag is set, it's because
1581                            we're dealing with a regexp known to be trivial, so the
1582                            backslash just quotes the next character.  */
1583                         if (RE && *base_pat == '\\') {
1584                                 len--;
1585                                 base_pat++;
1586                         }
1587                         *pat++ = TRANSLATE(trt, *base_pat++);
1588                 }
1589 #endif                          /* MULE */
1590                 len = pat - patbuf;
1591                 pat = base_pat = patbuf;
1592                 if (boyer_moore_ok)
1593                         return boyer_moore(buf, base_pat, len, pos, lim, n,
1594                                            trt, inverse_trt, charset_base);
1595                 else
1596                         return simple_search(buf, base_pat, len, pos, lim, n,
1597                                              trt);
1598         }
1599 }
1600
1601 /* Do a simple string search N times for the string PAT,
1602    whose length is LEN/LEN_BYTE,
1603    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1604    TRT is the translation table.
1605
1606    Return the character position where the match is found.
1607    Otherwise, if M matches remained to be found, return -M.
1608
1609    This kind of search works regardless of what is in PAT and
1610    regardless of what is in TRT.  It is used in cases where
1611    boyer_moore cannot work.  */
1612
1613 static Bufpos
1614 simple_search(struct buffer *buf, Bufbyte * base_pat, Bytecount len_byte,
1615               Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1616 {
1617         int forward = n > 0;
1618         Bytecount buf_len = 0;  /* Shut up compiler. */
1619
1620         if (lim > idx) {
1621                 while (n > 0) {
1622                         while (1) {
1623                                 Bytecount this_len = len_byte;
1624                                 Bytind this_idx = idx;
1625                                 const Bufbyte *p = base_pat;
1626                                 if (idx >= lim)
1627                                         goto stop;
1628
1629                                 while (this_len > 0) {
1630                                         Emchar pat_ch, buf_ch;
1631                                         Bytecount pat_len;
1632
1633                                         pat_ch = charptr_emchar(p);
1634                                         buf_ch =
1635                                             BI_BUF_FETCH_CHAR(buf, this_idx);
1636
1637                                         buf_ch = TRANSLATE(trt, buf_ch);
1638
1639                                         if (buf_ch != pat_ch)
1640                                                 break;
1641
1642                                         pat_len = charcount_to_bytecount(p, 1);
1643                                         p += pat_len;
1644                                         this_len -= pat_len;
1645                                         INC_BYTIND(buf, this_idx);
1646                                 }
1647                                 if (this_len == 0) {
1648                                         buf_len = this_idx - idx;
1649                                         idx = this_idx;
1650                                         break;
1651                                 }
1652                                 INC_BYTIND(buf, idx);
1653                         }
1654                         n--;
1655                 }
1656         } else {
1657                 while (n < 0) {
1658                         while (1) {
1659                                 Bytecount this_len = len_byte;
1660                                 Bytind this_idx = idx;
1661                                 const Bufbyte *p = base_pat + len_byte;
1662
1663                                 if (idx <= lim) {
1664                                         goto stop;
1665                                 }
1666
1667                                 while (this_len > 0) {
1668                                         Emchar pat_ch, buf_ch;
1669
1670                                         DEC_CHARPTR(p);
1671                                         DEC_BYTIND(buf, this_idx);
1672                                         pat_ch = charptr_emchar(p);
1673                                         buf_ch =
1674                                             BI_BUF_FETCH_CHAR(buf, this_idx);
1675
1676                                         buf_ch = TRANSLATE(trt, buf_ch);
1677
1678                                         if (buf_ch != pat_ch)
1679                                                 break;
1680
1681                                         this_len -=
1682                                             charcount_to_bytecount(p, 1);
1683                                 }
1684                                 if (this_len == 0) {
1685                                         buf_len = idx - this_idx;
1686                                         idx = this_idx;
1687                                         break;
1688                                 }
1689                                 DEC_BYTIND(buf, idx);
1690                         }
1691                         n++;
1692                 }
1693         }
1694 stop:
1695         if (n == 0) {
1696                 Bufpos beg, end, retval;
1697                 if (forward) {
1698                         beg = bytind_to_bufpos(buf, idx - buf_len);
1699                         retval = end = bytind_to_bufpos(buf, idx);
1700                 } else {
1701                         retval = beg = bytind_to_bufpos(buf, idx);
1702                         end = bytind_to_bufpos(buf, idx + buf_len);
1703                 }
1704                 set_search_regs(buf, beg, end - beg);
1705                 clear_unused_search_regs(&search_regs, 0);
1706
1707                 return retval;
1708         } else if (n > 0) {
1709                 return -n;
1710         } else {
1711                 return n;
1712         }
1713 }
1714
1715 /* Do Boyer-Moore search N times for the string PAT,
1716    whose length is LEN/LEN_BYTE,
1717    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1718    DIRECTION says which direction we search in.
1719    TRT and INVERSE_TRT are translation tables.
1720
1721    This kind of search works if all the characters in PAT that have
1722    nontrivial translation are the same aside from the last byte.  This
1723    makes it possible to translate just the last byte of a character,
1724    and do so after just a simple test of the context.
1725
1726    If that criterion is not satisfied, do not call this function.  */
1727
1728 static Bufpos
1729 boyer_moore(struct buffer *buf, Bufbyte * base_pat, Bytecount len,
1730             Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1731             Lisp_Object inverse_trt, int charset_base)
1732 {
1733         /* #### Someone really really really needs to comment the workings
1734            of this junk somewhat better.
1735
1736            BTW "BM" stands for Boyer-Moore, which is one of the standard
1737            string-searching algorithms.  It's the best string-searching
1738            algorithm out there, provided that:
1739
1740            a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1741            uses hashing, is much much easier to code but not as fast.)
1742            b) You can freely move backwards in the string that you're
1743            searching through.
1744
1745            As the comment below tries to explain (but garbles in typical
1746            programmer-ese), the idea is that you don't have to do a
1747            string match at every successive position in the text.  For
1748            example, let's say the pattern is "a very long string".  We
1749            compare the last character in the string (`g') with the
1750            corresponding character in the text.  If it mismatches, and
1751            it is, say, `z', then we can skip forward by the entire
1752            length of the pattern because `z' does not occur anywhere
1753            in the pattern.  If the mismatching character does occur
1754            in the pattern, we can usually still skip forward by more
1755            than one: e.g. if it is `l', then we can skip forward
1756            by the length of the substring "ong string" -- i.e. the
1757            largest end section of the pattern that does not contain
1758            the mismatched character.  So what we do is compute, for
1759            each possible character, the distance we can skip forward
1760            (the "stride") and use it in the string matching.  This
1761            is what the BM_tab holds. */
1762         REGISTER EMACS_INT *BM_tab;
1763         EMACS_INT *BM_tab_base;
1764         REGISTER Bytecount dirlen;
1765         EMACS_INT infinity;
1766         Bytind limit;
1767         Bytecount stride_for_teases = 0;
1768         REGISTER EMACS_INT i, j;
1769         Bufbyte *pat, *pat_end;
1770         REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1771         Bufbyte simple_translate[REGEXP_FASTMAP_SIZE];
1772         REGISTER int direction = ((n > 0) ? 1 : -1);
1773 #ifdef MULE
1774         Bufbyte translate_prev_byte = 0;
1775         Bufbyte translate_anteprev_byte = 0;
1776 #endif
1777 #ifdef C_ALLOCA
1778         EMACS_INT BM_tab_space[REGEXP_FASTMAP_SIZE];
1779         BM_tab = &BM_tab_space[0];
1780 #else
1781         BM_tab = alloca_array(EMACS_INT, 256);
1782 #endif
1783
1784         /* The general approach is that we are going to maintain that we
1785            know the first (closest to the present position, in whatever
1786            direction we're searching) character that could possibly be
1787            the last (furthest from present position) character of a
1788            valid match.  We advance the state of our knowledge by
1789            looking at that character and seeing whether it indeed
1790            matches the last character of the pattern.  If it does, we
1791            take a closer look.  If it does not, we move our pointer (to
1792            putative last characters) as far as is logically possible.
1793            This amount of movement, which I call a stride, will be the
1794            length of the pattern if the actual character appears nowhere
1795            in the pattern, otherwise it will be the distance from the
1796            last occurrence of that character to the end of the pattern.
1797            As a coding trick, an enormous stride is coded into the table
1798            for characters that match the last character.  This allows
1799            use of only a single test, a test for having gone past the
1800            end of the permissible match region, to test for both
1801            possible matches (when the stride goes past the end
1802            immediately) and failure to match (where you get nudged past
1803            the end one stride at a time).
1804
1805            Here we make a "mickey mouse" BM table.  The stride of the
1806            search is determined only by the last character of the
1807            putative match.  If that character does not match, we will
1808            stride the proper distance to propose a match that
1809            superimposes it on the last instance of a character that
1810            matches it (per trt), or misses it entirely if there is
1811            none. */
1812
1813         dirlen = len * direction;
1814         infinity = dirlen - (lim + pos + len + len) * direction;
1815         /* Record position after the end of the pattern.  */
1816         pat_end = base_pat + len;
1817         if (direction < 0)
1818                 base_pat = pat_end - 1;
1819         BM_tab_base = BM_tab;
1820         BM_tab += REGEXP_FASTMAP_SIZE;
1821         j = dirlen;             /* to get it in a register */
1822         /* A character that does not appear in the pattern induces a
1823            stride equal to the pattern length. */
1824         while (BM_tab_base != BM_tab) {
1825                 *--BM_tab = j;
1826                 *--BM_tab = j;
1827                 *--BM_tab = j;
1828                 *--BM_tab = j;
1829         }
1830         /* We use this for translation, instead of TRT itself.  We
1831            fill this in to handle the characters that actually occur
1832            in the pattern.  Others don't matter anyway!  */
1833         xzero(simple_translate);
1834         for (i = 0; i < REGEXP_FASTMAP_SIZE; i++)
1835                 simple_translate[i] = (Bufbyte) i;
1836         i = 0;
1837         while (i != infinity) {
1838                 Bufbyte *ptr = base_pat + i;
1839                 i += direction;
1840                 if (i == dirlen)
1841                         i = infinity;
1842                 if (!NILP(trt)) {
1843 #ifdef MULE
1844                         Emchar ch, untranslated;
1845                         int this_translated = 1;
1846
1847                         /* Is *PTR the last byte of a character?  */
1848                         if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P(ptr[1])) {
1849                                 Bufbyte *charstart = ptr;
1850                                 while (!BUFBYTE_FIRST_BYTE_P(*charstart))
1851                                         charstart--;
1852                                 untranslated = charptr_emchar(charstart);
1853                                 if (charset_base ==
1854                                     (untranslated & ~CHAR_FIELD3_MASK)) {
1855                                         ch = TRANSLATE(trt, untranslated);
1856                                         if (!BUFBYTE_FIRST_BYTE_P(*ptr)) {
1857                                                 translate_prev_byte = ptr[-1];
1858                                                 if (!BUFBYTE_FIRST_BYTE_P
1859                                                     (translate_prev_byte))
1860                                                         translate_anteprev_byte
1861                                                             = ptr[-2];
1862                                         }
1863                                 } else {
1864                                         this_translated = 0;
1865                                         ch = *ptr;
1866                                 }
1867                         } else {
1868                                 ch = *ptr;
1869                                 this_translated = 0;
1870                         }
1871                         if (ch > REGEXP_FASTMAP_SIZE)
1872                                 j = ((unsigned char)(ch & REGEXP_FASTMAP_MASK)| 0200);
1873                         else
1874                                 j = (unsigned char)(ch & REGEXP_FASTMAP_MASK);
1875
1876                         if (i == infinity)
1877                                 stride_for_teases = BM_tab[j];
1878                         BM_tab[j] = dirlen - i;
1879                         /* A translation table is accompanied by its inverse --
1880                            see comment following downcase_table for details */
1881                         if (this_translated) {
1882                                 Emchar starting_ch = ch;
1883                                 EMACS_INT starting_j = j;
1884                                 while (1) {
1885                                         ch = TRANSLATE(inverse_trt, ch);
1886                                         if (ch > REGEXP_FASTMAP_SIZE)
1887                                                 j = ((unsigned char)(ch & REGEXP_FASTMAP_MASK) | 0200);
1888                                         else
1889                                                 j = (unsigned char)(ch & REGEXP_FASTMAP_MASK);
1890
1891                                         /* For all the characters that map into CH,
1892                                            set up simple_translate to map the last byte
1893                                            into STARTING_J.  */
1894                                         simple_translate[j] = starting_j;
1895                                         if (ch == starting_ch)
1896                                                 break;
1897                                         BM_tab[j] = dirlen - i;
1898                                 }
1899                         }
1900 #else
1901                         EMACS_INT k;
1902                         j = *ptr;
1903                         k = (j = TRANSLATE(trt, j));
1904                         if (i == infinity)
1905                                 stride_for_teases = BM_tab[j];
1906                         BM_tab[j] = dirlen - i;
1907                         /* A translation table is accompanied by its inverse --
1908                            see comment following downcase_table for details */
1909
1910                         while ((j = TRANSLATE(inverse_trt, j)) != k) {
1911                                 simple_translate[j] = (Bufbyte) k;
1912                                 BM_tab[j] = dirlen - i;
1913                         }
1914 #endif
1915                 } else {
1916                         j = *ptr;
1917
1918                         if (i == infinity)
1919                                 stride_for_teases = BM_tab[j];
1920                         BM_tab[j] = dirlen - i;
1921                 }
1922                 /* stride_for_teases tells how much to stride if we get a
1923                    match on the far character but are subsequently
1924                    disappointed, by recording what the stride would have been
1925                    for that character if the last character had been
1926                    different. */
1927         }
1928         infinity = dirlen - infinity;
1929         pos += dirlen - ((direction > 0) ? direction : 0);
1930         /* loop invariant - pos points at where last char (first char if
1931            reverse) of pattern would align in a possible match.  */
1932         while (n != 0) {
1933                 Bytind tail_end;
1934                 Bufbyte *tail_end_ptr;
1935                 /* It's been reported that some (broken) compiler thinks
1936                    that Boolean expressions in an arithmetic context are
1937                    unsigned.  Using an explicit ?1:0 prevents this.  */
1938                 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1939                         return n * (0 - direction);
1940                 /* First we do the part we can by pointers (maybe
1941                    nothing) */
1942                 QUIT;
1943                 pat = base_pat;
1944                 limit = pos - dirlen + direction;
1945                 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1946                    have changed.  See buffer.h. */
1947                 limit = ((direction > 0)
1948                          ? BI_BUF_CEILING_OF(buf, limit) - 1
1949                          : BI_BUF_FLOOR_OF(buf, limit + 1));
1950                 /* LIMIT is now the last (not beyond-last!) value POS can
1951                    take on without hitting edge of buffer or the gap.  */
1952                 limit = ((direction > 0)
1953                          ? min(lim - 1, min(limit, pos + 20000))
1954                          : max(lim, max(limit, pos - 20000)));
1955                 tail_end = BI_BUF_CEILING_OF(buf, pos);
1956                 tail_end_ptr = BI_BUF_BYTE_ADDRESS(buf, tail_end);
1957
1958                 if ((limit - pos) * direction > 20) {
1959                         p_limit = BI_BUF_BYTE_ADDRESS(buf, limit);
1960                         ptr2 = (cursor = BI_BUF_BYTE_ADDRESS(buf, pos));
1961                         /* In this loop, pos + cursor - ptr2 is the surrogate
1962                            for pos */
1963                         while (1) {     /* use one cursor setting as long as i can */
1964                                 if (direction > 0) {    /* worth duplicating */
1965                                         /* Use signed comparison if appropriate to make
1966                                            cursor+infinity sure to be > p_limit.
1967                                            Assuming that the buffer lies in a range of
1968                                            addresses that are all "positive" (as ints)
1969                                            or all "negative", either kind of comparison
1970                                            will work as long as we don't step by
1971                                            infinity.  So pick the kind that works when
1972                                            we do step by infinity.  */
1973                                         if ((EMACS_INT) (p_limit + infinity) >
1974                                             (EMACS_INT) p_limit)
1975                                                 while ((EMACS_INT) cursor <=
1976                                                        (EMACS_INT) p_limit)
1977                                                         cursor +=
1978                                                             BM_tab[*cursor];
1979                                         else
1980                                                 while ((EMACS_UINT) cursor <=
1981                                                        (EMACS_UINT) p_limit)
1982                                                         cursor +=
1983                                                             BM_tab[*cursor];
1984                                 } else {
1985                                         if ((EMACS_INT) (p_limit + infinity) <
1986                                             (EMACS_INT) p_limit)
1987                                                 while ((EMACS_INT) cursor >=
1988                                                        (EMACS_INT) p_limit)
1989                                                         cursor +=
1990                                                             BM_tab[*cursor];
1991                                         else
1992                                                 while ((EMACS_UINT) cursor >=
1993                                                        (EMACS_UINT) p_limit)
1994                                                         cursor +=
1995                                                             BM_tab[*cursor];
1996                                 }
1997                                 /* If you are here, cursor is beyond the end of the
1998                                    searched region.  This can happen if you match on
1999                                    the far character of the pattern, because the
2000                                    "stride" of that character is infinity, a number
2001                                    able to throw you well beyond the end of the
2002                                    search.  It can also happen if you fail to match
2003                                    within the permitted region and would otherwise
2004                                    try a character beyond that region */
2005                                 if ((cursor - p_limit) * direction <= len)
2006                                         break;  /* a small overrun is genuine */
2007                                 cursor -= infinity;     /* large overrun = hit */
2008                                 i = dirlen - direction;
2009                                 if (!NILP(trt)) {
2010                                         while ((i -=
2011                                                 direction) + direction != 0) {
2012 #ifdef MULE
2013                                                 Emchar ch;
2014                                                 cursor -= direction;
2015                                                 /* Translate only the last byte of a character.  */
2016                                                 if ((cursor == tail_end_ptr
2017                                                      ||
2018                                                      BUFBYTE_FIRST_BYTE_P(cursor
2019                                                                           [1]))
2020                                                     &&
2021                                                     (BUFBYTE_FIRST_BYTE_P
2022                                                      (cursor[0])
2023                                                      || (translate_prev_byte ==
2024                                                          cursor[-1]
2025                                                          &&
2026                                                          (BUFBYTE_FIRST_BYTE_P
2027                                                           (translate_prev_byte)
2028                                                           ||
2029                                                           translate_anteprev_byte
2030                                                           == cursor[-2]))))
2031                                                         ch = simple_translate
2032                                                             [*cursor];
2033                                                 else
2034                                                         ch = *cursor;
2035                                                 if (pat[i] != ch)
2036                                                         break;
2037 #else
2038                                                 if (pat[i] !=
2039                                                     TRANSLATE(trt,
2040                                                               *(cursor -=
2041                                                                 direction)))
2042                                                         break;
2043 #endif
2044                                         }
2045                                 } else {
2046                                         while ((i -=
2047                                                 direction) + direction != 0)
2048                                                 if (pat[i] !=
2049                                                     *(cursor -= direction))
2050                                                         break;
2051                                 }
2052                                 cursor += dirlen - i - direction;       /* fix cursor */
2053                                 if (i + direction == 0) {
2054                                         cursor -= direction;
2055
2056                                         {
2057                                                 Bytind bytstart =
2058                                                     (pos + cursor - ptr2 +
2059                                                      ((direction > 0)
2060                                                       ? 1 - len : 0));
2061                                                 Bufpos bufstart =
2062                                                     bytind_to_bufpos(buf,
2063                                                                      bytstart);
2064                                                 Bufpos bufend =
2065                                                     bytind_to_bufpos(buf,
2066                                                                      bytstart +
2067                                                                      len);
2068
2069                                                 set_search_regs(buf, bufstart,
2070                                                                 bufend -
2071                                                                 bufstart);
2072                                                 clear_unused_search_regs
2073                                                     (&search_regs, 0);
2074                                         }
2075
2076                                         if ((n -= direction) != 0)
2077                                                 cursor += dirlen;       /* to resume search */
2078                                         else
2079                                                 return ((direction > 0)
2080                                                         ? search_regs.
2081                                                         end[0] : search_regs.
2082                                                         start[0]);
2083                                 } else
2084                                         cursor += stride_for_teases;    /* <sigh> we lose -  */
2085                         }
2086                         pos += cursor - ptr2;
2087                 } else
2088                         /* Now we'll pick up a clump that has to be done the hard
2089                            way because it covers a discontinuity */
2090                 {
2091                         /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
2092                            have changed.  See buffer.h. */
2093                         limit = ((direction > 0)
2094                                  ? BI_BUF_CEILING_OF(buf, pos - dirlen + 1) - 1
2095                                  : BI_BUF_FLOOR_OF(buf, pos - dirlen));
2096                         limit = ((direction > 0)
2097                                  ? min(limit + len, lim - 1)
2098                                  : max(limit - len, lim));
2099                         /* LIMIT is now the last value POS can have
2100                            and still be valid for a possible match.  */
2101                         while (1) {
2102                                 /* This loop can be coded for space rather than
2103                                    speed because it will usually run only once.
2104                                    (the reach is at most len + 21, and typically
2105                                    does not exceed len) */
2106                                 while ((limit - pos) * direction >= 0)
2107                                         /* *not* BI_BUF_FETCH_CHAR.  We are working here
2108                                            with bytes, not characters. */
2109                                         pos +=
2110                                             BM_tab[*BI_BUF_BYTE_ADDRESS
2111                                                    (buf, pos)];
2112                                 /* now run the same tests to distinguish going off
2113                                    the end, a match or a phony match. */
2114                                 if ((pos - limit) * direction <= len)
2115                                         break;  /* ran off the end */
2116                                 /* Found what might be a match.
2117                                    Set POS back to last (first if reverse) char pos.  */
2118                                 pos -= infinity;
2119                                 i = dirlen - direction;
2120                                 while ((i -= direction) + direction != 0) {
2121 #ifdef MULE
2122                                         Emchar ch;
2123                                         Bufbyte *ptr;
2124 #endif
2125                                         pos -= direction;
2126 #ifdef MULE
2127                                         ptr = BI_BUF_BYTE_ADDRESS(buf, pos);
2128                                         if ((ptr == tail_end_ptr
2129                                              || BUFBYTE_FIRST_BYTE_P(ptr[1]))
2130                                             && (BUFBYTE_FIRST_BYTE_P(ptr[0])
2131                                                 || (translate_prev_byte ==
2132                                                     ptr[-1]
2133                                                     &&
2134                                                     (BUFBYTE_FIRST_BYTE_P
2135                                                      (translate_prev_byte)
2136                                                      || translate_anteprev_byte
2137                                                      == ptr[-2]))))
2138                                                 ch = simple_translate[*ptr];
2139                                         else
2140                                                 ch = *ptr;
2141                                         if (pat[i] != ch)
2142                                                 break;
2143
2144 #else
2145                                         if (pat[i] != TRANSLATE(trt,
2146                                                                 *BI_BUF_BYTE_ADDRESS
2147                                                                 (buf, pos)))
2148                                                 break;
2149 #endif
2150                                 }
2151                                 /* Above loop has moved POS part or all the way back
2152                                    to the first char pos (last char pos if reverse).
2153                                    Set it once again at the last (first if reverse)
2154                                    char.  */
2155                                 pos += dirlen - i - direction;
2156                                 if (i + direction == 0) {
2157                                         pos -= direction;
2158
2159                                         {
2160                                                 Bytind bytstart = (pos +
2161                                                                    ((direction >
2162                                                                      0)
2163                                                                     ? 1 -
2164                                                                     len : 0));
2165                                                 Bufpos bufstart =
2166                                                     bytind_to_bufpos(buf,
2167                                                                      bytstart);
2168                                                 Bufpos bufend =
2169                                                     bytind_to_bufpos(buf,
2170                                                                      bytstart +
2171                                                                      len);
2172
2173                                                 set_search_regs(buf, bufstart,
2174                                                                 bufend -
2175                                                                 bufstart);
2176                                                 clear_unused_search_regs
2177                                                     (&search_regs, 0);
2178                                         }
2179
2180                                         if ((n -= direction) != 0)
2181                                                 pos += dirlen;  /* to resume search */
2182                                         else
2183                                                 return ((direction > 0)
2184                                                         ? search_regs.
2185                                                         end[0] : search_regs.
2186                                                         start[0]);
2187                                 } else
2188                                         pos += stride_for_teases;
2189                         }
2190                 }
2191                 /* We have done one clump.  Can we continue? */
2192                 if ((lim - pos) * direction < 0)
2193                         return (0 - n) * direction;
2194         }
2195         return bytind_to_bufpos(buf, pos);
2196 }
2197
2198 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
2199    buffer for a match just found.  */
2200
2201 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len)
2202 {
2203         /* This function has been Mule-ized. */
2204         /* Make sure we have registers in which to store
2205            the match position.  */
2206         if (search_regs.num_regs == 0) {
2207                 search_regs.start = xnew_atomic(regoff_t);
2208                 search_regs.end = xnew_atomic(regoff_t);
2209                 search_regs.num_regs = 1;
2210         }
2211
2212         search_regs.start[0] = beg;
2213         search_regs.end[0] = beg + len;
2214         XSETBUFFER(last_thing_searched, buf);
2215 }
2216
2217 /* Clear unused search registers so match data will be null.
2218    REGP is a pointer to the register structure to clear, usually the global
2219    search_regs.
2220    NO_SUB is the number of subexpressions to allow for.  (Does not count
2221    the whole match, ie, for a string search NO_SUB == 0.)
2222    It is an error if NO_SUB > REGP.num_regs - 1. */
2223
2224 static void clear_unused_search_regs(struct re_registers *regp, int no_sub)
2225 {
2226         /* This function has been Mule-ized. */
2227         int i;
2228
2229         assert(no_sub >= 0 && no_sub < regp->num_regs);
2230         for (i = no_sub + 1; i < regp->num_regs; i++)
2231                 regp->start[i] = regp->end[i] = -1;
2232 }
2233 \f
2234 /* Given a string of words separated by word delimiters,
2235    compute a regexp that matches those exact words
2236    separated by arbitrary punctuation.  */
2237
2238 static Lisp_Object wordify(Lisp_Object buffer, Lisp_Object string)
2239 {
2240         Charcount i, len;
2241         EMACS_INT punct_count = 0, word_count = 0;
2242         struct buffer *buf = decode_buffer(buffer, 0);
2243         Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2244
2245         CHECK_STRING(string);
2246         len = XSTRING_CHAR_LENGTH(string);
2247
2248         for (i = 0; i < len; i++)
2249                 if (!WORD_SYNTAX_P
2250                     (syntax_table, string_char(XSTRING(string), i))) {
2251                         punct_count++;
2252                         if (i > 0 && WORD_SYNTAX_P(syntax_table,
2253                                                    string_char(XSTRING(string),
2254                                                                i - 1)))
2255                                 word_count++;
2256                 }
2257         if (WORD_SYNTAX_P(syntax_table, string_char(XSTRING(string), len - 1)))
2258                 word_count++;
2259         if (!word_count)
2260                 return build_string("");
2261
2262         {
2263                 /* The following value is an upper bound on the amount of storage we
2264                    need.  In non-Mule, it is exact. */
2265                 Bufbyte *storage =
2266                     (Bufbyte *) alloca(XSTRING_LENGTH(string) - punct_count +
2267                                        5 * (word_count - 1) + 4);
2268                 Bufbyte *o = storage;
2269
2270                 *o++ = '\\';
2271                 *o++ = 'b';
2272
2273                 for (i = 0; i < len; i++) {
2274                         Emchar ch = string_char(XSTRING(string), i);
2275
2276                         if (WORD_SYNTAX_P(syntax_table, ch))
2277                                 o += set_charptr_emchar(o, ch);
2278                         else if (i > 0
2279                                  && WORD_SYNTAX_P(syntax_table,
2280                                                   string_char(XSTRING(string),
2281                                                               i - 1))
2282                                  && --word_count) {
2283                                 *o++ = '\\';
2284                                 *o++ = 'W';
2285                                 *o++ = '\\';
2286                                 *o++ = 'W';
2287                                 *o++ = '*';
2288                         }
2289                 }
2290
2291                 *o++ = '\\';
2292                 *o++ = 'b';
2293
2294                 return make_string(storage, o - storage);
2295         }
2296 }
2297 \f
2298 DEFUN("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ",  /*
2299 Search backward from point for STRING.
2300 Set point to the beginning of the occurrence found, and return point.
2301
2302 Optional second argument LIMIT bounds the search; it is a buffer
2303 position.  The match found must not extend before that position.
2304 The value nil is equivalent to (point-min).
2305
2306 Optional third argument NOERROR, if t, means just return nil (no
2307 error) if the search fails.  If neither nil nor t, set point to LIMIT
2308 and return nil.
2309
2310 Optional fourth argument COUNT is a repeat count--search for
2311 successive occurrences.
2312
2313 Optional fifth argument BUFFER specifies the buffer to search in and
2314 defaults to the current buffer.
2315
2316 See also the functions `match-beginning', `match-end' and `replace-match'.
2317 */
2318       (string, limit, noerror, count, buffer))
2319 {
2320         return search_command(string, limit, noerror, count, buffer, -1, 0, 0);
2321 }
2322
2323 DEFUN("search-forward", Fsearch_forward, 1, 5, "sSearch: ",     /*
2324 Search forward from point for STRING.
2325 Set point to the end of the occurrence found, and return point.
2326
2327 Optional second argument LIMIT bounds the search; it is a buffer
2328 position.  The match found must not extend after that position.  The
2329 value nil is equivalent to (point-max).
2330
2331 Optional third argument NOERROR, if t, means just return nil (no
2332 error) if the search fails.  If neither nil nor t, set point to LIMIT
2333 and return nil.
2334
2335 Optional fourth argument COUNT is a repeat count--search for
2336 successive occurrences.
2337
2338 Optional fifth argument BUFFER specifies the buffer to search in and
2339 defaults to the current buffer.
2340
2341 See also the functions `match-beginning', `match-end' and `replace-match'.
2342 */
2343       (string, limit, noerror, count, buffer))
2344 {
2345         return search_command(string, limit, noerror, count, buffer, 1, 0, 0);
2346 }
2347
2348 DEFUN("word-search-backward", Fword_search_backward, 1, 5, "sWord search backward: ",   /*
2349 Search backward from point for STRING, ignoring differences in punctuation.
2350 Set point to the beginning of the occurrence found, and return point.
2351
2352 Optional second argument LIMIT bounds the search; it is a buffer
2353 position.  The match found must not extend before that position.
2354 The value nil is equivalent to (point-min).
2355
2356 Optional third argument NOERROR, if t, means just return nil (no
2357 error) if the search fails.  If neither nil nor t, set point to LIMIT
2358 and return nil.
2359
2360 Optional fourth argument COUNT is a repeat count--search for
2361 successive occurrences.
2362
2363 Optional fifth argument BUFFER specifies the buffer to search in and
2364 defaults to the current buffer.
2365
2366 See also the functions `match-beginning', `match-end' and `replace-match'.
2367 */
2368       (string, limit, noerror, count, buffer))
2369 {
2370         return search_command(wordify(buffer, string), limit, noerror, count,
2371                               buffer, -1, 1, 0);
2372 }
2373
2374 DEFUN("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ",      /*
2375 Search forward from point for STRING, ignoring differences in punctuation.
2376 Set point to the end of the occurrence found, and return point.
2377
2378 Optional second argument LIMIT bounds the search; it is a buffer
2379 position.  The match found must not extend after that position.  The
2380 value nil is equivalent to (point-max).
2381
2382 Optional third argument NOERROR, if t, means just return nil (no
2383 error) if the search fails.  If neither nil nor t, set point to LIMIT
2384 and return nil.
2385
2386 Optional fourth argument COUNT is a repeat count--search for
2387 successive occurrences.
2388
2389 Optional fifth argument BUFFER specifies the buffer to search in and
2390 defaults to the current buffer.
2391
2392 See also the functions `match-beginning', `match-end' and `replace-match'.
2393 */
2394       (string, limit, noerror, count, buffer))
2395 {
2396         return search_command(wordify(buffer, string), limit, noerror, count,
2397                               buffer, 1, 1, 0);
2398 }
2399
2400 DEFUN("re-search-backward", Fre_search_backward, 1, 5, "sRE search backward: ", /*
2401 Search backward from point for match for regular expression REGEXP.
2402 Set point to the beginning of the match, and return point.
2403 The match found is the one starting last in the buffer
2404 and yet ending before the origin of the search.
2405
2406 Optional second argument LIMIT bounds the search; it is a buffer
2407 position.  The match found must not extend before that position.
2408 The value nil is equivalent to (point-min).
2409
2410 Optional third argument NOERROR, if t, means just return nil (no
2411 error) if the search fails.  If neither nil nor t, set point to LIMIT
2412 and return nil.
2413
2414 Optional fourth argument COUNT is a repeat count--search for
2415 successive occurrences.
2416
2417 Optional fifth argument BUFFER specifies the buffer to search in and
2418 defaults to the current buffer.
2419
2420 See also the functions `match-beginning', `match-end' and `replace-match'.
2421 */
2422       (regexp, limit, noerror, count, buffer))
2423 {
2424         return search_command(regexp, limit, noerror, count, buffer, -1, 1, 0);
2425 }
2426
2427 DEFUN("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ",    /*
2428 Search forward from point for regular expression REGEXP.
2429 Set point to the end of the occurrence found, and return point.
2430
2431 Optional second argument LIMIT bounds the search; it is a buffer
2432 position.  The match found must not extend after that position.  The
2433 value nil is equivalent to (point-max).
2434
2435 Optional third argument NOERROR, if t, means just return nil (no
2436 error) if the search fails.  If neither nil nor t, set point to LIMIT
2437 and return nil.
2438
2439 Optional fourth argument COUNT is a repeat count--search for
2440 successive occurrences.
2441
2442 Optional fifth argument BUFFER specifies the buffer to search in and
2443 defaults to the current buffer.
2444
2445 See also the functions `match-beginning', `match-end' and `replace-match'.
2446 */
2447       (regexp, limit, noerror, count, buffer))
2448 {
2449         return search_command(regexp, limit, noerror, count, buffer, 1, 1, 0);
2450 }
2451
2452 DEFUN("posix-search-backward", Fposix_search_backward, 1, 5, "sPosix search backward: ",        /*
2453 Search backward from point for match for regular expression REGEXP.
2454 Find the longest match in accord with Posix regular expression rules.
2455 Set point to the beginning of the match, and return point.
2456 The match found is the one starting last in the buffer
2457 and yet ending before the origin of the search.
2458
2459 Optional second argument LIMIT bounds the search; it is a buffer
2460 position.  The match found must not extend before that position.
2461 The value nil is equivalent to (point-min).
2462
2463 Optional third argument NOERROR, if t, means just return nil (no
2464 error) if the search fails.  If neither nil nor t, set point to LIMIT
2465 and return nil.
2466
2467 Optional fourth argument COUNT is a repeat count--search for
2468 successive occurrences.
2469
2470 Optional fifth argument BUFFER specifies the buffer to search in and
2471 defaults to the current buffer.
2472
2473 See also the functions `match-beginning', `match-end' and `replace-match'.
2474 */
2475       (regexp, limit, noerror, count, buffer))
2476 {
2477         return search_command(regexp, limit, noerror, count, buffer, -1, 1, 1);
2478 }
2479
2480 DEFUN("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ",   /*
2481 Search forward from point for regular expression REGEXP.
2482 Find the longest match in accord with Posix regular expression rules.
2483 Set point to the end of the occurrence found, and return point.
2484
2485 Optional second argument LIMIT bounds the search; it is a buffer
2486 position.  The match found must not extend after that position.  The
2487 value nil is equivalent to (point-max).
2488
2489 Optional third argument NOERROR, if t, means just return nil (no
2490 error) if the search fails.  If neither nil nor t, set point to LIMIT
2491 and return nil.
2492
2493 Optional fourth argument COUNT is a repeat count--search for
2494 successive occurrences.
2495
2496 Optional fifth argument BUFFER specifies the buffer to search in and
2497 defaults to the current buffer.
2498
2499 See also the functions `match-beginning', `match-end' and `replace-match'.
2500 */
2501       (regexp, limit, noerror, count, buffer))
2502 {
2503         return search_command(regexp, limit, noerror, count, buffer, 1, 1, 1);
2504 }
2505 \f
2506 static Lisp_Object free_created_dynarrs(Lisp_Object cons)
2507 {
2508         Dynarr_free(get_opaque_ptr(XCAR(cons)));
2509         Dynarr_free(get_opaque_ptr(XCDR(cons)));
2510         free_opaque_ptr(XCAR(cons));
2511         free_opaque_ptr(XCDR(cons));
2512         free_cons(XCONS(cons));
2513         return Qnil;
2514 }
2515
2516 DEFUN("replace-match", Freplace_match, 1, 5, 0, /*
2517 Replace text matched by last search with REPLACEMENT.
2518 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2519 Otherwise maybe capitalize the whole text, or maybe just word initials,
2520 based on the replaced text.
2521 If the replaced text has only capital letters
2522 and has at least one multiletter word, convert REPLACEMENT to all caps.
2523 If the replaced text has at least one word starting with a capital letter,
2524 then capitalize each word in REPLACEMENT.
2525
2526 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2527 Otherwise treat `\\' as special:
2528 `\\&' in REPLACEMENT means substitute original matched text.
2529 `\\N' means substitute what matched the Nth `\\(...\\)'.
2530 If Nth parens didn't match, substitute nothing.
2531 `\\\\' means insert one `\\'.
2532 `\\u' means upcase the next character.
2533 `\\l' means downcase the next character.
2534 `\\U' means begin upcasing all following characters.
2535 `\\L' means begin downcasing all following characters.
2536 `\\E' means terminate the effect of any `\\U' or `\\L'.
2537 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2538 all other case changes that may be made in the replaced text.
2539 FIXEDCASE and LITERAL are optional arguments.
2540 Leaves point at end of replacement text.
2541
2542 The optional fourth argument STRING can be a string to modify.
2543 In that case, this function creates and returns a new string
2544 which is made by replacing the part of STRING that was matched.
2545 When fourth argument is a string, fifth argument STRBUFFER specifies
2546 the buffer to be used for syntax-table and case-table lookup and
2547 defaults to the current buffer.  When fourth argument is not a string,
2548 the buffer that the match occurred in has automatically been remembered
2549 and you do not need to specify it.
2550
2551 When fourth argument is nil, STRBUFFER specifies a subexpression of
2552 the match.  It says to replace just that subexpression instead of the
2553 whole match.  This is useful only after a regular expression search or
2554 match since only regular expressions have distinguished subexpressions.
2555 */
2556       (replacement, fixedcase, literal, string, strbuffer))
2557 {
2558         /* This function has been Mule-ized. */
2559         /* This function can GC */
2560         enum { nochange, all_caps, cap_initial } case_action;
2561         Bufpos pos, last;
2562         int some_multiletter_word;
2563         int some_lowercase;
2564         int some_uppercase;
2565         int some_nonuppercase_initial;
2566         Emchar c, prevc;
2567         Charcount inslen;
2568         struct buffer *buf;
2569         Lisp_Char_Table *syntax_table;
2570         int mc_count;
2571         Lisp_Object buffer;
2572         int_dynarr *ul_action_dynarr = 0;
2573         int_dynarr *ul_pos_dynarr = 0;
2574         int sub = 0;
2575         int speccount;
2576
2577         CHECK_STRING(replacement);
2578
2579         if (!NILP(string)) {
2580                 CHECK_STRING(string);
2581                 if (!EQ(last_thing_searched, Qt))
2582                         error("last thing matched was not a string");
2583                 /* If the match data
2584                    were abstracted into a special "match data" type instead
2585                    of the typical half-assed "let the implementation be
2586                    visible" form it's in, we could extend it to include
2587                    the last string matched and the buffer used for that
2588                    matching.  But of course we can't change it as it is. */
2589                 buf = decode_buffer(strbuffer, 0);
2590                 XSETBUFFER(buffer, buf);
2591         } else {
2592                 if (!NILP(strbuffer)) {
2593                         CHECK_INT(strbuffer);
2594                         sub = XINT(strbuffer);
2595                         if (sub < 0 || sub >= (int)search_regs.num_regs)
2596                                 args_out_of_range(strbuffer,
2597                                                   make_int(search_regs.
2598                                                            num_regs));
2599                 }
2600                 if (!BUFFERP(last_thing_searched))
2601                         error("last thing matched was not a buffer");
2602                 buffer = last_thing_searched;
2603                 buf = XBUFFER(buffer);
2604         }
2605
2606         syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2607
2608         case_action = nochange; /* We tried an initialization */
2609         /* but some C compilers blew it */
2610
2611         if (search_regs.num_regs == 0)
2612                 error("replace-match called before any match found");
2613
2614         if (NILP(string)) {
2615                 if (search_regs.start[sub] < BUF_BEGV(buf)
2616                     || search_regs.start[sub] > search_regs.end[sub]
2617                     || search_regs.end[sub] > BUF_ZV(buf))
2618                         args_out_of_range(make_int(search_regs.start[sub]),
2619                                           make_int(search_regs.end[sub]));
2620         } else {
2621                 if (search_regs.start[0] < 0
2622                     || search_regs.start[0] > search_regs.end[0]
2623                     || search_regs.end[0] > XSTRING_CHAR_LENGTH(string))
2624                         args_out_of_range(make_int(search_regs.start[0]),
2625                                           make_int(search_regs.end[0]));
2626         }
2627
2628         if (NILP(fixedcase)) {
2629                 /* Decide how to casify by examining the matched text. */
2630
2631                 last = search_regs.end[sub];
2632                 prevc = '\n';
2633                 case_action = all_caps;
2634
2635                 /* some_multiletter_word is set nonzero if any original word
2636                    is more than one letter long. */
2637                 some_multiletter_word = 0;
2638                 some_lowercase = 0;
2639                 some_nonuppercase_initial = 0;
2640                 some_uppercase = 0;
2641
2642                 for (pos = search_regs.start[sub]; pos < last; pos++) {
2643                         if (NILP(string))
2644                                 c = BUF_FETCH_CHAR(buf, pos);
2645                         else
2646                                 c = string_char(XSTRING(string), pos);
2647
2648                         if (LOWERCASEP(buf, c)) {
2649                                 /* Cannot be all caps if any original char is lower case */
2650
2651                                 some_lowercase = 1;
2652                                 if (!WORD_SYNTAX_P(syntax_table, prevc))
2653                                         some_nonuppercase_initial = 1;
2654                                 else
2655                                         some_multiletter_word = 1;
2656                         } else if (!NOCASEP(buf, c)) {
2657                                 some_uppercase = 1;
2658                                 if (!WORD_SYNTAX_P(syntax_table, prevc)) ;
2659                                 else
2660                                         some_multiletter_word = 1;
2661                         } else {
2662                                 /* If the initial is a caseless word constituent,
2663                                    treat that like a lowercase initial.  */
2664                                 if (!WORD_SYNTAX_P(syntax_table, prevc))
2665                                         some_nonuppercase_initial = 1;
2666                         }
2667
2668                         prevc = c;
2669                 }
2670
2671                 /* Convert to all caps if the old text is all caps
2672                    and has at least one multiletter word.  */
2673                 if (!some_lowercase && some_multiletter_word)
2674                         case_action = all_caps;
2675                 /* Capitalize each word, if the old text has all capitalized words.  */
2676                 else if (!some_nonuppercase_initial && some_multiletter_word)
2677                         case_action = cap_initial;
2678                 else if (!some_nonuppercase_initial && some_uppercase)
2679                         /* Should x -> yz, operating on X, give Yz or YZ?
2680                            We'll assume the latter.  */
2681                         case_action = all_caps;
2682                 else
2683                         case_action = nochange;
2684         }
2685
2686         /* Do replacement in a string.  */
2687         if (!NILP(string)) {
2688                 Lisp_Object before, after;
2689
2690                 speccount = specpdl_depth();
2691                 before =
2692                     Fsubstring(string, Qzero, make_int(search_regs.start[0]));
2693                 after = Fsubstring(string, make_int(search_regs.end[0]), Qnil);
2694
2695                 /* Do case substitution into REPLACEMENT if desired.  */
2696                 if (NILP(literal)) {
2697                         Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2698                         Charcount strpos;
2699                         /* XEmacs change: rewrote this loop somewhat to make it
2700                            cleaner.  Also added \U, \E, etc. */
2701                         Charcount literal_start = 0;
2702                         /* We build up the substituted string in ACCUM.  */
2703                         Lisp_Object accum;
2704
2705                         accum = Qnil;
2706
2707                         /* OK, the basic idea here is that we scan through the
2708                            replacement string until we find a backslash, which
2709                            represents a substring of the original string to be
2710                            substituted.  We then append onto ACCUM the literal
2711                            text before the backslash (LASTPOS marks the
2712                            beginning of this) followed by the substring of the
2713                            original string that needs to be inserted. */
2714                         for (strpos = 0; strpos < stlen; strpos++) {
2715                                 /* If LITERAL_END is set, we've encountered a backslash
2716                                    (the end of literal text to be inserted). */
2717                                 Charcount literal_end = -1;
2718                                 /* If SUBSTART is set, we need to also insert the
2719                                    text from SUBSTART to SUBEND in the original string. */
2720                                 Charcount substart = -1;
2721                                 Charcount subend = -1;
2722
2723                                 c = string_char(XSTRING(replacement), strpos);
2724                                 if (c == '\\' && strpos < stlen - 1) {
2725                                         c = string_char(XSTRING(replacement),
2726                                                         ++strpos);
2727                                         if (c == '&') {
2728                                                 literal_end = strpos - 1;
2729                                                 substart = search_regs.start[0];
2730                                                 subend = search_regs.end[0];
2731                                         } else if (c >= '1' && c <= '9' &&
2732                                                    c <=
2733                                                    search_regs.num_regs + '0') {
2734                                                 if (search_regs.
2735                                                     start[c - '0'] >= 0) {
2736                                                         literal_end =
2737                                                             strpos - 1;
2738                                                         substart =
2739                                                             search_regs.
2740                                                             start[c - '0'];
2741                                                         subend =
2742                                                             search_regs.end[c -
2743                                                                             '0'];
2744                                                 }
2745                                         } else if (c == 'U' || c == 'u'
2746                                                    || c == 'L' || c == 'l'
2747                                                    || c == 'E') {
2748                                                 /* Keep track of all case changes requested, but don't
2749                                                    make them now.  Do them later so we override
2750                                                    everything else. */
2751                                                 if (!ul_pos_dynarr) {
2752                                                         ul_pos_dynarr =
2753                                                             Dynarr_new(int);
2754                                                         ul_action_dynarr =
2755                                                             Dynarr_new(int);
2756                                                         record_unwind_protect
2757                                                             (free_created_dynarrs,
2758                                                              noseeum_cons
2759                                                              (make_opaque_ptr
2760                                                               (ul_pos_dynarr),
2761                                                               make_opaque_ptr
2762                                                               (ul_action_dynarr)));
2763                                                 }
2764                                                 literal_end = strpos - 1;
2765                                                 Dynarr_add(ul_pos_dynarr,
2766                                                            (!NILP(accum)
2767                                                             ?
2768                                                             XSTRING_CHAR_LENGTH
2769                                                             (accum)
2770                                                             : 0) +
2771                                                            (literal_end -
2772                                                             literal_start));
2773                                                 Dynarr_add(ul_action_dynarr, c);
2774                                         } else if (c == '\\')
2775                                                 /* So we get just one backslash. */
2776                                                 literal_end = strpos;
2777                                 }
2778                                 if (literal_end >= 0) {
2779                                         Lisp_Object literal_text = Qnil;
2780                                         Lisp_Object substring = Qnil;
2781                                         if (literal_end != literal_start)
2782                                                 literal_text =
2783                                                     Fsubstring(replacement,
2784                                                                make_int
2785                                                                (literal_start),
2786                                                                make_int
2787                                                                (literal_end));
2788                                         if (substart >= 0 && subend != substart)
2789                                                 substring = Fsubstring(string,
2790                                                                        make_int
2791                                                                        (substart),
2792                                                                        make_int
2793                                                                        (subend));
2794                                         if (!NILP(literal_text)
2795                                             || !NILP(substring))
2796                                                 accum =
2797                                                     concat3(accum, literal_text,
2798                                                             substring);
2799                                         literal_start = strpos + 1;
2800                                 }
2801                         }
2802
2803                         if (strpos != literal_start)
2804                                 /* some literal text at end to be inserted */
2805                                 replacement =
2806                                     concat2(accum,
2807                                             Fsubstring(replacement,
2808                                                        make_int(literal_start),
2809                                                        make_int(strpos)));
2810                         else
2811                                 replacement = accum;
2812                 }
2813
2814                 /* replacement can be nil. */
2815                 if (NILP(replacement))
2816                         replacement = build_string("");
2817
2818                 if (case_action == all_caps)
2819                         replacement = Fupcase(replacement, buffer);
2820                 else if (case_action == cap_initial)
2821                         replacement = Fupcase_initials(replacement, buffer);
2822
2823                 /* Now finally, we need to process the \U's, \E's, etc. */
2824                 if (ul_pos_dynarr) {
2825                         int i = 0;
2826                         int cur_action = 'E';
2827                         Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2828                         Charcount strpos;
2829
2830                         for (strpos = 0; strpos < stlen; strpos++) {
2831                                 Emchar curchar =
2832                                     string_char(XSTRING(replacement), strpos);
2833                                 Emchar newchar = -1;
2834                                 if (i < Dynarr_length(ul_pos_dynarr) &&
2835                                     strpos == Dynarr_at(ul_pos_dynarr, i)) {
2836                                         int new_action =
2837                                             Dynarr_at(ul_action_dynarr, i);
2838                                         i++;
2839                                         if (new_action == 'u')
2840                                                 newchar = UPCASE(buf, curchar);
2841                                         else if (new_action == 'l')
2842                                                 newchar =
2843                                                     DOWNCASE(buf, curchar);
2844                                         else
2845                                                 cur_action = new_action;
2846                                 }
2847                                 if (newchar == -1) {
2848                                         if (cur_action == 'U')
2849                                                 newchar = UPCASE(buf, curchar);
2850                                         else if (cur_action == 'L')
2851                                                 newchar =
2852                                                     DOWNCASE(buf, curchar);
2853                                         else
2854                                                 newchar = curchar;
2855                                 }
2856                                 if (newchar != curchar)
2857                                         set_string_char(XSTRING(replacement),
2858                                                         strpos, newchar);
2859                         }
2860                 }
2861
2862                 /* frees the Dynarrs if necessary. */
2863                 unbind_to(speccount, Qnil);
2864                 return concat3(before, replacement, after);
2865         }
2866
2867         mc_count = begin_multiple_change(buf, search_regs.start[sub],
2868                                          search_regs.end[sub]);
2869
2870         /* begin_multiple_change() records an unwind-protect, so we need to
2871            record this value now. */
2872         speccount = specpdl_depth();
2873
2874         /* We insert the replacement text before the old text, and then
2875            delete the original text.  This means that markers at the
2876            beginning or end of the original will float to the corresponding
2877            position in the replacement.  */
2878         BUF_SET_PT(buf, search_regs.start[sub]);
2879         if (!NILP(literal))
2880                 Finsert(1, &replacement);
2881         else {
2882                 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2883                 Charcount strpos;
2884                 struct gcpro gcpro1;
2885                 GCPRO1(replacement);
2886                 for (strpos = 0; strpos < stlen; strpos++) {
2887                         /* on the first iteration assert(offset==0),
2888                            exactly complementing BUF_SET_PT() above.
2889                            During the loop, it keeps track of the amount inserted.
2890                          */
2891                         Charcount offset = BUF_PT(buf) - search_regs.start[sub];
2892
2893                         c = string_char(XSTRING(replacement), strpos);
2894                         if (c == '\\' && strpos < stlen - 1) {
2895                                 /* XXX FIXME: replacing just a substring non-literally
2896                                    using backslash refs to the match looks dangerous.  But
2897                                    <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2898                                    <duwe@caldera.de> claims Finsert_buffer_substring already
2899                                    handles this correctly.
2900                                  */
2901                                 c = string_char(XSTRING(replacement), ++strpos);
2902                                 if (c == '&')
2903                                         Finsert_buffer_substring
2904                                             (buffer,
2905                                              make_int(search_regs.start[0] +
2906                                                       offset),
2907                                              make_int(search_regs.end[0] +
2908                                                       offset));
2909                                 else if (c >= '1' && c <= '9'
2910                                          && c <= search_regs.num_regs + '0') {
2911                                         if (search_regs.start[c - '0'] >= 1)
2912                                                 Finsert_buffer_substring
2913                                                     (buffer,
2914                                                      make_int(search_regs.
2915                                                               start[c - '0'] +
2916                                                               offset),
2917                                                      make_int(search_regs.
2918                                                               end[c - '0'] +
2919                                                               offset));
2920                                 } else if (c == 'U' || c == 'u' || c == 'L'
2921                                            || c == 'l' || c == 'E') {
2922                                         /* Keep track of all case changes requested, but don't
2923                                            make them now.  Do them later so we override
2924                                            everything else. */
2925                                         if (!ul_pos_dynarr) {
2926                                                 ul_pos_dynarr = Dynarr_new(int);
2927                                                 ul_action_dynarr =
2928                                                     Dynarr_new(int);
2929                                                 record_unwind_protect
2930                                                     (free_created_dynarrs,
2931                                                      Fcons(make_opaque_ptr
2932                                                            (ul_pos_dynarr),
2933                                                            make_opaque_ptr
2934                                                            (ul_action_dynarr)));
2935                                         }
2936                                         Dynarr_add(ul_pos_dynarr, BUF_PT(buf));
2937                                         Dynarr_add(ul_action_dynarr, c);
2938                                 } else
2939                                         buffer_insert_emacs_char(buf, c);
2940                         } else
2941                                 buffer_insert_emacs_char(buf, c);
2942                 }
2943                 UNGCPRO;
2944         }
2945
2946         inslen = BUF_PT(buf) - (search_regs.start[sub]);
2947         buffer_delete_range(buf, search_regs.start[sub] + inslen,
2948                             search_regs.end[sub] + inslen, 0);
2949
2950         if (case_action == all_caps)
2951                 Fupcase_region(make_int(BUF_PT(buf) - inslen),
2952                                make_int(BUF_PT(buf)), buffer);
2953         else if (case_action == cap_initial)
2954                 Fupcase_initials_region(make_int(BUF_PT(buf) - inslen),
2955                                         make_int(BUF_PT(buf)), buffer);
2956
2957         /* Now go through and make all the case changes that were requested
2958            in the replacement string. */
2959         if (ul_pos_dynarr) {
2960                 Bufpos eend = BUF_PT(buf);
2961                 int i = 0;
2962                 int cur_action = 'E';
2963
2964                 for (pos = BUF_PT(buf) - inslen; pos < eend; pos++) {
2965                         Emchar curchar = BUF_FETCH_CHAR(buf, pos);
2966                         Emchar newchar = -1;
2967                         if (i < Dynarr_length(ul_pos_dynarr) &&
2968                             pos == Dynarr_at(ul_pos_dynarr, i)) {
2969                                 int new_action = Dynarr_at(ul_action_dynarr, i);
2970                                 i++;
2971                                 if (new_action == 'u')
2972                                         newchar = UPCASE(buf, curchar);
2973                                 else if (new_action == 'l')
2974                                         newchar = DOWNCASE(buf, curchar);
2975                                 else
2976                                         cur_action = new_action;
2977                         }
2978                         if (newchar == -1) {
2979                                 if (cur_action == 'U')
2980                                         newchar = UPCASE(buf, curchar);
2981                                 else if (cur_action == 'L')
2982                                         newchar = DOWNCASE(buf, curchar);
2983                                 else
2984                                         newchar = curchar;
2985                         }
2986                         if (newchar != curchar)
2987                                 buffer_replace_char(buf, pos, newchar, 0, 0);
2988                 }
2989         }
2990
2991         /* frees the Dynarrs if necessary. */
2992         unbind_to(speccount, Qnil);
2993         end_multiple_change(buf, mc_count);
2994
2995         return Qnil;
2996 }
2997 \f
2998 static Lisp_Object match_limit(Lisp_Object num, int beginningp)
2999 {
3000         /* This function has been Mule-ized. */
3001         int n;
3002
3003         CHECK_INT(num);
3004         n = XINT(num);
3005         if (n < 0 || search_regs.num_regs <= 0)
3006                 args_out_of_range(num, make_int(search_regs.num_regs));
3007         if (n >= search_regs.num_regs || search_regs.start[n] < 0)
3008                 return Qnil;
3009         return make_int(beginningp ? search_regs.start[n] : search_regs.end[n]);
3010 }
3011
3012 DEFUN("match-beginning", Fmatch_beginning, 1, 1, 0,     /*
3013 Return position of start of text matched by last regexp search.
3014 NUM, specifies which parenthesized expression in the last regexp.
3015 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3016 Zero means the entire text matched by the whole regexp or whole string.
3017 */
3018       (num))
3019 {
3020         return match_limit(num, 1);
3021 }
3022
3023 DEFUN("match-end", Fmatch_end, 1, 1, 0, /*
3024 Return position of end of text matched by last regexp search.
3025 NUM specifies which parenthesized expression in the last regexp.
3026 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3027 Zero means the entire text matched by the whole regexp or whole string.
3028 */
3029       (num))
3030 {
3031         return match_limit(num, 0);
3032 }
3033
3034 DEFUN("match-data", Fmatch_data, 0, 2, 0,       /*
3035 Return a list containing all info on what the last regexp search matched.
3036 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
3037 All the elements are markers or nil (nil if the Nth pair didn't match)
3038 if the last match was on a buffer; integers or nil if a string was matched.
3039 Use `store-match-data' to reinstate the data in this list.
3040
3041 If INTEGERS (the optional first argument) is non-nil, always use integers
3042 \(rather than markers) to represent buffer positions.
3043 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
3044 to hold all the values, and if INTEGERS is non-nil, no consing is done.
3045 */
3046       (integers, reuse))
3047 {
3048         /* This function has been Mule-ized. */
3049         Lisp_Object tail, prev;
3050         Lisp_Object *data;
3051         int i;
3052         Charcount len;
3053
3054         if (NILP(last_thing_searched))
3055                 /*error ("match-data called before any match found"); */
3056                 return Qnil;
3057
3058         data = alloca_array(Lisp_Object, 2 * search_regs.num_regs);
3059
3060         len = -1;
3061         for (i = 0; i < search_regs.num_regs; i++) {
3062                 Bufpos start = search_regs.start[i];
3063                 if (start >= 0) {
3064                         if (EQ(last_thing_searched, Qt)
3065                             || !NILP(integers)) {
3066                                 data[2 * i] = make_int(start);
3067                                 data[2 * i + 1] = make_int(search_regs.end[i]);
3068                         } else if (BUFFERP(last_thing_searched)) {
3069                                 data[2 * i] = Fmake_marker();
3070                                 Fset_marker(data[2 * i],
3071                                             make_int(start),
3072                                             last_thing_searched);
3073                                 data[2 * i + 1] = Fmake_marker();
3074                                 Fset_marker(data[2 * i + 1],
3075                                             make_int(search_regs.end[i]),
3076                                             last_thing_searched);
3077                         } else
3078                                 /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
3079                                 abort();
3080
3081                         len = i;
3082                 } else
3083                         data[2 * i] = data[2 * i + 1] = Qnil;
3084         }
3085         if (!CONSP(reuse))
3086                 return Flist(2 * len + 2, data);
3087
3088         /* If REUSE is a list, store as many value elements as will fit
3089            into the elements of REUSE.  */
3090         for (prev = Qnil, i = 0, tail = reuse; CONSP(tail);
3091              i++, tail = XCDR(tail)) {
3092                 if (i < 2 * len + 2)
3093                         XCAR(tail) = data[i];
3094                 else
3095                         XCAR(tail) = Qnil;
3096                 prev = tail;
3097         }
3098
3099         /* If we couldn't fit all value elements into REUSE,
3100            cons up the rest of them and add them to the end of REUSE.  */
3101         if (i < 2 * len + 2)
3102                 XCDR(prev) = Flist(2 * len + 2 - i, data + i);
3103
3104         return reuse;
3105 }
3106
3107 DEFUN("store-match-data", Fstore_match_data, 1, 1, 0,   /*
3108 Set internal data on last search match from elements of LIST.
3109 LIST should have been created by calling `match-data' previously.
3110 */
3111       (list))
3112 {
3113         /* This function has been Mule-ized. */
3114         REGISTER int i;
3115         REGISTER Lisp_Object marker;
3116         int num_regs;
3117         int length;
3118
3119         if (running_asynch_code)
3120                 save_search_regs();
3121
3122         CONCHECK_LIST(list);
3123
3124         /* Unless we find a marker with a buffer in LIST, assume that this
3125            match data came from a string.  */
3126         last_thing_searched = Qt;
3127
3128         /* Allocate registers if they don't already exist.  */
3129         length = XINT(Flength(list)) / 2;
3130         num_regs = search_regs.num_regs;
3131
3132         if (length > num_regs) {
3133                 if (search_regs.num_regs == 0) {
3134                         search_regs.start = xnew_atomic_array(regoff_t, length);
3135                         search_regs.end = xnew_atomic_array(regoff_t, length);
3136                 } else {
3137                         XREALLOC_ARRAY(search_regs.start, regoff_t, length);
3138                         XREALLOC_ARRAY(search_regs.end, regoff_t, length);
3139                 }
3140
3141                 search_regs.num_regs = length;
3142         }
3143
3144         for (i = 0; i < num_regs; i++) {
3145                 marker = Fcar(list);
3146                 if (NILP(marker)) {
3147                         search_regs.start[i] = -1;
3148                         list = Fcdr(list);
3149                 } else {
3150                         if (MARKERP(marker)) {
3151                                 if (XMARKER(marker)->buffer == 0)
3152                                         marker = Qzero;
3153                                 else
3154                                         XSETBUFFER(last_thing_searched,
3155                                                    XMARKER(marker)->buffer);
3156                         }
3157
3158                         CHECK_INT_COERCE_MARKER(marker);
3159                         search_regs.start[i] = XINT(marker);
3160                         list = Fcdr(list);
3161
3162                         marker = Fcar(list);
3163                         if (MARKERP(marker) && XMARKER(marker)->buffer == 0)
3164                                 marker = Qzero;
3165
3166                         CHECK_INT_COERCE_MARKER(marker);
3167                         search_regs.end[i] = XINT(marker);
3168                 }
3169                 list = Fcdr(list);
3170         }
3171
3172         return Qnil;
3173 }
3174
3175 /* If non-zero the match data have been saved in saved_search_regs
3176    during the execution of a sentinel or filter. */
3177 static int search_regs_saved;
3178 static struct re_registers saved_search_regs;
3179
3180 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3181    if asynchronous code (filter or sentinel) is running. */
3182 static void save_search_regs(void)
3183 {
3184         if (!search_regs_saved) {
3185                 saved_search_regs.num_regs = search_regs.num_regs;
3186                 saved_search_regs.start = search_regs.start;
3187                 saved_search_regs.end = search_regs.end;
3188                 search_regs.num_regs = 0;
3189                 search_regs.start = 0;
3190                 search_regs.end = 0;
3191
3192                 search_regs_saved = 1;
3193         }
3194 }
3195
3196 /* Called upon exit from filters and sentinels. */
3197 void restore_match_data(void)
3198 {
3199         if (search_regs_saved) {
3200                 if (search_regs.num_regs > 0) {
3201                         xfree(search_regs.start);
3202                         xfree(search_regs.end);
3203                 }
3204                 search_regs.num_regs = saved_search_regs.num_regs;
3205                 search_regs.start = saved_search_regs.start;
3206                 search_regs.end = saved_search_regs.end;
3207
3208                 search_regs_saved = 0;
3209         }
3210 }
3211
3212 /* Quote a string to inactivate reg-expr chars */
3213
3214 DEFUN("regexp-quote", Fregexp_quote, 1, 1, 0,   /*
3215 Return a regexp string which matches exactly STRING and nothing else.
3216 */
3217       (string))
3218 {
3219         REGISTER Bufbyte *in, *out, *end;
3220         REGISTER Bufbyte *temp;
3221
3222         CHECK_STRING(string);
3223
3224         temp = (Bufbyte *) alloca(XSTRING_LENGTH(string) * 2);
3225
3226         /* Now copy the data into the new string, inserting escapes. */
3227
3228         in = XSTRING_DATA(string);
3229         end = in + XSTRING_LENGTH(string);
3230         out = temp;
3231
3232         while (in < end) {
3233                 Emchar c = charptr_emchar(in);
3234
3235                 if (c == '[' || c == ']'
3236                     || c == '*' || c == '.' || c == '\\'
3237                     || c == '?' || c == '+' || c == '^' || c == '$')
3238                         *out++ = '\\';
3239                 out += set_charptr_emchar(out, c);
3240                 INC_CHARPTR(in);
3241         }
3242
3243         return make_string(temp, out - temp);
3244 }
3245
3246 DEFUN("set-word-regexp", Fset_word_regexp, 1, 1, 0,     /*
3247 Set the regexp to be used to match a word in regular-expression searching.
3248 #### Not yet implemented.  Currently does nothing.
3249 #### Do not use this yet.  Its calling interface is likely to change.
3250 */
3251       (regexp))
3252 {
3253         return Qnil;
3254 }
3255 \f
3256 /************************************************************************/
3257 /*                            initialization                            */
3258 /************************************************************************/
3259
3260 void syms_of_search(void)
3261 {
3262
3263         DEFERROR_STANDARD(Qsearch_failed, Qinvalid_operation);
3264         DEFERROR_STANDARD(Qinvalid_regexp, Qsyntax_error);
3265
3266 #ifdef EF_USE_COMPRE
3267         defsymbol(&Qcompre, "compre");
3268         defsymbol(&Qcomprep, "comprep");
3269
3270         DEFSUBR(Fcompile_regexp);
3271         DEFSUBR(Fdefregexp);
3272 #endif
3273
3274         DEFSUBR(Flooking_at);
3275         DEFSUBR(Fposix_looking_at);
3276         DEFSUBR(Fstring_match);
3277         DEFSUBR(Fposix_string_match);
3278         DEFSUBR(Fskip_chars_forward);
3279         DEFSUBR(Fskip_chars_backward);
3280         DEFSUBR(Fskip_syntax_forward);
3281         DEFSUBR(Fskip_syntax_backward);
3282         DEFSUBR(Fsearch_forward);
3283         DEFSUBR(Fsearch_backward);
3284         DEFSUBR(Fword_search_forward);
3285         DEFSUBR(Fword_search_backward);
3286         DEFSUBR(Fre_search_forward);
3287         DEFSUBR(Fre_search_backward);
3288         DEFSUBR(Fposix_search_forward);
3289         DEFSUBR(Fposix_search_backward);
3290         DEFSUBR(Freplace_match);
3291         DEFSUBR(Fmatch_beginning);
3292         DEFSUBR(Fmatch_end);
3293         DEFSUBR(Fmatch_data);
3294         DEFSUBR(Fstore_match_data);
3295         DEFSUBR(Fregexp_quote);
3296         DEFSUBR(Fset_word_regexp);
3297 }
3298
3299 void reinit_vars_of_search(void)
3300 {
3301         int i;
3302
3303         last_thing_searched = Qnil;
3304         staticpro_nodump(&last_thing_searched);
3305
3306         for (i = 0; i < REGEXP_CACHE_SIZE; ++i) {
3307                 searchbufs[i].buf.allocated = 100;
3308                 searchbufs[i].buf.buffer = (unsigned char *)xmalloc_atomic(100);
3309                 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3310                 searchbufs[i].regexp = Qnil;
3311                 staticpro_nodump(&searchbufs[i].regexp);
3312                 searchbufs[i].next =
3313                         (i == REGEXP_CACHE_SIZE - 1 ? 0 : &searchbufs[i + 1]);
3314         }
3315         searchbuf_head = &searchbufs[0];
3316 }
3317
3318 void vars_of_search(void)
3319 {
3320         reinit_vars_of_search();
3321
3322         DEFVAR_LISP("forward-word-regexp", &Vforward_word_regexp        /*
3323 *Regular expression to be used in `forward-word'.
3324 #### Not yet implemented.
3325                                                                          */ );
3326         Vforward_word_regexp = Qnil;
3327
3328         DEFVAR_LISP("backward-word-regexp", &Vbackward_word_regexp      /*
3329 *Regular expression to be used in `backward-word'.
3330 #### Not yet implemented.
3331                                                                          */ );
3332         Vbackward_word_regexp = Qnil;
3333 }
3334
3335 void complex_vars_of_search(void)
3336 {
3337         Vskip_chars_range_table = Fmake_range_table();
3338         staticpro(&Vskip_chars_range_table);
3339 }