Improve documentation
[sxemacs] / src / search.c
1 /* String search routines for SXEmacs.
2    Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3    Copyright (C) 1995 Sun Microsystems, Inc.
4
5 This file is part of SXEmacs
6
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
19
20
21 /* Synched up with: FSF 19.29, except for region-cache stuff. */
22
23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
24
25 /* This file has been Mule-ized except for the TRT stuff. */
26
27 #include <config.h>
28 #include "lisp.h"
29
30 #include "buffer.h"
31 #include "ui/insdel.h"
32 #include "opaque.h"
33 #ifdef REGION_CACHE_NEEDS_WORK
34 #include "region-cache.h"
35 #endif
36 #include "syntax.h"
37
38 #include <sys/types.h>
39 #include "regex.h"
40 #include "casetab.h"
41 #include "chartab.h"
42
43 #include "dynacat.h"
44
45 #define TRANSLATE(table, pos)   \
46  (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
47 \f
48 #include "elhash.h"
49 /* Make sure these are ALWAYS powers of 2 */
50 #define REGEXP_CACHE_SIZE 0x80
51 #define REGEXP_CACHE_HASH_MASK (REGEXP_CACHE_SIZE-1)
52 #define REGEXP_FASTMAP_SIZE 0400
53 #define REGEXP_FASTMAP_MASK (REGEXP_FASTMAP_SIZE-1)
54
55 #define __REGEXP_DEBUG__(args...)       fprintf(stderr, "REGEXP " args)
56 #ifndef REGEXP_DEBUG_FLAG
57 #define REGEXP_DEBUG(args...)
58 #else
59 #define REGEXP_DEBUG(args...)           __REGEXP_DEBUG__(args)
60 #endif
61 #define REGEXP_DEBUG_COMPRE(args...)    REGEXP_DEBUG("[compre]: " args)
62 #define REGEXP_DEBUG_COMPRE_C(args...)  REGEXP_DEBUG("[compre/cache]: " args)
63 #define REGEXP_DEBUG_COMPRE_H(args...)  REGEXP_DEBUG("[compre/hash]: " args)
64 #define REGEXP_CRITICAL(args...)        __REGEXP_DEBUG__("CRITICAL: " args)
65
66
67 /* If the regexp is non-nil, then the buffer contains the compiled form
68    of that regexp, suitable for searching.  */
69 struct regexp_cache {
70         struct regexp_cache *next;
71         Lisp_Object regexp;
72         struct re_pattern_buffer buf;
73         char fastmap[REGEXP_FASTMAP_SIZE];
74         /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
75         char posix;
76 };
77
78 /* The instances of that struct.  */
79 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
80
81 /* The head of the linked list; points to the most recently used buffer.  */
82 static struct regexp_cache *searchbuf_head;
83
84 /* Every call to re_match, etc., must pass &search_regs as the regs
85    argument unless you can show it is unnecessary (i.e., if re_match
86    is certainly going to be called again before region-around-match
87    can be called).
88
89    Since the registers are now dynamically allocated, we need to make
90    sure not to refer to the Nth register before checking that it has
91    been allocated by checking search_regs.num_regs.
92
93    The regex code keeps track of whether it has allocated the search
94    buffer using bits in the re_pattern_buffer.  This means that whenever
95    you compile a new pattern, it completely forgets whether it has
96    allocated any registers, and will allocate new registers the next
97    time you call a searching or matching function.  Therefore, we need
98    to call re_set_registers after compiling a new pattern or after
99    setting the match registers, so that the regex functions will be
100    able to free or re-allocate it properly.  */
101
102 /* Note: things get trickier under Mule because the values returned from
103    the regexp routines are in Bytinds but we need them to be in Bufpos's.
104    We take the easy way out for the moment and just convert them immediately.
105    We could be more clever by not converting them until necessary, but
106    that gets real ugly real fast since the buffer might have changed and
107    the positions might be out of sync or out of range.
108    */
109 static struct re_registers search_regs;
110
111 /* The buffer in which the last search was performed, or
112    Qt if the last search was done in a string;
113    Qnil if no searching has been done yet.  */
114 static Lisp_Object last_thing_searched;
115
116 /* error condition signalled when regexp compile_pattern fails */
117
118 Lisp_Object Qinvalid_regexp;
119
120 /* Regular expressions used in forward/backward-word */
121 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
122
123 /* range table for use with skip_chars.  Only needed for Mule. */
124 Lisp_Object Vskip_chars_range_table;
125
126 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len);
127 static void clear_unused_search_regs(struct re_registers *regp, int no_sub);
128 static void save_search_regs(void);
129 static Bufpos simple_search(struct buffer *buf, Bufbyte * base_pat,
130                             Bytecount len, Bytind pos, Bytind lim,
131                             EMACS_INT n, Lisp_Object trt);
132 static Bufpos boyer_moore(struct buffer *buf, Bufbyte * base_pat,
133                           Bytecount len, Bytind pos, Bytind lim,
134                           EMACS_INT n, Lisp_Object trt,
135                           Lisp_Object inverse_trt, int charset_base);
136 static Bufpos search_buffer(struct buffer *buf, Lisp_Object str,
137                             Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
138                             Lisp_Object trt, Lisp_Object inverse_trt,
139                             int posix);
140
141 static void matcher_overflow(void)
142 {
143         error("Stack overflow in regexp matcher");
144 }
145
146 #ifdef EF_USE_COMPRE
147 EXFUN(Fget, 3);
148 EXFUN(Fput, 3);
149 EXFUN(Fremprop, 2);
150 #define COMPRE_T                struct re_pattern_buffer
151 #define COMPREP(obj)                                    \
152         (DYNACATP(obj) && EQ(XDYNACAT_TYPE(obj), Qcompre))
153 #define COMPRE_GET(obj) (COMPRE_T*)XDYNACAT(obj)->ptr
154 #define COMPRE_PUT(obj, rec)    do {                            \
155                 obj = make_dynacat((void*)rec);                 \
156                 set_dynacat_printer(obj, compre_prfun);         \
157                 set_dynacat_finaliser(obj, compre_finfun);      \
158                 XDYNACAT_TYPE(obj) = Qcompre;                   \
159         } while (0)
160 #define XCOMPRE_GET(obj)        XSTRING(obj)->compre
161 #define XCOMPRE_PUT(obj, b)     XSTRING(obj)->compre = b
162 #define XCOMPRE_REM(obj)        XSTRING(obj)->compre = Qnil
163
164 /* idenitifier for hard-cached rexp */
165 Lisp_Object Qcompre, Qcomprep;
166 static Lisp_Object cache_regexp(Lisp_Object, struct re_pattern_buffer*);
167 static COMPRE_T* make_compre(void)
168 #if defined(__GNUC__)
169         __attribute__((unused))
170 #endif
171         ;
172 static COMPRE_T* clone_compre(COMPRE_T*);
173 static inline void free_compre(COMPRE_T*);
174 #endif
175
176 /* Compile a regexp and signal a Lisp error if anything goes wrong.
177    PATTERN is the pattern to compile.
178    CP is the place to put the result.
179    TRANSLATE is a translation table for ignoring case, or NULL for none.
180    REGP is the structure that says where to store the "register"
181    values that will result from matching this pattern.
182    If it is 0, we should compile the pattern not to record any
183    subexpression bounds.
184    POSIX is nonzero if we want full backtracking (POSIX style)
185    for this pattern.  0 means backtrack only enough to get a valid match.  */
186
187 static int
188 compile_pattern_1(struct regexp_cache *cp, Lisp_Object pattern,
189                   Lisp_Object translate, struct re_registers *regp, int posix,
190                   Error_behavior errb)
191 {
192         const char *val;
193         reg_syntax_t old;
194
195         cp->regexp = Qnil;
196         cp->buf.translate = translate;
197         cp->posix = posix;
198         old = re_set_syntax(RE_SYNTAX_EMACS
199                             | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
200         val = (const char *)
201             re_compile_pattern((char *)XSTRING_DATA(pattern),
202                                XSTRING_LENGTH(pattern), &cp->buf);
203         re_set_syntax(old);
204         if (val) {
205                 maybe_signal_error(Qinvalid_regexp, list1(build_string(val)),
206                                    Qsearch, errb);
207                 return 0;
208         }
209
210         cp->regexp = Fcopy_sequence(pattern);
211         return 1;
212 }
213
214 #ifdef EF_USE_COMPRE
215 /* extremely simple hash table, no collision management is performed */
216 static struct re_pattern_buffer *
217 try_pattern_from_cache(Lisp_Object pattern,
218                        struct re_registers *regp,
219                        Lisp_Object translate,
220                        int posix,
221                        Error_behavior errb)
222 {
223         struct regexp_cache *cp;
224         hcode_t pathash;
225         int place;
226
227         pathash = internal_hash(pattern, 0);
228         place = (int)((pathash>>4) & REGEXP_CACHE_HASH_MASK);
229         cp = &(searchbufs[place]);
230         REGEXP_DEBUG_COMPRE_H("trying hash place: %d.\n", place);
231
232         if (!NILP(Fstring_equal(cp->regexp, pattern))
233             && EQ(cp->buf.translate, translate) && cp->posix == posix) {
234                 /* found the pig */
235                 REGEXP_DEBUG_COMPRE_H("match: %d.\n", place);
236                 ;
237         } else {
238                 /* collision or empty slot, dont care just put the
239                  * new regexp there */
240                 if (!compile_pattern_1(
241                             cp, pattern, translate, regp, posix, errb))
242                         return 0;
243                 REGEXP_DEBUG_COMPRE_H("collision: %d.\n", place);
244         }
245
246         return &cp->buf;
247 }
248 #else  /* !EF_USE_COMPRE */
249 static struct re_pattern_buffer *
250 try_pattern_from_cache(Lisp_Object pattern,
251                        struct re_registers *regp,
252                        Lisp_Object translate,
253                        int posix,
254                        Error_behavior errb)
255 {
256         struct regexp_cache *cp, **cpp;
257
258         for (cpp = &searchbuf_head;; cpp = &cp->next) {
259                 cp = *cpp;
260                 if (!NILP(Fstring_equal(cp->regexp, pattern))
261                     && EQ(cp->buf.translate, translate)
262                     && cp->posix == posix)
263                         break;
264
265                 /* If we're at the end of the cache, compile
266                  * into the last cell.  */
267                 if (cp->next == 0) {
268                         if (!compile_pattern_1(
269                                     cp, pattern, translate, regp, posix, errb))
270                                 return 0;
271                         break;
272                 }
273         }
274
275         /* When we get here, cp (aka *cpp) contains the compiled pattern,
276            either because we found it in the cache or because we just compiled it.
277            Move it to the front of the queue to mark it as most recently used.  */
278         *cpp = cp->next;
279         cp->next = searchbuf_head;
280         searchbuf_head = cp;
281
282         return &cp->buf;
283 }
284 #endif  /* EF_USE_COMPRE */
285
286 /* Compile a regexp if necessary, but first check to see if there's one in
287    the cache.
288    PATTERN is the pattern to compile.
289    TRANSLATE is a translation table for ignoring case, or NULL for none.
290    REGP is the structure that says where to store the "register"
291    values that will result from matching this pattern.
292    If it is 0, we should compile the pattern not to record any
293    subexpression bounds.
294    POSIX is nonzero if we want full backtracking (POSIX style)
295    for this pattern.  0 means backtrack only enough to get a valid match.  */
296
297 struct re_pattern_buffer *
298 compile_pattern(Lisp_Object pattern,
299                 struct re_registers *regp,
300                 Lisp_Object translate,
301                 int posix,
302                 Error_behavior errb)
303 {
304         struct re_pattern_buffer *result = NULL;
305
306 #ifdef EF_USE_COMPRE
307         Lisp_Object rc = XCOMPRE_GET(pattern);
308
309         if (!NILP(rc) && COMPREP(rc) &&
310             (result = COMPRE_GET(rc)) &&
311             result->re_ngroups >= 0 && result->re_ngroups < 256) {
312                 REGEXP_DEBUG_COMPRE_C("using cache: 0x%x.\n",
313                                       (unsigned int)result);
314                 ;
315         } else {
316 #endif
317                 result = try_pattern_from_cache(
318                         pattern, regp, translate, posix, errb);
319 #ifdef EF_USE_COMPRE
320                 cache_regexp(pattern, result);
321         }
322 #endif
323
324
325         /* Advise the searching functions about the space we have allocated
326            for register data.  */
327         if (regp)
328                 re_set_registers(result, regp, regp->num_regs,
329                                  regp->start, regp->end);
330
331         return result;
332 }
333
334 #ifdef EF_USE_COMPRE
335 static void
336 compre_prfun(Lisp_Object obj, Lisp_Object pcfun, int escflag)
337 {
338         write_fmt_str(pcfun, "#<compiled regexp %lx",
339                       (long unsigned int)((COMPRE_GET(obj))->buffer));
340         if (escflag);
341 }
342
343 static void
344 compre_finfun(Lisp_Object obj, int unused)
345 {
346         REGEXP_DEBUG_COMPRE_C("0x%lx@0x%lx will pass away\n",
347                               (long unsigned int)COMPRE_GET(obj),
348                               (long unsigned int)obj);
349         free_compre(COMPRE_GET(obj));
350 }
351
352 #if 1
353 static COMPRE_T *
354 make_compre(void)
355 {
356         COMPRE_T *result = xnew_and_zero(COMPRE_T);
357
358         result->fastmap = xmalloc_atomic(REGEXP_FASTMAP_SIZE);
359
360         return result;
361 }
362 #endif  /* 0 */
363
364 static COMPRE_T *
365 clone_compre(COMPRE_T *src)
366 {
367         COMPRE_T *result = xnew_and_zero(COMPRE_T);
368
369         /* alloc and clone fastmap */
370         result->fastmap = (char*)xmalloc_atomic(REGEXP_FASTMAP_SIZE);
371         memcpy(result->fastmap, src->fastmap, REGEXP_FASTMAP_SIZE);
372
373         /* alloc and clone buffer */
374         result->buffer = (unsigned char *)xmalloc_atomic(src->allocated);
375         memcpy(result->buffer, src->buffer, src->allocated);
376         result->allocated = src->allocated;
377         result->used = src->used;
378
379         result->syntax = src->syntax;
380         result->translate = src->translate;
381         result->re_nsub = src->re_nsub;
382         result->re_ngroups = src->re_ngroups;
383
384         result->can_be_null = src->can_be_null;
385         result->regs_allocated = src->regs_allocated;
386         result->fastmap_accurate = src->fastmap_accurate;
387
388         result->no_sub = src->no_sub;
389         result->not_bol = src->not_bol;
390         result->not_eol = src->not_eol;
391         result->newline_anchor = src->newline_anchor;
392
393         /* alloc and clone ext_to_int_register */
394         result->external_to_internal_register =
395                 (int*)xmalloc_atomic(
396                         sizeof(int) * src->external_to_internal_register_size);
397         memcpy(result->external_to_internal_register,
398                src->external_to_internal_register,
399                sizeof(int)*src->external_to_internal_register_size);
400         result->external_to_internal_register_size =
401                 src->external_to_internal_register_size;
402
403         return result;
404 }
405
406 static inline void
407 free_compre(COMPRE_T *buf)
408 {
409         if (buf->buffer) {
410                 xfree(buf->buffer);
411                 buf->buffer = NULL;
412         }
413         if (buf->fastmap) {
414                 xfree(buf->fastmap);
415                 buf->fastmap = NULL;
416         }
417         if (buf->external_to_internal_register) {
418                 xfree(buf->external_to_internal_register);
419                 buf->external_to_internal_register = NULL;
420         }
421         xfree(buf);
422 }
423
424 static Lisp_Object
425 cache_regexp(Lisp_Object regexp, COMPRE_T *buf)
426 {
427         Lisp_Object rc;
428         COMPRE_T *resbuf;
429
430         if (buf == NULL)
431                 return Qnil;
432
433         resbuf = clone_compre(buf);
434         COMPRE_PUT(rc, resbuf);
435         XCOMPRE_PUT(regexp, rc);
436
437         REGEXP_DEBUG_COMPRE_C("caching 0x%08x into 0x%08x\n",
438                               (unsigned int)resbuf, (unsigned int)rc);
439         return rc;
440 }
441
442 DEFUN("compile-regexp", Fcompile_regexp, 1, 1, 0, /*
443 Forcibly compile REGEXP and store the result in object-plist.
444 */
445       (regexp))
446 {
447         CHECK_STRING(regexp);
448
449         XCOMPRE_REM(regexp);
450         compile_pattern(regexp, &search_regs, Qnil, 0, ERROR_ME);
451
452         return regexp;
453 }
454
455 DEFUN("defregexp", Fdefregexp, 2, UNEVALLED, 0, /*
456 \(defregexp SYMBOL REGEXP DOCSTRING\)
457 Like `defconst' but for forcing compiled regexps.
458
459 The same restrictions that apply to `defconst' apply here in regard
460 to user variables.  You shouldn't use this for regular expressions
461 that a user might want to customise.  Instead, use `defcustom' with
462 :type 'regexp.
463 */
464       (args))
465 {
466         /* This function can GC */
467         Lisp_Object sym = XCAR(args);
468         Lisp_Object pat = Feval(XCAR(args = XCDR(args)));
469         struct gcpro gcpro1;
470
471         CHECK_STRING(pat);
472
473         GCPRO1(pat);
474         pat = Fcompile_regexp(pat);
475         Fset_default(sym, pat);
476         UNGCPRO;
477
478         if (!NILP(args = XCDR(args))) {
479                 Lisp_Object doc = XCAR(args);
480                 Fput(sym, Qvariable_documentation, doc);
481                 if (!NILP(args = XCDR(args)))
482                         error("too many arguments");
483         }
484 #ifdef I18N3
485         if (!NILP(Vfile_domain))
486                 Fput(sym, Qvariable_domain, Vfile_domain);
487 #endif  /* I18N3 */
488
489         LOADHIST_ATTACH(sym);
490         return sym;
491 }
492 #endif  /* EF_USE_COMPRE */
493
494 /* Error condition used for failing searches */
495 Lisp_Object Qsearch_failed;
496
497 static Lisp_Object signal_failure(Lisp_Object arg)
498 {
499         for (;;)
500                 Fsignal(Qsearch_failed, list1(arg));
501         return Qnil;            /* Not reached. */
502 }
503
504 /* Convert the search registers from Bytinds to Bufpos's.  Needs to be
505    done after each regexp match that uses the search regs.
506
507    We could get a potential speedup by not converting the search registers
508    until it's really necessary, e.g. when match-data or replace-match is
509    called.  However, this complexifies the code a lot (e.g. the buffer
510    could have changed and the Bytinds stored might be invalid) and is
511    probably not a great time-saver. */
512
513 static void fixup_search_regs_for_buffer(struct buffer *buf)
514 {
515         int i;
516         int num_regs = search_regs.num_regs;
517
518         for (i = 0; i < num_regs; i++) {
519                 if (search_regs.start[i] >= 0)
520                         search_regs.start[i] =
521                             bytind_to_bufpos(buf, search_regs.start[i]);
522                 if (search_regs.end[i] >= 0)
523                         search_regs.end[i] =
524                             bytind_to_bufpos(buf, search_regs.end[i]);
525         }
526 }
527
528 /* Similar but for strings. */
529 static void fixup_search_regs_for_string(Lisp_Object string)
530 {
531         int i;
532         int num_regs = search_regs.num_regs;
533
534         /* #### bytecount_to_charcount() is not that efficient.  This function
535            could be faster if it did its own conversion (using INC_CHARPTR()
536            and such), because the register ends are likely to be somewhat ordered.
537            (Even if not, you could sort them.)
538
539            Think about this if this function is a time hog, which it's probably
540            not. */
541         for (i = 0; i < num_regs; i++) {
542                 if (search_regs.start[i] > 0) {
543                         search_regs.start[i] =
544                             bytecount_to_charcount(XSTRING_DATA(string),
545                                                    search_regs.start[i]);
546                 }
547                 if (search_regs.end[i] > 0) {
548                         search_regs.end[i] =
549                             bytecount_to_charcount(XSTRING_DATA(string),
550                                                    search_regs.end[i]);
551                 }
552         }
553 }
554 \f
555 static Lisp_Object
556 looking_at_1(Lisp_Object string, struct buffer *buf, int posix)
557 {
558         /* This function has been Mule-ized, except for the trt table handling. */
559         Lisp_Object val;
560         Bytind p1, p2;
561         Bytecount s1, s2;
562         REGISTER int i;
563         struct re_pattern_buffer *bufp;
564
565         if (running_asynch_code)
566                 save_search_regs();
567
568         CHECK_STRING(string);
569         bufp = compile_pattern(string, &search_regs,
570                                (!NILP(buf->case_fold_search)
571                                 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
572                                posix, ERROR_ME);
573
574         QUIT;
575
576         /* Get pointers and sizes of the two strings
577            that make up the visible portion of the buffer. */
578
579         p1 = BI_BUF_BEGV(buf);
580         p2 = BI_BUF_CEILING_OF(buf, p1);
581         s1 = p2 - p1;
582         s2 = BI_BUF_ZV(buf) - p2;
583
584         regex_match_object = Qnil;
585         regex_emacs_buffer = buf;
586         i = re_match_2(bufp, (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
587                        s1, (char *)BI_BUF_BYTE_ADDRESS(buf, p2), s2,
588                        BI_BUF_PT(buf) - BI_BUF_BEGV(buf), &search_regs,
589                        BI_BUF_ZV(buf) - BI_BUF_BEGV(buf));
590
591         if (i == -2)
592                 matcher_overflow();
593
594         val = (0 <= i ? Qt : Qnil);
595         if (NILP(val))
596                 return Qnil;
597         {
598                 int num_regs = search_regs.num_regs;
599                 for (i = 0; i < num_regs; i++)
600                         if (search_regs.start[i] >= 0) {
601                                 search_regs.start[i] += BI_BUF_BEGV(buf);
602                                 search_regs.end[i] += BI_BUF_BEGV(buf);
603                         }
604         }
605         XSETBUFFER(last_thing_searched, buf);
606         fixup_search_regs_for_buffer(buf);
607         return val;
608 }
609
610 DEFUN("looking-at", Flooking_at, 1, 2, 0,       /*
611 Return t if text after point matches regular expression REGEXP.
612 This function modifies the match data that `match-beginning',
613 `match-end' and `match-data' access; save and restore the match
614 data if you want to preserve them.
615
616 Optional argument BUFFER defaults to the current buffer.
617 */
618       (regexp, buffer))
619 {
620         return looking_at_1(regexp, decode_buffer(buffer, 0), 0);
621 }
622
623 DEFUN("posix-looking-at", Fposix_looking_at, 1, 2, 0,   /*
624 Return t if text after point matches regular expression REGEXP.
625 Find the longest match, in accord with Posix regular expression rules.
626 This function modifies the match data that `match-beginning',
627 `match-end' and `match-data' access; save and restore the match
628 data if you want to preserve them.
629
630 Optional argument BUFFER defaults to the current buffer.
631 */
632       (regexp, buffer))
633 {
634         return looking_at_1(regexp, decode_buffer(buffer, 0), 1);
635 }
636 \f
637 static Lisp_Object
638 string_match_1(Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
639                struct buffer *buf, int posix)
640 {
641         /* This function has been Mule-ized, except for the trt table handling. */
642         Bytecount val;
643         Charcount s;
644         struct re_pattern_buffer *bufp;
645
646         if (running_asynch_code)
647                 save_search_regs();
648
649         CHECK_STRING(regexp);
650         CHECK_STRING(string);
651
652         if (NILP(start))
653                 s = 0;
654         else {
655                 Charcount len = XSTRING_CHAR_LENGTH(string);
656
657                 CHECK_INT(start);
658                 s = XINT(start);
659                 if (s < 0 && -s <= len)
660                         s = len + s;
661                 else if (0 > s || s > len)
662                         args_out_of_range(string, start);
663         }
664
665         bufp = compile_pattern(regexp, &search_regs,
666                                (!NILP(buf->case_fold_search)
667                                 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
668                                0, ERROR_ME);
669         QUIT;
670         {
671                 Bytecount bis = charcount_to_bytecount(XSTRING_DATA(string), s);
672                 regex_match_object = string;
673                 regex_emacs_buffer = buf;
674                 val = re_search(bufp, (char *)XSTRING_DATA(string),
675                                 XSTRING_LENGTH(string), bis,
676                                 XSTRING_LENGTH(string) - bis, &search_regs);
677         }
678         if (val == -2)
679                 matcher_overflow();
680         if (val < 0)
681                 return Qnil;
682         last_thing_searched = Qt;
683         fixup_search_regs_for_string(string);
684         return make_int(bytecount_to_charcount(XSTRING_DATA(string), val));
685 }
686
687 DEFUN("string-match", Fstring_match, 2, 4, 0,   /*
688 Return index of start of first match for REGEXP in STRING, or nil.
689 If third arg START is non-nil, start search at that index in STRING.
690 For index of first char beyond the match, do (match-end 0).
691 `match-end' and `match-beginning' also give indices of substrings
692 matched by parenthesis constructs in the pattern.
693
694 Optional arg BUFFER controls how case folding is done (according to
695 the value of `case-fold-search' in that buffer and that buffer's case
696 tables) and defaults to the current buffer.
697 */
698       (regexp, string, start, buffer))
699 {
700         return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
701                               0);
702 }
703
704 DEFUN("posix-string-match", Fposix_string_match, 2, 4, 0,       /*
705 Return index of start of first match for REGEXP in STRING, or nil.
706 Find the longest match, in accord with Posix regular expression rules.
707 If third arg START is non-nil, start search at that index in STRING.
708 For index of first char beyond the match, do (match-end 0).
709 `match-end' and `match-beginning' also give indices of substrings
710 matched by parenthesis constructs in the pattern.
711
712 Optional arg BUFFER controls how case folding is done (according to
713 the value of `case-fold-search' in that buffer and that buffer's case
714 tables) and defaults to the current buffer.
715 */
716       (regexp, string, start, buffer))
717 {
718         return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
719                               1);
720 }
721
722 /* Match REGEXP against STRING, searching all of STRING,
723    and return the index of the match, or negative on failure.
724    This does not clobber the match data. */
725
726 Bytecount
727 fast_string_match(Lisp_Object regexp, const Bufbyte * nonreloc,
728                   Lisp_Object reloc, Bytecount offset,
729                   Bytecount length, int case_fold_search,
730                   Error_behavior errb, int no_quit)
731 {
732         /* This function has been Mule-ized, except for the trt table handling. */
733         Bytecount val;
734         const Bufbyte *newnonreloc = (const Bufbyte*)nonreloc;
735         struct re_pattern_buffer *bufp;
736
737         bufp = compile_pattern(regexp, 0,
738                                (case_fold_search
739                                 ? XCASE_TABLE_DOWNCASE(current_buffer->
740                                                        case_table)
741                                 : Qnil), 0, errb);
742         if (!bufp)
743                 return -1;      /* will only do this when errb != ERROR_ME */
744         if (!no_quit)
745                 QUIT;
746         else
747                 no_quit_in_re_search = 1;
748
749         fixup_internal_substring(nonreloc, reloc, offset, &length);
750
751
752
753         if (!NILP(reloc)) {
754                 if (no_quit) {
755                         newnonreloc = XSTRING_DATA(reloc);
756                 } else {
757                         /* QUIT could relocate RELOC.  Therefore we must
758                            alloca() and copy.  No way around this except some
759                            serious rewriting of re_search(). */
760                         /* yeah, let's rewrite this bugger, the warning
761                            hereafter is inevitable too */
762                         if ( length < 0)
763                                 /* By this point
764                                    fixup_internal_substring should
765                                    have updated length, if it didn't
766                                    return with failure...
767                                 */
768                                 return -1;
769                         Bufbyte *copy = alloca(length);
770                         memcpy((void*)copy,
771                                (const void*)XSTRING_DATA(reloc),
772                                length);
773                         newnonreloc = copy;
774                 }
775         }
776
777         /* #### evil current-buffer dependency */
778         regex_match_object = reloc;
779         regex_emacs_buffer = current_buffer;
780         val = re_search(bufp, (const char*)newnonreloc + offset, length, 0,
781                         length, 0);
782
783         no_quit_in_re_search = 0;
784         return val;
785 }
786
787 Bytecount fast_lisp_string_match(Lisp_Object regex, Lisp_Object string)
788 {
789         return fast_string_match(regex, 0, string, 0, -1, 0, ERROR_ME, 0);
790 }
791 \f
792 #ifdef REGION_CACHE_NEEDS_WORK
793 /* The newline cache: remembering which sections of text have no newlines.  */
794
795 /* If the user has requested newline caching, make sure it's on.
796    Otherwise, make sure it's off.
797    This is our cheezy way of associating an action with the change of
798    state of a buffer-local variable.  */
799 static void newline_cache_on_off(struct buffer *buf)
800 {
801         if (NILP(buf->cache_long_line_scans)) {
802                 /* It should be off.  */
803                 if (buf->newline_cache) {
804                         free_region_cache(buf->newline_cache);
805                         buf->newline_cache = 0;
806                 }
807         } else {
808                 /* It should be on.  */
809                 if (buf->newline_cache == 0)
810                         buf->newline_cache = new_region_cache();
811         }
812 }
813 #endif
814 \f
815 /* Search in BUF for COUNT instances of the character TARGET between
816    START and END.
817
818    If COUNT is positive, search forwards; END must be >= START.
819    If COUNT is negative, search backwards for the -COUNTth instance;
820       END must be <= START.
821    If COUNT is zero, do anything you please; run rogue, for all I care.
822
823    If END is zero, use BEGV or ZV instead, as appropriate for the
824    direction indicated by COUNT.
825
826    If we find COUNT instances, set *SHORTAGE to zero, and return the
827    position after the COUNTth match.  Note that for reverse motion
828    this is not the same as the usual convention for Emacs motion commands.
829
830    If we don't find COUNT instances before reaching END, set *SHORTAGE
831    to the number of TARGETs left unfound, and return END.
832
833    If ALLOW_QUIT is non-zero, call QUIT periodically. */
834
835 static Bytind
836 bi_scan_buffer(struct buffer *buf, Emchar target, Bytind st, Bytind en,
837                EMACS_INT count, EMACS_INT * shortage, int allow_quit)
838 {
839         /* This function has been Mule-ized. */
840         Bytind lim = en > 0 ? en :
841             ((count > 0) ? BI_BUF_ZV(buf) : BI_BUF_BEGV(buf));
842
843         /* #### newline cache stuff in this function not yet ported */
844
845         assert(count != 0);
846
847         if (shortage)
848                 *shortage = 0;
849
850         if (count > 0) {
851 #ifdef MULE
852                 /* Due to the Mule representation of characters in a buffer,
853                    we can simply search for characters in the range 0 - 127
854                    directly.  For other characters, we do it the "hard" way.
855                    Note that this way works for all characters but the other
856                    way is faster. */
857                 if (target >= 0200) {
858                         while (st < lim && count > 0) {
859                                 if (BI_BUF_FETCH_CHAR(buf, st) == target)
860                                         count--;
861                                 INC_BYTIND(buf, st);
862                         }
863                 } else
864 #endif
865                 {
866                         while (st < lim && count > 0) {
867                                 Bytind _ceil_;
868                                 Bufbyte *bufptr;
869
870                                 _ceil_ = BI_BUF_CEILING_OF(buf, st);
871                                 _ceil_ = min(lim, _ceil_);
872                                 bufptr =
873                                     (Bufbyte *)
874                                     memchr(BI_BUF_BYTE_ADDRESS(buf, st),
875                                            (int)target, _ceil_ - st);
876                                 if (bufptr) {
877                                         count--;
878                                         st = BI_BUF_PTR_BYTE_POS(buf,
879                                                                  bufptr) + 1;
880                                 } else
881                                         st = _ceil_;
882                         }
883                 }
884
885                 if (shortage)
886                         *shortage = count;
887                 if (allow_quit)
888                         QUIT;
889                 return st;
890         } else {
891 #ifdef MULE
892                 if (target >= 0200) {
893                         while (st > lim && count < 0) {
894                                 DEC_BYTIND(buf, st);
895                                 if (BI_BUF_FETCH_CHAR(buf, st) == target)
896                                         count++;
897                         }
898                 } else
899 #endif
900                 {
901                         while (st > lim && count < 0) {
902                                 Bytind _floor_;
903                                 Bufbyte *bufptr;
904                                 Bufbyte *floorptr;
905
906                                 _floor_ = BI_BUF_FLOOR_OF(buf, st);
907                                 _floor_ = max(lim, _floor_);
908                                 /* No memrchr() ... */
909                                 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE(buf, st);
910                                 floorptr = BI_BUF_BYTE_ADDRESS(buf, _floor_);
911                                 while (bufptr >= floorptr) {
912                                         st--;
913                                         /* At this point, both ST and BUFPTR
914                                            refer to the same character.  When
915                                            the loop terminates, ST will always
916                                            point to the last character we
917                                            tried. */
918                                         if (*(unsigned char *)bufptr ==
919                                             (unsigned char)target) {
920                                                 count++;
921                                                 break;
922                                         }
923                                         bufptr--;
924                                 }
925                         }
926                 }
927
928                 if (shortage)
929                         *shortage = -count;
930                 if (allow_quit)
931                         QUIT;
932                 if (count)
933                         return st;
934                 else {
935                         /* We found the character we were looking for; we have to return
936                            the position *after* it due to the strange way that the return
937                            value is defined. */
938                         INC_BYTIND(buf, st);
939                         return st;
940                 }
941         }
942 }
943
944 Bufpos
945 scan_buffer(struct buffer * buf, Emchar target, Bufpos start, Bufpos end,
946             EMACS_INT count, EMACS_INT * shortage, int allow_quit)
947 {
948         Bytind bi_retval;
949         Bytind bi_start, bi_end;
950
951         bi_start = bufpos_to_bytind(buf, start);
952         if (end)
953                 bi_end = bufpos_to_bytind(buf, end);
954         else
955                 bi_end = 0;
956         bi_retval = bi_scan_buffer(buf, target, bi_start, bi_end, count,
957                                    shortage, allow_quit);
958         return bytind_to_bufpos(buf, bi_retval);
959 }
960
961 Bytind bi_find_next_newline_no_quit(struct buffer * buf, Bytind from, int count)
962 {
963         return bi_scan_buffer(buf, '\n', from, 0, count, 0, 0);
964 }
965
966 Bufpos find_next_newline_no_quit(struct buffer * buf, Bufpos from, int count)
967 {
968         return scan_buffer(buf, '\n', from, 0, count, 0, 0);
969 }
970
971 Bufpos find_next_newline(struct buffer * buf, Bufpos from, int count)
972 {
973         return scan_buffer(buf, '\n', from, 0, count, 0, 1);
974 }
975
976 Bytind
977 bi_find_next_emchar_in_string(Lisp_String * str, Emchar target, Bytind st,
978                               EMACS_INT count)
979 {
980         /* This function has been Mule-ized. */
981         Bytind lim = string_length(str) - 1;
982         Bufbyte *s = string_data(str);
983
984         assert(count >= 0);
985
986 #ifdef MULE
987         /* Due to the Mule representation of characters in a buffer,
988            we can simply search for characters in the range 0 - 127
989            directly.  For other characters, we do it the "hard" way.
990            Note that this way works for all characters but the other
991            way is faster. */
992         if (target >= 0200) {
993                 while (st < lim && count > 0) {
994                         if (string_char(str, st) == target)
995                                 count--;
996                         INC_CHARBYTIND(s, st);
997                 }
998         } else
999 #endif
1000         {
1001                 while (st < lim && count > 0) {
1002                         Bufbyte *bufptr =
1003                             (Bufbyte *) memchr(charptr_n_addr(s, st),
1004                                                (int)target, lim - st);
1005                         if (bufptr) {
1006                                 count--;
1007                                 st = (Bytind) (bufptr - s) + 1;
1008                         } else
1009                                 st = lim;
1010                 }
1011         }
1012         return st;
1013 }
1014
1015 /* Like find_next_newline, but returns position before the newline,
1016    not after, and only search up to TO.  This isn't just
1017    find_next_newline (...)-1, because you might hit TO.  */
1018 Bufpos
1019 find_before_next_newline(struct buffer * buf, Bufpos from, Bufpos to, int count)
1020 {
1021         EMACS_INT shortage;
1022         Bufpos pos = scan_buffer(buf, '\n', from, to, count, &shortage, 1);
1023
1024         if (shortage == 0)
1025                 pos--;
1026
1027         return pos;
1028 }
1029 \f
1030 /* This function synched with FSF 21.1 */
1031 static Lisp_Object
1032 skip_chars(struct buffer *buf, int forwardp, int syntaxp,
1033            Lisp_Object string, Lisp_Object lim)
1034 {
1035         /* This function has been Mule-ized. */
1036         REGISTER Bufbyte *p, *pend;
1037         REGISTER Emchar c;
1038         /* We store the first 256 chars in an array here and the rest in
1039            a range table. */
1040         unsigned char fastmap[REGEXP_FASTMAP_SIZE];
1041         int negate = 0;
1042         REGISTER int i;
1043 #ifndef emacs
1044         Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
1045 #endif
1046         Bufpos limit;
1047
1048         if (NILP(lim))
1049                 limit = forwardp ? BUF_ZV(buf) : BUF_BEGV(buf);
1050         else {
1051                 CHECK_INT_COERCE_MARKER(lim);
1052                 limit = XINT(lim);
1053
1054                 /* In any case, don't allow scan outside bounds of buffer.  */
1055                 if (limit > BUF_ZV(buf))
1056                         limit = BUF_ZV(buf);
1057                 if (limit < BUF_BEGV(buf))
1058                         limit = BUF_BEGV(buf);
1059         }
1060
1061         CHECK_STRING(string);
1062         p = XSTRING_DATA(string);
1063         pend = p + XSTRING_LENGTH(string);
1064         memset(fastmap, 0, sizeof(fastmap));
1065
1066         Fclear_range_table(Vskip_chars_range_table);
1067
1068         if (p != pend && *p == '^') {
1069                 negate = 1;
1070                 p++;
1071         }
1072
1073         /* Find the characters specified and set their elements of fastmap.
1074            If syntaxp, each character counts as itself.
1075            Otherwise, handle backslashes and ranges specially  */
1076
1077         while (p != pend) {
1078                 c = charptr_emchar(p);
1079                 INC_CHARPTR(p);
1080                 if (syntaxp) {
1081                         if (c < REGEXP_FASTMAP_SIZE
1082                             && syntax_spec_code[c] < (unsigned char)Smax)
1083                                 fastmap[c] = 1;
1084                         else
1085                                 signal_simple_error("Invalid syntax designator",
1086                                                     make_char(c));
1087                 } else {
1088                         if (c == '\\') {
1089                                 if (p == pend)
1090                                         break;
1091                                 c = charptr_emchar(p);
1092                                 INC_CHARPTR(p);
1093                         }
1094                         if (p != pend && *p == '-') {
1095                                 Emchar cend;
1096
1097                                 /* Skip over the dash.  */
1098                                 p++;
1099                                 if (p == pend)
1100                                         break;
1101                                 cend = charptr_emchar(p);
1102                                 while (c <= cend && c < REGEXP_FASTMAP_SIZE) {
1103                                         fastmap[c] = 1;
1104                                         c++;
1105                                 }
1106                                 if (c <= cend)
1107                                         Fput_range_table(make_int(c),
1108                                                          make_int(cend), Qt,
1109                                                          Vskip_chars_range_table);
1110                                 INC_CHARPTR(p);
1111                         } else {
1112                                 if (c < REGEXP_FASTMAP_SIZE)
1113                                         fastmap[c] = 1;
1114                                 else
1115                                         Fput_range_table(make_int(c),
1116                                                          make_int(c), Qt,
1117                                                          Vskip_chars_range_table);
1118                         }
1119                 }
1120         }
1121
1122         /* #### Not in FSF 21.1 */
1123         if (syntaxp && fastmap['-'] != 0)
1124                 fastmap[' '] = 1;
1125
1126         /* If ^ was the first character, complement the fastmap.
1127            We don't complement the range table, however; we just use negate
1128            in the comparisons below. */
1129
1130         if (negate)
1131                 for (i = 0; i < (int)(sizeof fastmap); i++)
1132                         fastmap[i] ^= 1;
1133
1134         {
1135                 Bufpos start_point = BUF_PT(buf);
1136                 Bufpos pos = start_point;
1137                 Bytind pos_byte = BI_BUF_PT(buf);
1138
1139                 if (syntaxp) {
1140                         SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos,
1141                                                       forwardp ? 1 : -1);
1142                         /* All syntax designators are normal chars so nothing strange
1143                            to worry about */
1144                         if (forwardp) {
1145                                 if (pos < limit)
1146                                         while (fastmap[(unsigned char)
1147                                                        syntax_code_spec
1148                                                        [(int)SYNTAX_FROM_CACHE
1149                                                         (syntax_table,
1150                                                          BI_BUF_FETCH_CHAR(buf,
1151                                                                            pos_byte))]])
1152                                         {
1153                                                 pos++;
1154                                                 INC_BYTIND(buf, pos_byte);
1155                                                 if (pos >= limit)
1156                                                         break;
1157                                                 UPDATE_SYNTAX_CACHE_FORWARD
1158                                                     (pos);
1159                                         }
1160                         } else {
1161                                 while (pos > limit) {
1162                                         Bufpos savepos = pos_byte;
1163                                         pos--;
1164                                         DEC_BYTIND(buf, pos_byte);
1165                                         UPDATE_SYNTAX_CACHE_BACKWARD(pos);
1166                                         if (!fastmap[(unsigned char)
1167                                                      syntax_code_spec
1168                                                      [(int)SYNTAX_FROM_CACHE
1169                                                       (syntax_table,
1170                                                        BI_BUF_FETCH_CHAR(buf,
1171                                                                          pos_byte))]])
1172                                         {
1173                                                 pos++;
1174                                                 pos_byte = savepos;
1175                                                 break;
1176                                         }
1177                                 }
1178                         }
1179                 } else {
1180                         if (forwardp) {
1181                                 while (pos < limit) {
1182                                         Emchar ch =
1183                                             BI_BUF_FETCH_CHAR(buf, pos_byte);
1184                                         if ((ch <
1185                                              REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1186                                             : (NILP
1187                                                (Fget_range_table
1188                                                 (make_int(ch),
1189                                                  Vskip_chars_range_table, Qnil))
1190                                                == negate)) {
1191                                                 pos++;
1192                                                 INC_BYTIND(buf, pos_byte);
1193                                         } else
1194                                                 break;
1195                                 }
1196                         } else {
1197                                 while (pos > limit) {
1198                                         Bufpos prev_pos_byte = pos_byte;
1199                                         Emchar ch;
1200
1201                                         DEC_BYTIND(buf, prev_pos_byte);
1202                                         ch = BI_BUF_FETCH_CHAR(buf,
1203                                                                prev_pos_byte);
1204                                         if ((ch <
1205                                              REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1206                                             : (NILP
1207                                                (Fget_range_table
1208                                                 (make_int(ch),
1209                                                  Vskip_chars_range_table, Qnil))
1210                                                == negate)) {
1211                                                 pos--;
1212                                                 pos_byte = prev_pos_byte;
1213                                         } else
1214                                                 break;
1215                                 }
1216                         }
1217                 }
1218                 QUIT;
1219                 BOTH_BUF_SET_PT(buf, pos, pos_byte);
1220                 return make_int(BUF_PT(buf) - start_point);
1221         }
1222 }
1223
1224 DEFUN("skip-chars-forward", Fskip_chars_forward, 1, 3, 0,       /*
1225 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
1226 STRING is like the inside of a `[...]' in a regular expression
1227 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
1228 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1229 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1230 Returns the distance traveled, either zero or positive.
1231
1232 Optional argument BUFFER defaults to the current buffer.
1233 */
1234       (string, limit, buffer))
1235 {
1236         return skip_chars(decode_buffer(buffer, 0), 1, 0, string, limit);
1237 }
1238
1239 DEFUN("skip-chars-backward", Fskip_chars_backward, 1, 3, 0,     /*
1240 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1241 See `skip-chars-forward' for details.
1242 Returns the distance traveled, either zero or negative.
1243
1244 Optional argument BUFFER defaults to the current buffer.
1245 */
1246       (string, limit, buffer))
1247 {
1248         return skip_chars(decode_buffer(buffer, 0), 0, 0, string, limit);
1249 }
1250
1251 DEFUN("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0,     /*
1252 Move point forward across chars in specified syntax classes.
1253 SYNTAX is a string of syntax code characters.
1254 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1255 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1256 This function returns the distance traveled, either zero or positive.
1257
1258 Optional argument BUFFER defaults to the current buffer.
1259 */
1260       (syntax, limit, buffer))
1261 {
1262         return skip_chars(decode_buffer(buffer, 0), 1, 1, syntax, limit);
1263 }
1264
1265 DEFUN("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0,   /*
1266 Move point backward across chars in specified syntax classes.
1267 SYNTAX is a string of syntax code characters.
1268 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1269 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1270 This function returns the distance traveled, either zero or negative.
1271
1272 Optional argument BUFFER defaults to the current buffer.
1273 */
1274       (syntax, limit, buffer))
1275 {
1276         return skip_chars(decode_buffer(buffer, 0), 0, 1, syntax, limit);
1277 }
1278 \f
1279 /* Subroutines of Lisp buffer search functions. */
1280
1281 static Lisp_Object
1282 search_command(Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1283                Lisp_Object count, Lisp_Object buffer, int direction,
1284                int RE, int posix)
1285 {
1286         /* This function has been Mule-ized, except for the trt table handling. */
1287         REGISTER Bufpos np;
1288         Bufpos lim;
1289         EMACS_INT n = direction;
1290         struct buffer *buf;
1291
1292         if (!NILP(count)) {
1293                 CHECK_INT(count);
1294                 n *= XINT(count);
1295         }
1296
1297         buf = decode_buffer(buffer, 0);
1298         CHECK_STRING(string);
1299         if (NILP(limit))
1300                 lim = n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
1301         else {
1302                 CHECK_INT_COERCE_MARKER(limit);
1303                 lim = XINT(limit);
1304                 if (n > 0 ? lim < BUF_PT(buf) : lim > BUF_PT(buf))
1305                         error("Invalid search limit (wrong side of point)");
1306                 if (lim > BUF_ZV(buf))
1307                         lim = BUF_ZV(buf);
1308                 if (lim < BUF_BEGV(buf))
1309                         lim = BUF_BEGV(buf);
1310         }
1311
1312         np = search_buffer(buf, string, BUF_PT(buf), lim, n, RE,
1313                            (!NILP(buf->case_fold_search)
1314                             ? XCASE_TABLE_CANON(buf->case_table)
1315                             : Qnil), (!NILP(buf->case_fold_search)
1316                                       ? XCASE_TABLE_EQV(buf->case_table)
1317                                       : Qnil), posix);
1318
1319         if (np <= 0) {
1320                 if (NILP(noerror))
1321                         return signal_failure(string);
1322                 if (!EQ(noerror, Qt)) {
1323                         if (lim < BUF_BEGV(buf) || lim > BUF_ZV(buf))
1324                                 abort();
1325                         BUF_SET_PT(buf, lim);
1326                         return Qnil;
1327 #if 0                           /* This would be clean, but maybe programs depend on
1328                                    a value of nil here.  */
1329                         np = lim;
1330 #endif
1331                 } else
1332                         return Qnil;
1333         }
1334
1335         if (np < BUF_BEGV(buf) || np > BUF_ZV(buf))
1336                 abort();
1337
1338         BUF_SET_PT(buf, np);
1339
1340         return make_int(np);
1341 }
1342 \f
1343 static int trivial_regexp_p(Lisp_Object regexp)
1344 {
1345         /* This function has been Mule-ized. */
1346         Bytecount len = XSTRING_LENGTH(regexp);
1347         Bufbyte *s = XSTRING_DATA(regexp);
1348         while (--len >= 0) {
1349                 switch (*s++) {
1350                         /* ']' doesn't appear here because it's only special after ] */
1351                 case '.':
1352                 case '*':
1353                 case '+':
1354                 case '?':
1355                 case '[':
1356                 case '^':
1357                 case '$':
1358                         return 0;
1359                 case '\\':
1360                         if (--len < 0)
1361                                 return 0;
1362                         switch (*s++) {
1363                         case '|':
1364                         case '(':
1365                         case ')':
1366                         case '`':
1367                         case '\'':
1368                         case 'b':
1369                         case 'B':
1370                         case '<':
1371                         case '>':
1372                         case 'w':
1373                         case 'W':
1374                         case 's':
1375                         case 'S':
1376                         case '=':
1377                         case '{':
1378                         case '}':
1379 #ifdef MULE
1380                                 /* 97/2/25 jhod Added for category matches */
1381                         case 'c':
1382                         case 'C':
1383 #endif                          /* MULE */
1384                         case '1':
1385                         case '2':
1386                         case '3':
1387                         case '4':
1388                         case '5':
1389                         case '6':
1390                         case '7':
1391                         case '8':
1392                         case '9':
1393                                 return 0;
1394                         default:
1395                                 break;
1396                         }
1397                 default:
1398                         break;
1399                 }
1400         }
1401         return 1;
1402 }
1403
1404 /* Search for the n'th occurrence of STRING in BUF,
1405    starting at position BUFPOS and stopping at position BUFLIM,
1406    treating PAT as a literal string if RE is false or as
1407    a regular expression if RE is true.
1408
1409    If N is positive, searching is forward and BUFLIM must be greater
1410    than BUFPOS.
1411    If N is negative, searching is backward and BUFLIM must be less
1412    than BUFPOS.
1413
1414    Returns -x if only N-x occurrences found (x > 0),
1415    or else the position at the beginning of the Nth occurrence
1416    (if searching backward) or the end (if searching forward).
1417
1418    POSIX is nonzero if we want full backtracking (POSIX style)
1419    for this pattern.  0 means backtrack only enough to get a valid match.  */
1420 static Bufpos
1421 search_buffer(struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1422               Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1423               Lisp_Object inverse_trt, int posix)
1424 {
1425         /* This function has been Mule-ized, except for the trt table handling. */
1426         Bytecount len = XSTRING_LENGTH(string);
1427         Bufbyte *base_pat = XSTRING_DATA(string);
1428         REGISTER EMACS_INT i, j;
1429         Bytind p1, p2;
1430         Bytecount s1, s2;
1431         Bytind pos, lim;
1432
1433         if (running_asynch_code)
1434                 save_search_regs();
1435
1436         /* Null string is found at starting position.  */
1437         if (len == 0) {
1438                 set_search_regs(buf, bufpos, 0);
1439                 clear_unused_search_regs(&search_regs, 0);
1440                 return bufpos;
1441         }
1442
1443         /* Searching 0 times means noop---don't move, don't touch registers.  */
1444         if (n == 0)
1445                 return bufpos;
1446
1447         pos = bufpos_to_bytind(buf, bufpos);
1448         lim = bufpos_to_bytind(buf, buflim);
1449         if (RE && !trivial_regexp_p(string)) {
1450                 struct re_pattern_buffer *bufp;
1451
1452                 bufp = compile_pattern(string, &search_regs, trt, posix,
1453                                        ERROR_ME);
1454
1455                 /* Get pointers and sizes of the two strings
1456                    that make up the visible portion of the buffer. */
1457
1458                 p1 = BI_BUF_BEGV(buf);
1459                 p2 = BI_BUF_CEILING_OF(buf, p1);
1460                 s1 = p2 - p1;
1461                 s2 = BI_BUF_ZV(buf) - p2;
1462                 regex_match_object = Qnil;
1463
1464                 while (n < 0) {
1465                         Bytecount val;
1466                         QUIT;
1467                         regex_emacs_buffer = buf;
1468                         val = re_search_2(bufp,
1469                                           (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1470                                           s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1471                                                                           p2),
1472                                           s2, pos - BI_BUF_BEGV(buf), lim - pos,
1473                                           &search_regs, pos - BI_BUF_BEGV(buf));
1474
1475                         if (val == -2) {
1476                                 matcher_overflow();
1477                         }
1478                         if (val >= 0) {
1479                                 int num_regs = search_regs.num_regs;
1480                                 j = BI_BUF_BEGV(buf);
1481                                 for (i = 0; i < num_regs; i++)
1482                                         if (search_regs.start[i] >= 0) {
1483                                                 search_regs.start[i] += j;
1484                                                 search_regs.end[i] += j;
1485                                         }
1486                                 /* re_match (called from re_search et al) does this for us */
1487                                 /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1488                                 XSETBUFFER(last_thing_searched, buf);
1489                                 /* Set pos to the new position. */
1490                                 pos = search_regs.start[0];
1491                                 fixup_search_regs_for_buffer(buf);
1492                                 /* And bufpos too. */
1493                                 bufpos = search_regs.start[0];
1494                         } else {
1495                                 return n;
1496                         }
1497                         n++;
1498                 }
1499                 while (n > 0) {
1500                         Bytecount val;
1501                         QUIT;
1502                         regex_emacs_buffer = buf;
1503                         val = re_search_2(bufp,
1504                                           (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1505                                           s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1506                                                                           p2),
1507                                           s2, pos - BI_BUF_BEGV(buf), lim - pos,
1508                                           &search_regs, lim - BI_BUF_BEGV(buf));
1509                         if (val == -2) {
1510                                 matcher_overflow();
1511                         }
1512                         if (val >= 0) {
1513                                 int num_regs = search_regs.num_regs;
1514                                 j = BI_BUF_BEGV(buf);
1515                                 for (i = 0; i < num_regs; i++)
1516                                         if (search_regs.start[i] >= 0) {
1517                                                 search_regs.start[i] += j;
1518                                                 search_regs.end[i] += j;
1519                                         }
1520                                 /* re_match (called from re_search et al) does this for us */
1521                                 /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1522                                 XSETBUFFER(last_thing_searched, buf);
1523                                 /* Set pos to the new position. */
1524                                 pos = search_regs.end[0];
1525                                 fixup_search_regs_for_buffer(buf);
1526                                 /* And bufpos too. */
1527                                 bufpos = search_regs.end[0];
1528                         } else {
1529                                 return 0 - n;
1530                         }
1531                         n--;
1532                 }
1533                 return bufpos;
1534         } else {                /* non-RE case */
1535
1536                 int charset_base = -1;
1537                 int boyer_moore_ok = 1;
1538                 Bufbyte *pat = 0;
1539                 Bufbyte *patbuf = alloca_array(Bufbyte, len * MAX_EMCHAR_LEN);
1540                 pat = patbuf;
1541 #ifdef MULE
1542                 while (len > 0) {
1543                         Bufbyte tmp_str[MAX_EMCHAR_LEN];
1544                         Emchar c, translated, inverse;
1545                         Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1546
1547                         /* If we got here and the RE flag is set, it's because
1548                            we're dealing with a regexp known to be trivial, so the
1549                            backslash just quotes the next character.  */
1550                         if (RE && *base_pat == '\\') {
1551                                 len--;
1552                                 base_pat++;
1553                         }
1554                         c = charptr_emchar(base_pat);
1555                         translated = TRANSLATE(trt, c);
1556                         inverse = TRANSLATE(inverse_trt, c);
1557
1558                         orig_bytelen = charcount_to_bytecount(base_pat, 1);
1559                         inv_bytelen = set_charptr_emchar(tmp_str, inverse);
1560                         new_bytelen = set_charptr_emchar(tmp_str, translated);
1561
1562                         if (new_bytelen != orig_bytelen
1563                             || inv_bytelen != orig_bytelen)
1564                                 boyer_moore_ok = 0;
1565                         if (translated != c || inverse != c) {
1566                                 /* Keep track of which character set row
1567                                    contains the characters that need translation.  */
1568                                 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1569                                 if (charset_base == -1)
1570                                         charset_base = charset_base_code;
1571                                 else if (charset_base != charset_base_code)
1572                                         /* If two different rows appear, needing translation,
1573                                            then we cannot use boyer_moore search.  */
1574                                         boyer_moore_ok = 0;
1575                         }
1576                         memcpy(pat, tmp_str, new_bytelen);
1577                         pat += new_bytelen;
1578                         base_pat += orig_bytelen;
1579                         len -= orig_bytelen;
1580                 }
1581 #else                           /* not MULE */
1582                 while (--len >= 0) {
1583                         /* If we got here and the RE flag is set, it's because
1584                            we're dealing with a regexp known to be trivial, so the
1585                            backslash just quotes the next character.  */
1586                         if (RE && *base_pat == '\\') {
1587                                 len--;
1588                                 base_pat++;
1589                         }
1590                         *pat++ = TRANSLATE(trt, *base_pat++);
1591                 }
1592 #endif                          /* MULE */
1593                 len = pat - patbuf;
1594                 pat = base_pat = patbuf;
1595                 if (boyer_moore_ok)
1596                         return boyer_moore(buf, base_pat, len, pos, lim, n,
1597                                            trt, inverse_trt, charset_base);
1598                 else
1599                         return simple_search(buf, base_pat, len, pos, lim, n,
1600                                              trt);
1601         }
1602 }
1603
1604 /* Do a simple string search N times for the string PAT,
1605    whose length is LEN/LEN_BYTE,
1606    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1607    TRT is the translation table.
1608
1609    Return the character position where the match is found.
1610    Otherwise, if M matches remained to be found, return -M.
1611
1612    This kind of search works regardless of what is in PAT and
1613    regardless of what is in TRT.  It is used in cases where
1614    boyer_moore cannot work.  */
1615
1616 static Bufpos
1617 simple_search(struct buffer *buf, Bufbyte * base_pat, Bytecount len_byte,
1618               Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1619 {
1620         int forward = n > 0;
1621         Bytecount buf_len = 0;  /* Shut up compiler. */
1622
1623         if (lim > idx) {
1624                 while (n > 0) {
1625                         while (1) {
1626                                 Bytecount this_len = len_byte;
1627                                 Bytind this_idx = idx;
1628                                 const Bufbyte *p = base_pat;
1629                                 if (idx >= lim)
1630                                         goto stop;
1631
1632                                 while (this_len > 0) {
1633                                         Emchar pat_ch, buf_ch;
1634                                         Bytecount pat_len;
1635
1636                                         pat_ch = charptr_emchar(p);
1637                                         buf_ch =
1638                                             BI_BUF_FETCH_CHAR(buf, this_idx);
1639
1640                                         buf_ch = TRANSLATE(trt, buf_ch);
1641
1642                                         if (buf_ch != pat_ch)
1643                                                 break;
1644
1645                                         pat_len = charcount_to_bytecount(p, 1);
1646                                         p += pat_len;
1647                                         this_len -= pat_len;
1648                                         INC_BYTIND(buf, this_idx);
1649                                 }
1650                                 if (this_len == 0) {
1651                                         buf_len = this_idx - idx;
1652                                         idx = this_idx;
1653                                         break;
1654                                 }
1655                                 INC_BYTIND(buf, idx);
1656                         }
1657                         n--;
1658                 }
1659         } else {
1660                 while (n < 0) {
1661                         while (1) {
1662                                 Bytecount this_len = len_byte;
1663                                 Bytind this_idx = idx;
1664                                 const Bufbyte *p = base_pat + len_byte;
1665
1666                                 if (idx <= lim) {
1667                                         goto stop;
1668                                 }
1669
1670                                 while (this_len > 0) {
1671                                         Emchar pat_ch, buf_ch;
1672
1673                                         DEC_CHARPTR(p);
1674                                         DEC_BYTIND(buf, this_idx);
1675                                         pat_ch = charptr_emchar(p);
1676                                         buf_ch =
1677                                             BI_BUF_FETCH_CHAR(buf, this_idx);
1678
1679                                         buf_ch = TRANSLATE(trt, buf_ch);
1680
1681                                         if (buf_ch != pat_ch)
1682                                                 break;
1683
1684                                         this_len -=
1685                                             charcount_to_bytecount(p, 1);
1686                                 }
1687                                 if (this_len == 0) {
1688                                         buf_len = idx - this_idx;
1689                                         idx = this_idx;
1690                                         break;
1691                                 }
1692                                 DEC_BYTIND(buf, idx);
1693                         }
1694                         n++;
1695                 }
1696         }
1697 stop:
1698         if (n == 0) {
1699                 Bufpos beg, end, retval;
1700                 if (forward) {
1701                         beg = bytind_to_bufpos(buf, idx - buf_len);
1702                         retval = end = bytind_to_bufpos(buf, idx);
1703                 } else {
1704                         retval = beg = bytind_to_bufpos(buf, idx);
1705                         end = bytind_to_bufpos(buf, idx + buf_len);
1706                 }
1707                 set_search_regs(buf, beg, end - beg);
1708                 clear_unused_search_regs(&search_regs, 0);
1709
1710                 return retval;
1711         } else if (n > 0) {
1712                 return -n;
1713         } else {
1714                 return n;
1715         }
1716 }
1717
1718 /* Do Boyer-Moore search N times for the string PAT,
1719    whose length is LEN/LEN_BYTE,
1720    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1721    DIRECTION says which direction we search in.
1722    TRT and INVERSE_TRT are translation tables.
1723
1724    This kind of search works if all the characters in PAT that have
1725    nontrivial translation are the same aside from the last byte.  This
1726    makes it possible to translate just the last byte of a character,
1727    and do so after just a simple test of the context.
1728
1729    If that criterion is not satisfied, do not call this function.  */
1730
1731 static Bufpos
1732 boyer_moore(struct buffer *buf, Bufbyte * base_pat, Bytecount len,
1733             Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1734             Lisp_Object inverse_trt, int charset_base)
1735 {
1736         /* #### Someone really really really needs to comment the workings
1737            of this junk somewhat better.
1738
1739            BTW "BM" stands for Boyer-Moore, which is one of the standard
1740            string-searching algorithms.  It's the best string-searching
1741            algorithm out there, provided that:
1742
1743            a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1744            uses hashing, is much much easier to code but not as fast.)
1745            b) You can freely move backwards in the string that you're
1746            searching through.
1747
1748            As the comment below tries to explain (but garbles in typical
1749            programmer-ese), the idea is that you don't have to do a
1750            string match at every successive position in the text.  For
1751            example, let's say the pattern is "a very long string".  We
1752            compare the last character in the string (`g') with the
1753            corresponding character in the text.  If it mismatches, and
1754            it is, say, `z', then we can skip forward by the entire
1755            length of the pattern because `z' does not occur anywhere
1756            in the pattern.  If the mismatching character does occur
1757            in the pattern, we can usually still skip forward by more
1758            than one: e.g. if it is `l', then we can skip forward
1759            by the length of the substring "ong string" -- i.e. the
1760            largest end section of the pattern that does not contain
1761            the mismatched character.  So what we do is compute, for
1762            each possible character, the distance we can skip forward
1763            (the "stride") and use it in the string matching.  This
1764            is what the BM_tab holds. */
1765         REGISTER EMACS_INT *BM_tab;
1766         EMACS_INT *BM_tab_base;
1767         REGISTER Bytecount dirlen;
1768         EMACS_INT infinity;
1769         Bytind limit;
1770         Bytecount stride_for_teases = 0;
1771         REGISTER EMACS_INT i, j;
1772         Bufbyte *pat, *pat_end;
1773         REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1774         Bufbyte simple_translate[REGEXP_FASTMAP_SIZE];
1775         REGISTER int direction = ((n > 0) ? 1 : -1);
1776 #ifdef MULE
1777         Bufbyte translate_prev_byte = 0;
1778         Bufbyte translate_anteprev_byte = 0;
1779 #endif
1780 #ifdef C_ALLOCA
1781         EMACS_INT BM_tab_space[REGEXP_FASTMAP_SIZE];
1782         BM_tab = &BM_tab_space[0];
1783 #else
1784         BM_tab = alloca_array(EMACS_INT, 256);
1785 #endif
1786
1787         /* The general approach is that we are going to maintain that we
1788            know the first (closest to the present position, in whatever
1789            direction we're searching) character that could possibly be
1790            the last (furthest from present position) character of a
1791            valid match.  We advance the state of our knowledge by
1792            looking at that character and seeing whether it indeed
1793            matches the last character of the pattern.  If it does, we
1794            take a closer look.  If it does not, we move our pointer (to
1795            putative last characters) as far as is logically possible.
1796            This amount of movement, which I call a stride, will be the
1797            length of the pattern if the actual character appears nowhere
1798            in the pattern, otherwise it will be the distance from the
1799            last occurrence of that character to the end of the pattern.
1800            As a coding trick, an enormous stride is coded into the table
1801            for characters that match the last character.  This allows
1802            use of only a single test, a test for having gone past the
1803            end of the permissible match region, to test for both
1804            possible matches (when the stride goes past the end
1805            immediately) and failure to match (where you get nudged past
1806            the end one stride at a time).
1807
1808            Here we make a "mickey mouse" BM table.  The stride of the
1809            search is determined only by the last character of the
1810            putative match.  If that character does not match, we will
1811            stride the proper distance to propose a match that
1812            superimposes it on the last instance of a character that
1813            matches it (per trt), or misses it entirely if there is
1814            none. */
1815
1816         dirlen = len * direction;
1817         infinity = dirlen - (lim + pos + len + len) * direction;
1818         /* Record position after the end of the pattern.  */
1819         pat_end = base_pat + len;
1820         if (direction < 0)
1821                 base_pat = pat_end - 1;
1822         BM_tab_base = BM_tab;
1823         BM_tab += REGEXP_FASTMAP_SIZE;
1824         j = dirlen;             /* to get it in a register */
1825         /* A character that does not appear in the pattern induces a
1826            stride equal to the pattern length. */
1827         while (BM_tab_base != BM_tab) {
1828                 *--BM_tab = j;
1829                 *--BM_tab = j;
1830                 *--BM_tab = j;
1831                 *--BM_tab = j;
1832         }
1833         /* We use this for translation, instead of TRT itself.  We
1834            fill this in to handle the characters that actually occur
1835            in the pattern.  Others don't matter anyway!  */
1836         xzero(simple_translate);
1837         for (i = 0; i < REGEXP_FASTMAP_SIZE; i++)
1838                 simple_translate[i] = (Bufbyte) i;
1839         i = 0;
1840         while (i != infinity) {
1841                 Bufbyte *ptr = base_pat + i;
1842                 i += direction;
1843                 if (i == dirlen)
1844                         i = infinity;
1845                 if (!NILP(trt)) {
1846 #ifdef MULE
1847                         Emchar ch, untranslated;
1848                         int this_translated = 1;
1849
1850                         /* Is *PTR the last byte of a character?  */
1851                         if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P(ptr[1])) {
1852                                 Bufbyte *charstart = ptr;
1853                                 while (!BUFBYTE_FIRST_BYTE_P(*charstart))
1854                                         charstart--;
1855                                 untranslated = charptr_emchar(charstart);
1856                                 if (charset_base ==
1857                                     (untranslated & ~CHAR_FIELD3_MASK)) {
1858                                         ch = TRANSLATE(trt, untranslated);
1859                                         if (!BUFBYTE_FIRST_BYTE_P(*ptr)) {
1860                                                 translate_prev_byte = ptr[-1];
1861                                                 if (!BUFBYTE_FIRST_BYTE_P
1862                                                     (translate_prev_byte))
1863                                                         translate_anteprev_byte
1864                                                             = ptr[-2];
1865                                         }
1866                                 } else {
1867                                         this_translated = 0;
1868                                         ch = *ptr;
1869                                 }
1870                         } else {
1871                                 ch = *ptr;
1872                                 this_translated = 0;
1873                         }
1874                         if (ch > REGEXP_FASTMAP_SIZE)
1875                                 j = ((unsigned char)(ch & REGEXP_FASTMAP_MASK)| 0200);
1876                         else
1877                                 j = (unsigned char)(ch & REGEXP_FASTMAP_MASK);
1878
1879                         if (i == infinity)
1880                                 stride_for_teases = BM_tab[j];
1881                         BM_tab[j] = dirlen - i;
1882                         /* A translation table is accompanied by its inverse --
1883                            see comment following downcase_table for details */
1884                         if (this_translated) {
1885                                 Emchar starting_ch = ch;
1886                                 EMACS_INT starting_j = j;
1887                                 while (1) {
1888                                         ch = TRANSLATE(inverse_trt, ch);
1889                                         if (ch > REGEXP_FASTMAP_SIZE)
1890                                                 j = ((unsigned char)(ch & REGEXP_FASTMAP_MASK) | 0200);
1891                                         else
1892                                                 j = (unsigned char)(ch & REGEXP_FASTMAP_MASK);
1893
1894                                         /* For all the characters that map into CH,
1895                                            set up simple_translate to map the last byte
1896                                            into STARTING_J.  */
1897                                         simple_translate[j] = starting_j;
1898                                         if (ch == starting_ch)
1899                                                 break;
1900                                         BM_tab[j] = dirlen - i;
1901                                 }
1902                         }
1903 #else
1904                         EMACS_INT k;
1905                         j = *ptr;
1906                         k = (j = TRANSLATE(trt, j));
1907                         if (i == infinity)
1908                                 stride_for_teases = BM_tab[j];
1909                         BM_tab[j] = dirlen - i;
1910                         /* A translation table is accompanied by its inverse --
1911                            see comment following downcase_table for details */
1912
1913                         while ((j = TRANSLATE(inverse_trt, j)) != k) {
1914                                 simple_translate[j] = (Bufbyte) k;
1915                                 BM_tab[j] = dirlen - i;
1916                         }
1917 #endif
1918                 } else {
1919                         j = *ptr;
1920
1921                         if (i == infinity)
1922                                 stride_for_teases = BM_tab[j];
1923                         BM_tab[j] = dirlen - i;
1924                 }
1925                 /* stride_for_teases tells how much to stride if we get a
1926                    match on the far character but are subsequently
1927                    disappointed, by recording what the stride would have been
1928                    for that character if the last character had been
1929                    different. */
1930         }
1931         infinity = dirlen - infinity;
1932         pos += dirlen - ((direction > 0) ? direction : 0);
1933         /* loop invariant - pos points at where last char (first char if
1934            reverse) of pattern would align in a possible match.  */
1935         while (n != 0) {
1936                 Bytind tail_end;
1937                 Bufbyte *tail_end_ptr;
1938                 /* It's been reported that some (broken) compiler thinks
1939                    that Boolean expressions in an arithmetic context are
1940                    unsigned.  Using an explicit ?1:0 prevents this.  */
1941                 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1942                         return n * (0 - direction);
1943                 /* First we do the part we can by pointers (maybe
1944                    nothing) */
1945                 QUIT;
1946                 pat = base_pat;
1947                 limit = pos - dirlen + direction;
1948                 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1949                    have changed.  See buffer.h. */
1950                 limit = ((direction > 0)
1951                          ? BI_BUF_CEILING_OF(buf, limit) - 1
1952                          : BI_BUF_FLOOR_OF(buf, limit + 1));
1953                 /* LIMIT is now the last (not beyond-last!) value POS can
1954                    take on without hitting edge of buffer or the gap.  */
1955                 limit = ((direction > 0)
1956                          ? min(lim - 1, min(limit, pos + 20000))
1957                          : max(lim, max(limit, pos - 20000)));
1958                 tail_end = BI_BUF_CEILING_OF(buf, pos);
1959                 tail_end_ptr = BI_BUF_BYTE_ADDRESS(buf, tail_end);
1960
1961                 if ((limit - pos) * direction > 20) {
1962                         p_limit = BI_BUF_BYTE_ADDRESS(buf, limit);
1963                         ptr2 = (cursor = BI_BUF_BYTE_ADDRESS(buf, pos));
1964                         /* In this loop, pos + cursor - ptr2 is the surrogate
1965                            for pos */
1966                         while (1) {     /* use one cursor setting as long as i can */
1967                                 if (direction > 0) {    /* worth duplicating */
1968                                         /* Use signed comparison if appropriate to make
1969                                            cursor+infinity sure to be > p_limit.
1970                                            Assuming that the buffer lies in a range of
1971                                            addresses that are all "positive" (as ints)
1972                                            or all "negative", either kind of comparison
1973                                            will work as long as we don't step by
1974                                            infinity.  So pick the kind that works when
1975                                            we do step by infinity.  */
1976                                         if ((EMACS_INT) (p_limit + infinity) >
1977                                             (EMACS_INT) p_limit)
1978                                                 while ((EMACS_INT) cursor <=
1979                                                        (EMACS_INT) p_limit)
1980                                                         cursor +=
1981                                                             BM_tab[*cursor];
1982                                         else
1983                                                 while ((EMACS_UINT) cursor <=
1984                                                        (EMACS_UINT) p_limit)
1985                                                         cursor +=
1986                                                             BM_tab[*cursor];
1987                                 } else {
1988                                         if ((EMACS_INT) (p_limit + infinity) <
1989                                             (EMACS_INT) p_limit)
1990                                                 while ((EMACS_INT) cursor >=
1991                                                        (EMACS_INT) p_limit)
1992                                                         cursor +=
1993                                                             BM_tab[*cursor];
1994                                         else
1995                                                 while ((EMACS_UINT) cursor >=
1996                                                        (EMACS_UINT) p_limit)
1997                                                         cursor +=
1998                                                             BM_tab[*cursor];
1999                                 }
2000                                 /* If you are here, cursor is beyond the end of the
2001                                    searched region.  This can happen if you match on
2002                                    the far character of the pattern, because the
2003                                    "stride" of that character is infinity, a number
2004                                    able to throw you well beyond the end of the
2005                                    search.  It can also happen if you fail to match
2006                                    within the permitted region and would otherwise
2007                                    try a character beyond that region */
2008                                 if ((cursor - p_limit) * direction <= len)
2009                                         break;  /* a small overrun is genuine */
2010                                 cursor -= infinity;     /* large overrun = hit */
2011                                 i = dirlen - direction;
2012                                 if (!NILP(trt)) {
2013                                         while ((i -=
2014                                                 direction) + direction != 0) {
2015 #ifdef MULE
2016                                                 Emchar ch;
2017                                                 cursor -= direction;
2018                                                 /* Translate only the last byte of a character.  */
2019                                                 if ((cursor == tail_end_ptr
2020                                                      ||
2021                                                      BUFBYTE_FIRST_BYTE_P(cursor
2022                                                                           [1]))
2023                                                     &&
2024                                                     (BUFBYTE_FIRST_BYTE_P
2025                                                      (cursor[0])
2026                                                      || (translate_prev_byte ==
2027                                                          cursor[-1]
2028                                                          &&
2029                                                          (BUFBYTE_FIRST_BYTE_P
2030                                                           (translate_prev_byte)
2031                                                           ||
2032                                                           translate_anteprev_byte
2033                                                           == cursor[-2]))))
2034                                                         ch = simple_translate
2035                                                             [*cursor];
2036                                                 else
2037                                                         ch = *cursor;
2038                                                 if (pat[i] != ch)
2039                                                         break;
2040 #else
2041                                                 if (pat[i] !=
2042                                                     TRANSLATE(trt,
2043                                                               *(cursor -=
2044                                                                 direction)))
2045                                                         break;
2046 #endif
2047                                         }
2048                                 } else {
2049                                         while ((i -=
2050                                                 direction) + direction != 0)
2051                                                 if (pat[i] !=
2052                                                     *(cursor -= direction))
2053                                                         break;
2054                                 }
2055                                 cursor += dirlen - i - direction;       /* fix cursor */
2056                                 if (i + direction == 0) {
2057                                         cursor -= direction;
2058
2059                                         {
2060                                                 Bytind bytstart =
2061                                                     (pos + cursor - ptr2 +
2062                                                      ((direction > 0)
2063                                                       ? 1 - len : 0));
2064                                                 Bufpos bufstart =
2065                                                     bytind_to_bufpos(buf,
2066                                                                      bytstart);
2067                                                 Bufpos bufend =
2068                                                     bytind_to_bufpos(buf,
2069                                                                      bytstart +
2070                                                                      len);
2071
2072                                                 set_search_regs(buf, bufstart,
2073                                                                 bufend -
2074                                                                 bufstart);
2075                                                 clear_unused_search_regs
2076                                                     (&search_regs, 0);
2077                                         }
2078
2079                                         if ((n -= direction) != 0)
2080                                                 cursor += dirlen;       /* to resume search */
2081                                         else
2082                                                 return ((direction > 0)
2083                                                         ? search_regs.
2084                                                         end[0] : search_regs.
2085                                                         start[0]);
2086                                 } else
2087                                         cursor += stride_for_teases;    /* <sigh> we lose -  */
2088                         }
2089                         pos += cursor - ptr2;
2090                 } else
2091                         /* Now we'll pick up a clump that has to be done the hard
2092                            way because it covers a discontinuity */
2093                 {
2094                         /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
2095                            have changed.  See buffer.h. */
2096                         limit = ((direction > 0)
2097                                  ? BI_BUF_CEILING_OF(buf, pos - dirlen + 1) - 1
2098                                  : BI_BUF_FLOOR_OF(buf, pos - dirlen));
2099                         limit = ((direction > 0)
2100                                  ? min(limit + len, lim - 1)
2101                                  : max(limit - len, lim));
2102                         /* LIMIT is now the last value POS can have
2103                            and still be valid for a possible match.  */
2104                         while (1) {
2105                                 /* This loop can be coded for space rather than
2106                                    speed because it will usually run only once.
2107                                    (the reach is at most len + 21, and typically
2108                                    does not exceed len) */
2109                                 while ((limit - pos) * direction >= 0)
2110                                         /* *not* BI_BUF_FETCH_CHAR.  We are working here
2111                                            with bytes, not characters. */
2112                                         pos +=
2113                                             BM_tab[*BI_BUF_BYTE_ADDRESS
2114                                                    (buf, pos)];
2115                                 /* now run the same tests to distinguish going off
2116                                    the end, a match or a phony match. */
2117                                 if ((pos - limit) * direction <= len)
2118                                         break;  /* ran off the end */
2119                                 /* Found what might be a match.
2120                                    Set POS back to last (first if reverse) char pos.  */
2121                                 pos -= infinity;
2122                                 i = dirlen - direction;
2123                                 while ((i -= direction) + direction != 0) {
2124 #ifdef MULE
2125                                         Emchar ch;
2126                                         Bufbyte *ptr;
2127 #endif
2128                                         pos -= direction;
2129 #ifdef MULE
2130                                         ptr = BI_BUF_BYTE_ADDRESS(buf, pos);
2131                                         if ((ptr == tail_end_ptr
2132                                              || BUFBYTE_FIRST_BYTE_P(ptr[1]))
2133                                             && (BUFBYTE_FIRST_BYTE_P(ptr[0])
2134                                                 || (translate_prev_byte ==
2135                                                     ptr[-1]
2136                                                     &&
2137                                                     (BUFBYTE_FIRST_BYTE_P
2138                                                      (translate_prev_byte)
2139                                                      || translate_anteprev_byte
2140                                                      == ptr[-2]))))
2141                                                 ch = simple_translate[*ptr];
2142                                         else
2143                                                 ch = *ptr;
2144                                         if (pat[i] != ch)
2145                                                 break;
2146
2147 #else
2148                                         if (pat[i] != TRANSLATE(trt,
2149                                                                 *BI_BUF_BYTE_ADDRESS
2150                                                                 (buf, pos)))
2151                                                 break;
2152 #endif
2153                                 }
2154                                 /* Above loop has moved POS part or all the way back
2155                                    to the first char pos (last char pos if reverse).
2156                                    Set it once again at the last (first if reverse)
2157                                    char.  */
2158                                 pos += dirlen - i - direction;
2159                                 if (i + direction == 0) {
2160                                         pos -= direction;
2161
2162                                         {
2163                                                 Bytind bytstart = (pos +
2164                                                                    ((direction >
2165                                                                      0)
2166                                                                     ? 1 -
2167                                                                     len : 0));
2168                                                 Bufpos bufstart =
2169                                                     bytind_to_bufpos(buf,
2170                                                                      bytstart);
2171                                                 Bufpos bufend =
2172                                                     bytind_to_bufpos(buf,
2173                                                                      bytstart +
2174                                                                      len);
2175
2176                                                 set_search_regs(buf, bufstart,
2177                                                                 bufend -
2178                                                                 bufstart);
2179                                                 clear_unused_search_regs
2180                                                     (&search_regs, 0);
2181                                         }
2182
2183                                         if ((n -= direction) != 0)
2184                                                 pos += dirlen;  /* to resume search */
2185                                         else
2186                                                 return ((direction > 0)
2187                                                         ? search_regs.
2188                                                         end[0] : search_regs.
2189                                                         start[0]);
2190                                 } else
2191                                         pos += stride_for_teases;
2192                         }
2193                 }
2194                 /* We have done one clump.  Can we continue? */
2195                 if ((lim - pos) * direction < 0)
2196                         return (0 - n) * direction;
2197         }
2198         return bytind_to_bufpos(buf, pos);
2199 }
2200
2201 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
2202    buffer for a match just found.  */
2203
2204 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len)
2205 {
2206         /* This function has been Mule-ized. */
2207         /* Make sure we have registers in which to store
2208            the match position.  */
2209         if (search_regs.num_regs == 0) {
2210                 search_regs.start = xnew_atomic(regoff_t);
2211                 search_regs.end = xnew_atomic(regoff_t);
2212                 search_regs.num_regs = 1;
2213         }
2214
2215         search_regs.start[0] = beg;
2216         search_regs.end[0] = beg + len;
2217         XSETBUFFER(last_thing_searched, buf);
2218 }
2219
2220 /* Clear unused search registers so match data will be null.
2221    REGP is a pointer to the register structure to clear, usually the global
2222    search_regs.
2223    NO_SUB is the number of subexpressions to allow for.  (Does not count
2224    the whole match, ie, for a string search NO_SUB == 0.)
2225    It is an error if NO_SUB > REGP.num_regs - 1. */
2226
2227 static void clear_unused_search_regs(struct re_registers *regp, int no_sub)
2228 {
2229         /* This function has been Mule-ized. */
2230         int i;
2231
2232         assert(no_sub >= 0 && no_sub < regp->num_regs);
2233         for (i = no_sub + 1; i < regp->num_regs; i++)
2234                 regp->start[i] = regp->end[i] = -1;
2235 }
2236 \f
2237 /* Given a string of words separated by word delimiters,
2238    compute a regexp that matches those exact words
2239    separated by arbitrary punctuation.  */
2240
2241 static Lisp_Object wordify(Lisp_Object buffer, Lisp_Object string)
2242 {
2243         Charcount i, len;
2244         EMACS_INT punct_count = 0, word_count = 0;
2245         struct buffer *buf = decode_buffer(buffer, 0);
2246         Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2247
2248         CHECK_STRING(string);
2249         len = XSTRING_CHAR_LENGTH(string);
2250
2251         for (i = 0; i < len; i++)
2252                 if (!WORD_SYNTAX_P
2253                     (syntax_table, string_char(XSTRING(string), i))) {
2254                         punct_count++;
2255                         if (i > 0 && WORD_SYNTAX_P(syntax_table,
2256                                                    string_char(XSTRING(string),
2257                                                                i - 1)))
2258                                 word_count++;
2259                 }
2260         if (WORD_SYNTAX_P(syntax_table, string_char(XSTRING(string), len - 1)))
2261                 word_count++;
2262         if (!word_count)
2263                 return build_string("");
2264
2265         {
2266                 /* The following value is an upper bound on the amount of storage we
2267                    need.  In non-Mule, it is exact. */
2268                 Bufbyte *storage =
2269                     (Bufbyte *) alloca(XSTRING_LENGTH(string) - punct_count +
2270                                        5 * (word_count - 1) + 4);
2271                 Bufbyte *o = storage;
2272
2273                 *o++ = '\\';
2274                 *o++ = 'b';
2275
2276                 for (i = 0; i < len; i++) {
2277                         Emchar ch = string_char(XSTRING(string), i);
2278
2279                         if (WORD_SYNTAX_P(syntax_table, ch))
2280                                 o += set_charptr_emchar(o, ch);
2281                         else if (i > 0
2282                                  && WORD_SYNTAX_P(syntax_table,
2283                                                   string_char(XSTRING(string),
2284                                                               i - 1))
2285                                  && --word_count) {
2286                                 *o++ = '\\';
2287                                 *o++ = 'W';
2288                                 *o++ = '\\';
2289                                 *o++ = 'W';
2290                                 *o++ = '*';
2291                         }
2292                 }
2293
2294                 *o++ = '\\';
2295                 *o++ = 'b';
2296
2297                 return make_string(storage, o - storage);
2298         }
2299 }
2300 \f
2301 DEFUN("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ",  /*
2302 Search backward from point for STRING.
2303 Set point to the beginning of the occurrence found, and return point.
2304
2305 Optional second argument LIMIT bounds the search; it is a buffer
2306 position.  The match found must not extend before that position.
2307 The value nil is equivalent to (point-min).
2308
2309 Optional third argument NOERROR, if t, means just return nil (no
2310 error) if the search fails.  If neither nil nor t, set point to LIMIT
2311 and return nil.
2312
2313 Optional fourth argument COUNT is a repeat count--search for
2314 successive occurrences.
2315
2316 Optional fifth argument BUFFER specifies the buffer to search in and
2317 defaults to the current buffer.
2318
2319 See also the functions `match-beginning', `match-end' and `replace-match'.
2320 */
2321       (string, limit, noerror, count, buffer))
2322 {
2323         return search_command(string, limit, noerror, count, buffer, -1, 0, 0);
2324 }
2325
2326 DEFUN("search-forward", Fsearch_forward, 1, 5, "sSearch: ",     /*
2327 Search forward from point for STRING.
2328 Set point to the end of the occurrence found, and return point.
2329
2330 Optional second argument LIMIT bounds the search; it is a buffer
2331 position.  The match found must not extend after that position.  The
2332 value nil is equivalent to (point-max).
2333
2334 Optional third argument NOERROR, if t, means just return nil (no
2335 error) if the search fails.  If neither nil nor t, set point to LIMIT
2336 and return nil.
2337
2338 Optional fourth argument COUNT is a repeat count--search for
2339 successive occurrences.
2340
2341 Optional fifth argument BUFFER specifies the buffer to search in and
2342 defaults to the current buffer.
2343
2344 See also the functions `match-beginning', `match-end' and `replace-match'.
2345 */
2346       (string, limit, noerror, count, buffer))
2347 {
2348         return search_command(string, limit, noerror, count, buffer, 1, 0, 0);
2349 }
2350
2351 DEFUN("word-search-backward", Fword_search_backward, 1, 5, "sWord search backward: ",   /*
2352 Search backward from point for STRING, ignoring differences in punctuation.
2353 Set point to the beginning of the occurrence found, and return point.
2354
2355 Optional second argument LIMIT bounds the search; it is a buffer
2356 position.  The match found must not extend before that position.
2357 The value nil is equivalent to (point-min).
2358
2359 Optional third argument NOERROR, if t, means just return nil (no
2360 error) if the search fails.  If neither nil nor t, set point to LIMIT
2361 and return nil.
2362
2363 Optional fourth argument COUNT is a repeat count--search for
2364 successive occurrences.
2365
2366 Optional fifth argument BUFFER specifies the buffer to search in and
2367 defaults to the current buffer.
2368
2369 See also the functions `match-beginning', `match-end' and `replace-match'.
2370 */
2371       (string, limit, noerror, count, buffer))
2372 {
2373         return search_command(wordify(buffer, string), limit, noerror, count,
2374                               buffer, -1, 1, 0);
2375 }
2376
2377 DEFUN("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ",      /*
2378 Search forward from point for STRING, ignoring differences in punctuation.
2379 Set point to the end of the occurrence found, and return point.
2380
2381 Optional second argument LIMIT bounds the search; it is a buffer
2382 position.  The match found must not extend after that position.  The
2383 value nil is equivalent to (point-max).
2384
2385 Optional third argument NOERROR, if t, means just return nil (no
2386 error) if the search fails.  If neither nil nor t, set point to LIMIT
2387 and return nil.
2388
2389 Optional fourth argument COUNT is a repeat count--search for
2390 successive occurrences.
2391
2392 Optional fifth argument BUFFER specifies the buffer to search in and
2393 defaults to the current buffer.
2394
2395 See also the functions `match-beginning', `match-end' and `replace-match'.
2396 */
2397       (string, limit, noerror, count, buffer))
2398 {
2399         return search_command(wordify(buffer, string), limit, noerror, count,
2400                               buffer, 1, 1, 0);
2401 }
2402
2403 DEFUN("re-search-backward", Fre_search_backward, 1, 5, "sRE search backward: ", /*
2404 Search backward from point for match for regular expression REGEXP.
2405 Set point to the beginning of the match, and return point.
2406 The match found is the one starting last in the buffer
2407 and yet ending before the origin of the search.
2408
2409 Optional second argument LIMIT bounds the search; it is a buffer
2410 position.  The match found must not extend before that position.
2411 The value nil is equivalent to (point-min).
2412
2413 Optional third argument NOERROR, if t, means just return nil (no
2414 error) if the search fails.  If neither nil nor t, set point to LIMIT
2415 and return nil.
2416
2417 Optional fourth argument COUNT is a repeat count--search for
2418 successive occurrences.
2419
2420 Optional fifth argument BUFFER specifies the buffer to search in and
2421 defaults to the current buffer.
2422
2423 See also the functions `match-beginning', `match-end' and `replace-match'.
2424 */
2425       (regexp, limit, noerror, count, buffer))
2426 {
2427         return search_command(regexp, limit, noerror, count, buffer, -1, 1, 0);
2428 }
2429
2430 DEFUN("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ",    /*
2431 Search forward from point for regular expression REGEXP.
2432 Set point to the end of the occurrence found, and return point.
2433
2434 Optional second argument LIMIT bounds the search; it is a buffer
2435 position.  The match found must not extend after that position.  The
2436 value nil is equivalent to (point-max).
2437
2438 Optional third argument NOERROR, if t, means just return nil (no
2439 error) if the search fails.  If neither nil nor t, set point to LIMIT
2440 and return nil.
2441
2442 Optional fourth argument COUNT is a repeat count--search for
2443 successive occurrences.
2444
2445 Optional fifth argument BUFFER specifies the buffer to search in and
2446 defaults to the current buffer.
2447
2448 See also the functions `match-beginning', `match-end' and `replace-match'.
2449 */
2450       (regexp, limit, noerror, count, buffer))
2451 {
2452         return search_command(regexp, limit, noerror, count, buffer, 1, 1, 0);
2453 }
2454
2455 DEFUN("posix-search-backward", Fposix_search_backward, 1, 5, "sPosix search backward: ",        /*
2456 Search backward from point for match for regular expression REGEXP.
2457 Find the longest match in accord with Posix regular expression rules.
2458 Set point to the beginning of the match, and return point.
2459 The match found is the one starting last in the buffer
2460 and yet ending before the origin of the search.
2461
2462 Optional second argument LIMIT bounds the search; it is a buffer
2463 position.  The match found must not extend before that position.
2464 The value nil is equivalent to (point-min).
2465
2466 Optional third argument NOERROR, if t, means just return nil (no
2467 error) if the search fails.  If neither nil nor t, set point to LIMIT
2468 and return nil.
2469
2470 Optional fourth argument COUNT is a repeat count--search for
2471 successive occurrences.
2472
2473 Optional fifth argument BUFFER specifies the buffer to search in and
2474 defaults to the current buffer.
2475
2476 See also the functions `match-beginning', `match-end' and `replace-match'.
2477 */
2478       (regexp, limit, noerror, count, buffer))
2479 {
2480         return search_command(regexp, limit, noerror, count, buffer, -1, 1, 1);
2481 }
2482
2483 DEFUN("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ",   /*
2484 Search forward from point for regular expression REGEXP.
2485 Find the longest match in accord with Posix regular expression rules.
2486 Set point to the end of the occurrence found, and return point.
2487
2488 Optional second argument LIMIT bounds the search; it is a buffer
2489 position.  The match found must not extend after that position.  The
2490 value nil is equivalent to (point-max).
2491
2492 Optional third argument NOERROR, if t, means just return nil (no
2493 error) if the search fails.  If neither nil nor t, set point to LIMIT
2494 and return nil.
2495
2496 Optional fourth argument COUNT is a repeat count--search for
2497 successive occurrences.
2498
2499 Optional fifth argument BUFFER specifies the buffer to search in and
2500 defaults to the current buffer.
2501
2502 See also the functions `match-beginning', `match-end' and `replace-match'.
2503 */
2504       (regexp, limit, noerror, count, buffer))
2505 {
2506         return search_command(regexp, limit, noerror, count, buffer, 1, 1, 1);
2507 }
2508 \f
2509 static Lisp_Object free_created_dynarrs(Lisp_Object cons)
2510 {
2511         Dynarr_free(get_opaque_ptr(XCAR(cons)));
2512         Dynarr_free(get_opaque_ptr(XCDR(cons)));
2513         free_opaque_ptr(XCAR(cons));
2514         free_opaque_ptr(XCDR(cons));
2515         free_cons(XCONS(cons));
2516         return Qnil;
2517 }
2518
2519 DEFUN("replace-match", Freplace_match, 1, 5, 0, /*
2520 Replace text matched by last search with REPLACEMENT.
2521 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2522 Otherwise maybe capitalize the whole text, or maybe just word initials,
2523 based on the replaced text.
2524 If the replaced text has only capital letters
2525 and has at least one multiletter word, convert REPLACEMENT to all caps.
2526 If the replaced text has at least one word starting with a capital letter,
2527 then capitalize each word in REPLACEMENT.
2528
2529 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2530 Otherwise treat `\\' as special:
2531 `\\&' in REPLACEMENT means substitute original matched text.
2532 `\\N' means substitute what matched the Nth `\\(...\\)'.
2533 If Nth parens didn't match, substitute nothing.
2534 `\\\\' means insert one `\\'.
2535 `\\u' means upcase the next character.
2536 `\\l' means downcase the next character.
2537 `\\U' means begin upcasing all following characters.
2538 `\\L' means begin downcasing all following characters.
2539 `\\E' means terminate the effect of any `\\U' or `\\L'.
2540 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2541 all other case changes that may be made in the replaced text.
2542 FIXEDCASE and LITERAL are optional arguments.
2543 Leaves point at end of replacement text.
2544
2545 The optional fourth argument STRING can be a string to modify.
2546 In that case, this function creates and returns a new string
2547 which is made by replacing the part of STRING that was matched.
2548 When fourth argument is a string, fifth argument STRBUFFER specifies
2549 the buffer to be used for syntax-table and case-table lookup and
2550 defaults to the current buffer.  When fourth argument is not a string,
2551 the buffer that the match occurred in has automatically been remembered
2552 and you do not need to specify it.
2553
2554 When fourth argument is nil, STRBUFFER specifies a subexpression of
2555 the match.  It says to replace just that subexpression instead of the
2556 whole match.  This is useful only after a regular expression search or
2557 match since only regular expressions have distinguished subexpressions.
2558 */
2559       (replacement, fixedcase, literal, string, strbuffer))
2560 {
2561         /* This function has been Mule-ized. */
2562         /* This function can GC */
2563         enum { nochange, all_caps, cap_initial } case_action;
2564         Bufpos pos, last;
2565         int some_multiletter_word;
2566         int some_lowercase;
2567         int some_uppercase;
2568         int some_nonuppercase_initial;
2569         Emchar c, prevc;
2570         Charcount inslen;
2571         struct buffer *buf;
2572         Lisp_Char_Table *syntax_table;
2573         int mc_count;
2574         Lisp_Object buffer;
2575         int_dynarr *ul_action_dynarr = 0;
2576         int_dynarr *ul_pos_dynarr = 0;
2577         int sub = 0;
2578         int speccount;
2579
2580         CHECK_STRING(replacement);
2581
2582         if (!NILP(string)) {
2583                 CHECK_STRING(string);
2584                 if (!EQ(last_thing_searched, Qt))
2585                         error("last thing matched was not a string");
2586                 /* If the match data
2587                    were abstracted into a special "match data" type instead
2588                    of the typical half-assed "let the implementation be
2589                    visible" form it's in, we could extend it to include
2590                    the last string matched and the buffer used for that
2591                    matching.  But of course we can't change it as it is. */
2592                 buf = decode_buffer(strbuffer, 0);
2593                 XSETBUFFER(buffer, buf);
2594         } else {
2595                 if (!NILP(strbuffer)) {
2596                         CHECK_INT(strbuffer);
2597                         sub = XINT(strbuffer);
2598                         if (sub < 0 || sub >= (int)search_regs.num_regs)
2599                                 args_out_of_range(strbuffer,
2600                                                   make_int(search_regs.
2601                                                            num_regs));
2602                 }
2603                 if (!BUFFERP(last_thing_searched))
2604                         error("last thing matched was not a buffer");
2605                 buffer = last_thing_searched;
2606                 buf = XBUFFER(buffer);
2607         }
2608
2609         syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2610
2611         case_action = nochange; /* We tried an initialization */
2612         /* but some C compilers blew it */
2613
2614         if (search_regs.num_regs == 0)
2615                 error("replace-match called before any match found");
2616
2617         if (NILP(string)) {
2618                 if (search_regs.start[sub] < BUF_BEGV(buf)
2619                     || search_regs.start[sub] > search_regs.end[sub]
2620                     || search_regs.end[sub] > BUF_ZV(buf))
2621                         args_out_of_range(make_int(search_regs.start[sub]),
2622                                           make_int(search_regs.end[sub]));
2623         } else {
2624                 if (search_regs.start[0] < 0
2625                     || search_regs.start[0] > search_regs.end[0]
2626                     || search_regs.end[0] > XSTRING_CHAR_LENGTH(string))
2627                         args_out_of_range(make_int(search_regs.start[0]),
2628                                           make_int(search_regs.end[0]));
2629         }
2630
2631         if (NILP(fixedcase)) {
2632                 /* Decide how to casify by examining the matched text. */
2633
2634                 last = search_regs.end[sub];
2635                 prevc = '\n';
2636                 case_action = all_caps;
2637
2638                 /* some_multiletter_word is set nonzero if any original word
2639                    is more than one letter long. */
2640                 some_multiletter_word = 0;
2641                 some_lowercase = 0;
2642                 some_nonuppercase_initial = 0;
2643                 some_uppercase = 0;
2644
2645                 for (pos = search_regs.start[sub]; pos < last; pos++) {
2646                         if (NILP(string))
2647                                 c = BUF_FETCH_CHAR(buf, pos);
2648                         else
2649                                 c = string_char(XSTRING(string), pos);
2650
2651                         if (LOWERCASEP(buf, c)) {
2652                                 /* Cannot be all caps if any original char is lower case */
2653
2654                                 some_lowercase = 1;
2655                                 if (!WORD_SYNTAX_P(syntax_table, prevc))
2656                                         some_nonuppercase_initial = 1;
2657                                 else
2658                                         some_multiletter_word = 1;
2659                         } else if (!NOCASEP(buf, c)) {
2660                                 some_uppercase = 1;
2661                                 if (!WORD_SYNTAX_P(syntax_table, prevc)) ;
2662                                 else
2663                                         some_multiletter_word = 1;
2664                         } else {
2665                                 /* If the initial is a caseless word constituent,
2666                                    treat that like a lowercase initial.  */
2667                                 if (!WORD_SYNTAX_P(syntax_table, prevc))
2668                                         some_nonuppercase_initial = 1;
2669                         }
2670
2671                         prevc = c;
2672                 }
2673
2674                 /* Convert to all caps if the old text is all caps
2675                    and has at least one multiletter word.  */
2676                 if (!some_lowercase && some_multiletter_word)
2677                         case_action = all_caps;
2678                 /* Capitalize each word, if the old text has all capitalized words.  */
2679                 else if (!some_nonuppercase_initial && some_multiletter_word)
2680                         case_action = cap_initial;
2681                 else if (!some_nonuppercase_initial && some_uppercase)
2682                         /* Should x -> yz, operating on X, give Yz or YZ?
2683                            We'll assume the latter.  */
2684                         case_action = all_caps;
2685                 else
2686                         case_action = nochange;
2687         }
2688
2689         /* Do replacement in a string.  */
2690         if (!NILP(string)) {
2691                 Lisp_Object before, after;
2692
2693                 speccount = specpdl_depth();
2694                 before =
2695                     Fsubstring(string, Qzero, make_int(search_regs.start[0]));
2696                 after = Fsubstring(string, make_int(search_regs.end[0]), Qnil);
2697
2698                 /* Do case substitution into REPLACEMENT if desired.  */
2699                 if (NILP(literal)) {
2700                         Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2701                         Charcount strpos;
2702                         /* XEmacs change: rewrote this loop somewhat to make it
2703                            cleaner.  Also added \U, \E, etc. */
2704                         Charcount literal_start = 0;
2705                         /* We build up the substituted string in ACCUM.  */
2706                         Lisp_Object accum;
2707
2708                         accum = Qnil;
2709
2710                         /* OK, the basic idea here is that we scan through the
2711                            replacement string until we find a backslash, which
2712                            represents a substring of the original string to be
2713                            substituted.  We then append onto ACCUM the literal
2714                            text before the backslash (LASTPOS marks the
2715                            beginning of this) followed by the substring of the
2716                            original string that needs to be inserted. */
2717                         for (strpos = 0; strpos < stlen; strpos++) {
2718                                 /* If LITERAL_END is set, we've encountered a backslash
2719                                    (the end of literal text to be inserted). */
2720                                 Charcount literal_end = -1;
2721                                 /* If SUBSTART is set, we need to also insert the
2722                                    text from SUBSTART to SUBEND in the original string. */
2723                                 Charcount substart = -1;
2724                                 Charcount subend = -1;
2725
2726                                 c = string_char(XSTRING(replacement), strpos);
2727                                 if (c == '\\' && strpos < stlen - 1) {
2728                                         c = string_char(XSTRING(replacement),
2729                                                         ++strpos);
2730                                         if (c == '&') {
2731                                                 literal_end = strpos - 1;
2732                                                 substart = search_regs.start[0];
2733                                                 subend = search_regs.end[0];
2734                                         } else if (c >= '1' && c <= '9' &&
2735                                                    c <=
2736                                                    search_regs.num_regs + '0') {
2737                                                 if (search_regs.
2738                                                     start[c - '0'] >= 0) {
2739                                                         literal_end =
2740                                                             strpos - 1;
2741                                                         substart =
2742                                                             search_regs.
2743                                                             start[c - '0'];
2744                                                         subend =
2745                                                             search_regs.end[c -
2746                                                                             '0'];
2747                                                 }
2748                                         } else if (c == 'U' || c == 'u'
2749                                                    || c == 'L' || c == 'l'
2750                                                    || c == 'E') {
2751                                                 /* Keep track of all case changes requested, but don't
2752                                                    make them now.  Do them later so we override
2753                                                    everything else. */
2754                                                 if (!ul_pos_dynarr) {
2755                                                         ul_pos_dynarr =
2756                                                             Dynarr_new(int);
2757                                                         ul_action_dynarr =
2758                                                             Dynarr_new(int);
2759                                                         record_unwind_protect
2760                                                             (free_created_dynarrs,
2761                                                              noseeum_cons
2762                                                              (make_opaque_ptr
2763                                                               (ul_pos_dynarr),
2764                                                               make_opaque_ptr
2765                                                               (ul_action_dynarr)));
2766                                                 }
2767                                                 literal_end = strpos - 1;
2768                                                 Dynarr_add(ul_pos_dynarr,
2769                                                            (!NILP(accum)
2770                                                             ?
2771                                                             XSTRING_CHAR_LENGTH
2772                                                             (accum)
2773                                                             : 0) +
2774                                                            (literal_end -
2775                                                             literal_start));
2776                                                 Dynarr_add(ul_action_dynarr, c);
2777                                         } else if (c == '\\')
2778                                                 /* So we get just one backslash. */
2779                                                 literal_end = strpos;
2780                                 }
2781                                 if (literal_end >= 0) {
2782                                         Lisp_Object literal_text = Qnil;
2783                                         Lisp_Object substring = Qnil;
2784                                         if (literal_end != literal_start)
2785                                                 literal_text =
2786                                                     Fsubstring(replacement,
2787                                                                make_int
2788                                                                (literal_start),
2789                                                                make_int
2790                                                                (literal_end));
2791                                         if (substart >= 0 && subend != substart)
2792                                                 substring = Fsubstring(string,
2793                                                                        make_int
2794                                                                        (substart),
2795                                                                        make_int
2796                                                                        (subend));
2797                                         if (!NILP(literal_text)
2798                                             || !NILP(substring))
2799                                                 accum =
2800                                                     concat3(accum, literal_text,
2801                                                             substring);
2802                                         literal_start = strpos + 1;
2803                                 }
2804                         }
2805
2806                         if (strpos != literal_start)
2807                                 /* some literal text at end to be inserted */
2808                                 replacement =
2809                                     concat2(accum,
2810                                             Fsubstring(replacement,
2811                                                        make_int(literal_start),
2812                                                        make_int(strpos)));
2813                         else
2814                                 replacement = accum;
2815                 }
2816
2817                 /* replacement can be nil. */
2818                 if (NILP(replacement))
2819                         replacement = build_string("");
2820
2821                 if (case_action == all_caps)
2822                         replacement = Fupcase(replacement, buffer);
2823                 else if (case_action == cap_initial)
2824                         replacement = Fupcase_initials(replacement, buffer);
2825
2826                 /* Now finally, we need to process the \U's, \E's, etc. */
2827                 if (ul_pos_dynarr) {
2828                         int i = 0;
2829                         int cur_action = 'E';
2830                         Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2831                         Charcount strpos;
2832
2833                         for (strpos = 0; strpos < stlen; strpos++) {
2834                                 Emchar curchar =
2835                                     string_char(XSTRING(replacement), strpos);
2836                                 Emchar newchar = -1;
2837                                 if (i < Dynarr_length(ul_pos_dynarr) &&
2838                                     strpos == Dynarr_at(ul_pos_dynarr, i)) {
2839                                         int new_action =
2840                                             Dynarr_at(ul_action_dynarr, i);
2841                                         i++;
2842                                         if (new_action == 'u')
2843                                                 newchar = UPCASE(buf, curchar);
2844                                         else if (new_action == 'l')
2845                                                 newchar =
2846                                                     DOWNCASE(buf, curchar);
2847                                         else
2848                                                 cur_action = new_action;
2849                                 }
2850                                 if (newchar == -1) {
2851                                         if (cur_action == 'U')
2852                                                 newchar = UPCASE(buf, curchar);
2853                                         else if (cur_action == 'L')
2854                                                 newchar =
2855                                                     DOWNCASE(buf, curchar);
2856                                         else
2857                                                 newchar = curchar;
2858                                 }
2859                                 if (newchar != curchar)
2860                                         set_string_char(XSTRING(replacement),
2861                                                         strpos, newchar);
2862                         }
2863                 }
2864
2865                 /* frees the Dynarrs if necessary. */
2866                 unbind_to(speccount, Qnil);
2867                 return concat3(before, replacement, after);
2868         }
2869
2870         mc_count = begin_multiple_change(buf, search_regs.start[sub],
2871                                          search_regs.end[sub]);
2872
2873         /* begin_multiple_change() records an unwind-protect, so we need to
2874            record this value now. */
2875         speccount = specpdl_depth();
2876
2877         /* We insert the replacement text before the old text, and then
2878            delete the original text.  This means that markers at the
2879            beginning or end of the original will float to the corresponding
2880            position in the replacement.  */
2881         BUF_SET_PT(buf, search_regs.start[sub]);
2882         if (!NILP(literal))
2883                 Finsert(1, &replacement);
2884         else {
2885                 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2886                 Charcount strpos;
2887                 struct gcpro gcpro1;
2888                 GCPRO1(replacement);
2889                 for (strpos = 0; strpos < stlen; strpos++) {
2890                         /* on the first iteration assert(offset==0),
2891                            exactly complementing BUF_SET_PT() above.
2892                            During the loop, it keeps track of the amount inserted.
2893                          */
2894                         Charcount offset = BUF_PT(buf) - search_regs.start[sub];
2895
2896                         c = string_char(XSTRING(replacement), strpos);
2897                         if (c == '\\' && strpos < stlen - 1) {
2898                                 /* XXX FIXME: replacing just a substring non-literally
2899                                    using backslash refs to the match looks dangerous.  But
2900                                    <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2901                                    <duwe@caldera.de> claims Finsert_buffer_substring already
2902                                    handles this correctly.
2903                                  */
2904                                 c = string_char(XSTRING(replacement), ++strpos);
2905                                 if (c == '&')
2906                                         Finsert_buffer_substring
2907                                             (buffer,
2908                                              make_int(search_regs.start[0] +
2909                                                       offset),
2910                                              make_int(search_regs.end[0] +
2911                                                       offset));
2912                                 else if (c >= '1' && c <= '9'
2913                                          && c <= search_regs.num_regs + '0') {
2914                                         if (search_regs.start[c - '0'] >= 1)
2915                                                 Finsert_buffer_substring
2916                                                     (buffer,
2917                                                      make_int(search_regs.
2918                                                               start[c - '0'] +
2919                                                               offset),
2920                                                      make_int(search_regs.
2921                                                               end[c - '0'] +
2922                                                               offset));
2923                                 } else if (c == 'U' || c == 'u' || c == 'L'
2924                                            || c == 'l' || c == 'E') {
2925                                         /* Keep track of all case changes requested, but don't
2926                                            make them now.  Do them later so we override
2927                                            everything else. */
2928                                         if (!ul_pos_dynarr) {
2929                                                 ul_pos_dynarr = Dynarr_new(int);
2930                                                 ul_action_dynarr =
2931                                                     Dynarr_new(int);
2932                                                 record_unwind_protect
2933                                                     (free_created_dynarrs,
2934                                                      Fcons(make_opaque_ptr
2935                                                            (ul_pos_dynarr),
2936                                                            make_opaque_ptr
2937                                                            (ul_action_dynarr)));
2938                                         }
2939                                         Dynarr_add(ul_pos_dynarr, BUF_PT(buf));
2940                                         Dynarr_add(ul_action_dynarr, c);
2941                                 } else
2942                                         buffer_insert_emacs_char(buf, c);
2943                         } else
2944                                 buffer_insert_emacs_char(buf, c);
2945                 }
2946                 UNGCPRO;
2947         }
2948
2949         inslen = BUF_PT(buf) - (search_regs.start[sub]);
2950         buffer_delete_range(buf, search_regs.start[sub] + inslen,
2951                             search_regs.end[sub] + inslen, 0);
2952
2953         if (case_action == all_caps)
2954                 Fupcase_region(make_int(BUF_PT(buf) - inslen),
2955                                make_int(BUF_PT(buf)), buffer);
2956         else if (case_action == cap_initial)
2957                 Fupcase_initials_region(make_int(BUF_PT(buf) - inslen),
2958                                         make_int(BUF_PT(buf)), buffer);
2959
2960         /* Now go through and make all the case changes that were requested
2961            in the replacement string. */
2962         if (ul_pos_dynarr) {
2963                 Bufpos eend = BUF_PT(buf);
2964                 int i = 0;
2965                 int cur_action = 'E';
2966
2967                 for (pos = BUF_PT(buf) - inslen; pos < eend; pos++) {
2968                         Emchar curchar = BUF_FETCH_CHAR(buf, pos);
2969                         Emchar newchar = -1;
2970                         if (i < Dynarr_length(ul_pos_dynarr) &&
2971                             pos == Dynarr_at(ul_pos_dynarr, i)) {
2972                                 int new_action = Dynarr_at(ul_action_dynarr, i);
2973                                 i++;
2974                                 if (new_action == 'u')
2975                                         newchar = UPCASE(buf, curchar);
2976                                 else if (new_action == 'l')
2977                                         newchar = DOWNCASE(buf, curchar);
2978                                 else
2979                                         cur_action = new_action;
2980                         }
2981                         if (newchar == -1) {
2982                                 if (cur_action == 'U')
2983                                         newchar = UPCASE(buf, curchar);
2984                                 else if (cur_action == 'L')
2985                                         newchar = DOWNCASE(buf, curchar);
2986                                 else
2987                                         newchar = curchar;
2988                         }
2989                         if (newchar != curchar)
2990                                 buffer_replace_char(buf, pos, newchar, 0, 0);
2991                 }
2992         }
2993
2994         /* frees the Dynarrs if necessary. */
2995         unbind_to(speccount, Qnil);
2996         end_multiple_change(buf, mc_count);
2997
2998         return Qnil;
2999 }
3000 \f
3001 static Lisp_Object match_limit(Lisp_Object num, int beginningp)
3002 {
3003         /* This function has been Mule-ized. */
3004         int n;
3005
3006         CHECK_INT(num);
3007         n = XINT(num);
3008         if (n < 0 || search_regs.num_regs <= 0)
3009                 args_out_of_range(num, make_int(search_regs.num_regs));
3010         if (n >= search_regs.num_regs || search_regs.start[n] < 0)
3011                 return Qnil;
3012         return make_int(beginningp ? search_regs.start[n] : search_regs.end[n]);
3013 }
3014
3015 DEFUN("match-beginning", Fmatch_beginning, 1, 1, 0,     /*
3016 Return position of start of text matched by last regexp search.
3017 NUM, specifies which parenthesized expression in the last regexp.
3018 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3019 Zero means the entire text matched by the whole regexp or whole string.
3020 */
3021       (num))
3022 {
3023         return match_limit(num, 1);
3024 }
3025
3026 DEFUN("match-end", Fmatch_end, 1, 1, 0, /*
3027 Return position of end of text matched by last regexp search.
3028 NUM specifies which parenthesized expression in the last regexp.
3029 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3030 Zero means the entire text matched by the whole regexp or whole string.
3031 */
3032       (num))
3033 {
3034         return match_limit(num, 0);
3035 }
3036
3037 DEFUN("match-data", Fmatch_data, 0, 2, 0,       /*
3038 Return a list containing all info on what the last regexp search matched.
3039 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
3040 All the elements are markers or nil (nil if the Nth pair didn't match)
3041 if the last match was on a buffer; integers or nil if a string was matched.
3042 Use `store-match-data' to reinstate the data in this list.
3043
3044 If INTEGERS (the optional first argument) is non-nil, always use integers
3045 \(rather than markers) to represent buffer positions.
3046 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
3047 to hold all the values, and if INTEGERS is non-nil, no consing is done.
3048 */
3049       (integers, reuse))
3050 {
3051         /* This function has been Mule-ized. */
3052         Lisp_Object tail, prev;
3053         Lisp_Object *data;
3054         int i;
3055         Charcount len;
3056
3057         if (NILP(last_thing_searched))
3058                 /*error ("match-data called before any match found"); */
3059                 return Qnil;
3060
3061         data = alloca_array(Lisp_Object, 2 * search_regs.num_regs);
3062
3063         len = -1;
3064         for (i = 0; i < search_regs.num_regs; i++) {
3065                 Bufpos start = search_regs.start[i];
3066                 if (start >= 0) {
3067                         if (EQ(last_thing_searched, Qt)
3068                             || !NILP(integers)) {
3069                                 data[2 * i] = make_int(start);
3070                                 data[2 * i + 1] = make_int(search_regs.end[i]);
3071                         } else if (BUFFERP(last_thing_searched)) {
3072                                 data[2 * i] = Fmake_marker();
3073                                 Fset_marker(data[2 * i],
3074                                             make_int(start),
3075                                             last_thing_searched);
3076                                 data[2 * i + 1] = Fmake_marker();
3077                                 Fset_marker(data[2 * i + 1],
3078                                             make_int(search_regs.end[i]),
3079                                             last_thing_searched);
3080                         } else
3081                                 /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
3082                                 abort();
3083
3084                         len = i;
3085                 } else
3086                         data[2 * i] = data[2 * i + 1] = Qnil;
3087         }
3088         if (!CONSP(reuse))
3089                 return Flist(2 * len + 2, data);
3090
3091         /* If REUSE is a list, store as many value elements as will fit
3092            into the elements of REUSE.  */
3093         for (prev = Qnil, i = 0, tail = reuse; CONSP(tail);
3094              i++, tail = XCDR(tail)) {
3095                 if (i < 2 * len + 2)
3096                         XCAR(tail) = data[i];
3097                 else
3098                         XCAR(tail) = Qnil;
3099                 prev = tail;
3100         }
3101
3102         /* If we couldn't fit all value elements into REUSE,
3103            cons up the rest of them and add them to the end of REUSE.  */
3104         if (i < 2 * len + 2)
3105                 XCDR(prev) = Flist(2 * len + 2 - i, data + i);
3106
3107         return reuse;
3108 }
3109
3110 DEFUN("store-match-data", Fstore_match_data, 1, 1, 0,   /*
3111 Set internal data on last search match from elements of LIST.
3112 LIST should have been created by calling `match-data' previously.
3113 */
3114       (list))
3115 {
3116         /* This function has been Mule-ized. */
3117         REGISTER int i;
3118         REGISTER Lisp_Object marker;
3119         int num_regs;
3120         int length;
3121
3122         if (running_asynch_code)
3123                 save_search_regs();
3124
3125         CONCHECK_LIST(list);
3126
3127         /* Unless we find a marker with a buffer in LIST, assume that this
3128            match data came from a string.  */
3129         last_thing_searched = Qt;
3130
3131         /* Allocate registers if they don't already exist.  */
3132         length = XINT(Flength(list)) / 2;
3133         num_regs = search_regs.num_regs;
3134
3135         if (length > num_regs) {
3136                 if (search_regs.num_regs == 0) {
3137                         search_regs.start = xnew_atomic_array(regoff_t, length);
3138                         search_regs.end = xnew_atomic_array(regoff_t, length);
3139                 } else {
3140                         XREALLOC_ARRAY(search_regs.start, regoff_t, length);
3141                         XREALLOC_ARRAY(search_regs.end, regoff_t, length);
3142                 }
3143
3144                 search_regs.num_regs = length;
3145         }
3146
3147         for (i = 0; i < num_regs; i++) {
3148                 marker = Fcar(list);
3149                 if (NILP(marker)) {
3150                         search_regs.start[i] = -1;
3151                         list = Fcdr(list);
3152                 } else {
3153                         if (MARKERP(marker)) {
3154                                 if (XMARKER(marker)->buffer == 0)
3155                                         marker = Qzero;
3156                                 else
3157                                         XSETBUFFER(last_thing_searched,
3158                                                    XMARKER(marker)->buffer);
3159                         }
3160
3161                         CHECK_INT_COERCE_MARKER(marker);
3162                         search_regs.start[i] = XINT(marker);
3163                         list = Fcdr(list);
3164
3165                         marker = Fcar(list);
3166                         if (MARKERP(marker) && XMARKER(marker)->buffer == 0)
3167                                 marker = Qzero;
3168
3169                         CHECK_INT_COERCE_MARKER(marker);
3170                         search_regs.end[i] = XINT(marker);
3171                 }
3172                 list = Fcdr(list);
3173         }
3174
3175         return Qnil;
3176 }
3177
3178 /* If non-zero the match data have been saved in saved_search_regs
3179    during the execution of a sentinel or filter. */
3180 static int search_regs_saved;
3181 static struct re_registers saved_search_regs;
3182
3183 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3184    if asynchronous code (filter or sentinel) is running. */
3185 static void save_search_regs(void)
3186 {
3187         if (!search_regs_saved) {
3188                 saved_search_regs.num_regs = search_regs.num_regs;
3189                 saved_search_regs.start = search_regs.start;
3190                 saved_search_regs.end = search_regs.end;
3191                 search_regs.num_regs = 0;
3192                 search_regs.start = 0;
3193                 search_regs.end = 0;
3194
3195                 search_regs_saved = 1;
3196         }
3197 }
3198
3199 /* Called upon exit from filters and sentinels. */
3200 void restore_match_data(void)
3201 {
3202         if (search_regs_saved) {
3203                 if (search_regs.num_regs > 0) {
3204                         xfree(search_regs.start);
3205                         xfree(search_regs.end);
3206                 }
3207                 search_regs.num_regs = saved_search_regs.num_regs;
3208                 search_regs.start = saved_search_regs.start;
3209                 search_regs.end = saved_search_regs.end;
3210
3211                 search_regs_saved = 0;
3212         }
3213 }
3214
3215 /* Quote a string to inactivate reg-expr chars */
3216
3217 DEFUN("regexp-quote", Fregexp_quote, 1, 1, 0,   /*
3218 Return a regexp string which matches exactly STRING and nothing else.
3219 */
3220       (string))
3221 {
3222         REGISTER Bufbyte *in, *out, *end;
3223         REGISTER Bufbyte *temp;
3224
3225         CHECK_STRING(string);
3226
3227         temp = (Bufbyte *) alloca(XSTRING_LENGTH(string) * 2);
3228
3229         /* Now copy the data into the new string, inserting escapes. */
3230
3231         in = XSTRING_DATA(string);
3232         end = in + XSTRING_LENGTH(string);
3233         out = temp;
3234
3235         while (in < end) {
3236                 Emchar c = charptr_emchar(in);
3237
3238                 if (c == '[' || c == ']'
3239                     || c == '*' || c == '.' || c == '\\'
3240                     || c == '?' || c == '+' || c == '^' || c == '$')
3241                         *out++ = '\\';
3242                 out += set_charptr_emchar(out, c);
3243                 INC_CHARPTR(in);
3244         }
3245
3246         return make_string(temp, out - temp);
3247 }
3248
3249 DEFUN("set-word-regexp", Fset_word_regexp, 1, 1, 0,     /*
3250 Set the regexp to be used to match a word in regular-expression searching.
3251 #### Not yet implemented.  Currently does nothing.
3252 #### Do not use this yet.  Its calling interface is likely to change.
3253 */
3254       (regexp))
3255 {
3256         return Qnil;
3257 }
3258 \f
3259 /************************************************************************/
3260 /*                            initialization                            */
3261 /************************************************************************/
3262
3263 void syms_of_search(void)
3264 {
3265
3266         DEFERROR_STANDARD(Qsearch_failed, Qinvalid_operation);
3267         DEFERROR_STANDARD(Qinvalid_regexp, Qsyntax_error);
3268
3269 #ifdef EF_USE_COMPRE
3270         defsymbol(&Qcompre, "compre");
3271         defsymbol(&Qcomprep, "comprep");
3272
3273         DEFSUBR(Fcompile_regexp);
3274         DEFSUBR(Fdefregexp);
3275 #endif
3276
3277         DEFSUBR(Flooking_at);
3278         DEFSUBR(Fposix_looking_at);
3279         DEFSUBR(Fstring_match);
3280         DEFSUBR(Fposix_string_match);
3281         DEFSUBR(Fskip_chars_forward);
3282         DEFSUBR(Fskip_chars_backward);
3283         DEFSUBR(Fskip_syntax_forward);
3284         DEFSUBR(Fskip_syntax_backward);
3285         DEFSUBR(Fsearch_forward);
3286         DEFSUBR(Fsearch_backward);
3287         DEFSUBR(Fword_search_forward);
3288         DEFSUBR(Fword_search_backward);
3289         DEFSUBR(Fre_search_forward);
3290         DEFSUBR(Fre_search_backward);
3291         DEFSUBR(Fposix_search_forward);
3292         DEFSUBR(Fposix_search_backward);
3293         DEFSUBR(Freplace_match);
3294         DEFSUBR(Fmatch_beginning);
3295         DEFSUBR(Fmatch_end);
3296         DEFSUBR(Fmatch_data);
3297         DEFSUBR(Fstore_match_data);
3298         DEFSUBR(Fregexp_quote);
3299         DEFSUBR(Fset_word_regexp);
3300 }
3301
3302 void reinit_vars_of_search(void)
3303 {
3304         int i;
3305
3306         last_thing_searched = Qnil;
3307         staticpro_nodump(&last_thing_searched);
3308
3309         for (i = 0; i < REGEXP_CACHE_SIZE; ++i) {
3310                 searchbufs[i].buf.allocated = 100;
3311                 searchbufs[i].buf.buffer = (unsigned char *)xmalloc_atomic(100);
3312                 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3313                 searchbufs[i].regexp = Qnil;
3314                 staticpro_nodump(&searchbufs[i].regexp);
3315                 searchbufs[i].next =
3316                         (i == REGEXP_CACHE_SIZE - 1 ? 0 : &searchbufs[i + 1]);
3317         }
3318         searchbuf_head = &searchbufs[0];
3319 }
3320
3321 void vars_of_search(void)
3322 {
3323         reinit_vars_of_search();
3324
3325         DEFVAR_LISP("forward-word-regexp", &Vforward_word_regexp        /*
3326 *Regular expression to be used in `forward-word'.
3327 #### Not yet implemented.
3328                                                                          */ );
3329         Vforward_word_regexp = Qnil;
3330
3331         DEFVAR_LISP("backward-word-regexp", &Vbackward_word_regexp      /*
3332 *Regular expression to be used in `backward-word'.
3333 #### Not yet implemented.
3334                                                                          */ );
3335         Vbackward_word_regexp = Qnil;
3336 }
3337
3338 void complex_vars_of_search(void)
3339 {
3340         Vskip_chars_range_table = Fmake_range_table();
3341         staticpro(&Vskip_chars_range_table);
3342 }