Wand updates from Evgeny
[sxemacs] / src / search.c
1 /* String search routines for SXEmacs.
2    Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3    Copyright (C) 1995 Sun Microsystems, Inc.
4
5 This file is part of SXEmacs
6
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
19
20
21 /* Synched up with: FSF 19.29, except for region-cache stuff. */
22
23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
24
25 /* This file has been Mule-ized except for the TRT stuff. */
26
27 #include <config.h>
28 #include "lisp.h"
29
30 #include "buffer.h"
31 #include "ui/insdel.h"
32 #include "opaque.h"
33 #ifdef REGION_CACHE_NEEDS_WORK
34 #include "region-cache.h"
35 #endif
36 #include "syntax.h"
37
38 #include <sys/types.h>
39 #include "regex.h"
40 #include "casetab.h"
41 #include "chartab.h"
42
43 #include "dynacat.h"
44
45 #define TRANSLATE(table, pos)   \
46  (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
47 \f
48 #include "elhash.h"
49 #define REGEXP_CACHE_SIZE 0x80
50 #define REGEXP_CACHE_HASH_MASK (REGEXP_CACHE_SIZE-1)
51 #define REGEXP_FASTMAP_SIZE 0400
52
53 #define __REGEXP_DEBUG__(args...)       fprintf(stderr, "REGEXP " args)
54 #ifndef REGEXP_DEBUG_FLAG
55 #define REGEXP_DEBUG(args...)
56 #else
57 #define REGEXP_DEBUG(args...)           __REGEXP_DEBUG__(args)
58 #endif
59 #define REGEXP_DEBUG_COMPRE(args...)    REGEXP_DEBUG("[compre]: " args)
60 #define REGEXP_DEBUG_COMPRE_C(args...)  REGEXP_DEBUG("[compre/cache]: " args)
61 #define REGEXP_DEBUG_COMPRE_H(args...)  REGEXP_DEBUG("[compre/hash]: " args)
62 #define REGEXP_CRITICAL(args...)        __REGEXP_DEBUG__("CRITICAL: " args)
63
64
65 /* If the regexp is non-nil, then the buffer contains the compiled form
66    of that regexp, suitable for searching.  */
67 struct regexp_cache {
68         struct regexp_cache *next;
69         Lisp_Object regexp;
70         struct re_pattern_buffer buf;
71         char fastmap[REGEXP_FASTMAP_SIZE];
72         /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
73         char posix;
74 };
75
76 /* The instances of that struct.  */
77 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
78
79 /* The head of the linked list; points to the most recently used buffer.  */
80 static struct regexp_cache *searchbuf_head;
81
82 /* Every call to re_match, etc., must pass &search_regs as the regs
83    argument unless you can show it is unnecessary (i.e., if re_match
84    is certainly going to be called again before region-around-match
85    can be called).
86
87    Since the registers are now dynamically allocated, we need to make
88    sure not to refer to the Nth register before checking that it has
89    been allocated by checking search_regs.num_regs.
90
91    The regex code keeps track of whether it has allocated the search
92    buffer using bits in the re_pattern_buffer.  This means that whenever
93    you compile a new pattern, it completely forgets whether it has
94    allocated any registers, and will allocate new registers the next
95    time you call a searching or matching function.  Therefore, we need
96    to call re_set_registers after compiling a new pattern or after
97    setting the match registers, so that the regex functions will be
98    able to free or re-allocate it properly.  */
99
100 /* Note: things get trickier under Mule because the values returned from
101    the regexp routines are in Bytinds but we need them to be in Bufpos's.
102    We take the easy way out for the moment and just convert them immediately.
103    We could be more clever by not converting them until necessary, but
104    that gets real ugly real fast since the buffer might have changed and
105    the positions might be out of sync or out of range.
106    */
107 static struct re_registers search_regs;
108
109 /* The buffer in which the last search was performed, or
110    Qt if the last search was done in a string;
111    Qnil if no searching has been done yet.  */
112 static Lisp_Object last_thing_searched;
113
114 /* error condition signalled when regexp compile_pattern fails */
115
116 Lisp_Object Qinvalid_regexp;
117
118 /* Regular expressions used in forward/backward-word */
119 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
120
121 /* range table for use with skip_chars.  Only needed for Mule. */
122 Lisp_Object Vskip_chars_range_table;
123
124 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len);
125 static void clear_unused_search_regs(struct re_registers *regp, int no_sub);
126 static void save_search_regs(void);
127 static Bufpos simple_search(struct buffer *buf, Bufbyte * base_pat,
128                             Bytecount len, Bytind pos, Bytind lim,
129                             EMACS_INT n, Lisp_Object trt);
130 static Bufpos boyer_moore(struct buffer *buf, Bufbyte * base_pat,
131                           Bytecount len, Bytind pos, Bytind lim,
132                           EMACS_INT n, Lisp_Object trt,
133                           Lisp_Object inverse_trt, int charset_base);
134 static Bufpos search_buffer(struct buffer *buf, Lisp_Object str,
135                             Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
136                             Lisp_Object trt, Lisp_Object inverse_trt,
137                             int posix);
138
139 static void matcher_overflow(void)
140 {
141         error("Stack overflow in regexp matcher");
142 }
143
144 #ifdef EF_USE_COMPRE
145 EXFUN(Fget, 3);
146 EXFUN(Fput, 3);
147 EXFUN(Fremprop, 2);
148 #define COMPRE_T                struct re_pattern_buffer
149 #define COMPREP(obj)                                    \
150         (DYNACATP(obj) && EQ(XDYNACAT_TYPE(obj), Qcompre))
151 #define COMPRE_GET(obj) (COMPRE_T*)XDYNACAT(obj)->ptr
152 #define COMPRE_PUT(obj, rec)    do {                            \
153                 obj = make_dynacat((void*)rec);                 \
154                 set_dynacat_printer(obj, compre_prfun);         \
155                 set_dynacat_finaliser(obj, compre_finfun);      \
156                 XDYNACAT_TYPE(obj) = Qcompre;                   \
157         } while (0)
158 #define XCOMPRE_GET(obj)        XSTRING(obj)->compre
159 #define XCOMPRE_PUT(obj, b)     XSTRING(obj)->compre = b
160 #define XCOMPRE_REM(obj)        XSTRING(obj)->compre = Qnil
161
162 /* idenitifier for hard-cached rexp */
163 Lisp_Object Qcompre, Qcomprep;
164 static Lisp_Object cache_regexp(Lisp_Object, struct re_pattern_buffer*);
165 static COMPRE_T* make_compre(void)
166 #if defined(__GNUC__)
167         __attribute__((unused))
168 #endif
169         ;
170 static COMPRE_T* clone_compre(COMPRE_T*);
171 static inline void free_compre(COMPRE_T*);
172 #endif
173
174 /* Compile a regexp and signal a Lisp error if anything goes wrong.
175    PATTERN is the pattern to compile.
176    CP is the place to put the result.
177    TRANSLATE is a translation table for ignoring case, or NULL for none.
178    REGP is the structure that says where to store the "register"
179    values that will result from matching this pattern.
180    If it is 0, we should compile the pattern not to record any
181    subexpression bounds.
182    POSIX is nonzero if we want full backtracking (POSIX style)
183    for this pattern.  0 means backtrack only enough to get a valid match.  */
184
185 static int
186 compile_pattern_1(struct regexp_cache *cp, Lisp_Object pattern,
187                   Lisp_Object translate, struct re_registers *regp, int posix,
188                   Error_behavior errb)
189 {
190         const char *val;
191         reg_syntax_t old;
192
193         cp->regexp = Qnil;
194         cp->buf.translate = translate;
195         cp->posix = posix;
196         old = re_set_syntax(RE_SYNTAX_EMACS
197                             | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
198         val = (const char *)
199             re_compile_pattern((char *)XSTRING_DATA(pattern),
200                                XSTRING_LENGTH(pattern), &cp->buf);
201         re_set_syntax(old);
202         if (val) {
203                 maybe_signal_error(Qinvalid_regexp, list1(build_string(val)),
204                                    Qsearch, errb);
205                 return 0;
206         }
207
208         cp->regexp = Fcopy_sequence(pattern);
209         return 1;
210 }
211
212 #ifdef EF_USE_COMPRE
213 /* extremely simple hash table, no collision management is performed */
214 static struct re_pattern_buffer *
215 try_pattern_from_cache(Lisp_Object pattern,
216                        struct re_registers *regp,
217                        Lisp_Object translate,
218                        int posix,
219                        Error_behavior errb)
220 {
221         struct regexp_cache *cp;
222         hcode_t pathash;
223         int place;
224
225         pathash = internal_hash(pattern, 0);
226         place = (int)((pathash>>4) & REGEXP_CACHE_HASH_MASK);
227         cp = &(searchbufs[place]);
228         REGEXP_DEBUG_COMPRE_H("trying hash place: %d.\n", place);
229
230         if (!NILP(Fstring_equal(cp->regexp, pattern))
231             && EQ(cp->buf.translate, translate) && cp->posix == posix) {
232                 /* found the pig */
233                 REGEXP_DEBUG_COMPRE_H("match: %d.\n", place);
234                 ;
235         } else {
236                 /* collision or empty slot, dont care just put the
237                  * new regexp there */
238                 if (!compile_pattern_1(
239                             cp, pattern, translate, regp, posix, errb))
240                         return 0;
241                 REGEXP_DEBUG_COMPRE_H("collision: %d.\n", place);
242         }
243
244         return &cp->buf;
245 }
246 #else  /* !EF_USE_COMPRE */
247 static struct re_pattern_buffer *
248 try_pattern_from_cache(Lisp_Object pattern,
249                        struct re_registers *regp,
250                        Lisp_Object translate,
251                        int posix,
252                        Error_behavior errb)
253 {
254         struct regexp_cache *cp, **cpp;
255
256         for (cpp = &searchbuf_head;; cpp = &cp->next) {
257                 cp = *cpp;
258                 if (!NILP(Fstring_equal(cp->regexp, pattern))
259                     && EQ(cp->buf.translate, translate)
260                     && cp->posix == posix)
261                         break;
262
263                 /* If we're at the end of the cache, compile
264                  * into the last cell.  */
265                 if (cp->next == 0) {
266                         if (!compile_pattern_1(
267                                     cp, pattern, translate, regp, posix, errb))
268                                 return 0;
269                         break;
270                 }
271         }
272
273         /* When we get here, cp (aka *cpp) contains the compiled pattern,
274            either because we found it in the cache or because we just compiled it.
275            Move it to the front of the queue to mark it as most recently used.  */
276         *cpp = cp->next;
277         cp->next = searchbuf_head;
278         searchbuf_head = cp;
279
280         return &cp->buf;
281 }
282 #endif  /* EF_USE_COMPRE */
283
284 /* Compile a regexp if necessary, but first check to see if there's one in
285    the cache.
286    PATTERN is the pattern to compile.
287    TRANSLATE is a translation table for ignoring case, or NULL for none.
288    REGP is the structure that says where to store the "register"
289    values that will result from matching this pattern.
290    If it is 0, we should compile the pattern not to record any
291    subexpression bounds.
292    POSIX is nonzero if we want full backtracking (POSIX style)
293    for this pattern.  0 means backtrack only enough to get a valid match.  */
294
295 struct re_pattern_buffer *
296 compile_pattern(Lisp_Object pattern,
297                 struct re_registers *regp,
298                 Lisp_Object translate,
299                 int posix,
300                 Error_behavior errb)
301 {
302         struct re_pattern_buffer *result = NULL;
303
304 #ifdef EF_USE_COMPRE
305         Lisp_Object rc = XCOMPRE_GET(pattern);
306
307         if (!NILP(rc) && COMPREP(rc) &&
308             (result = COMPRE_GET(rc)) &&
309             result->re_ngroups >= 0 && result->re_ngroups < 256) {
310                 REGEXP_DEBUG_COMPRE_C("using cache: 0x%x.\n",
311                                       (unsigned int)result);
312                 ;
313         } else {
314 #endif
315                 result = try_pattern_from_cache(
316                         pattern, regp, translate, posix, errb);
317 #ifdef EF_USE_COMPRE
318                 cache_regexp(pattern, result);
319         }
320 #endif
321
322
323         /* Advise the searching functions about the space we have allocated
324            for register data.  */
325         if (regp)
326                 re_set_registers(result, regp, regp->num_regs,
327                                  regp->start, regp->end);
328
329         return result;
330 }
331
332 #ifdef EF_USE_COMPRE
333 static void
334 compre_prfun(Lisp_Object obj, Lisp_Object pcfun, int escflag)
335 {
336         write_fmt_str(pcfun, "#<compiled regexp %lx", 
337                       (long unsigned int)((COMPRE_GET(obj))->buffer));
338         if (escflag);
339 }
340
341 static void
342 compre_finfun(Lisp_Object obj, int unused)
343 {
344         REGEXP_DEBUG_COMPRE_C("0x%lx@0x%lx will pass away\n",
345                               (long unsigned int)COMPRE_GET(obj),
346                               (long unsigned int)obj);
347         free_compre(COMPRE_GET(obj));
348 }
349
350 #if 1
351 static COMPRE_T *
352 make_compre(void)
353 {
354         COMPRE_T *result = xnew_and_zero(COMPRE_T);
355
356         result->fastmap = xmalloc_atomic(REGEXP_FASTMAP_SIZE);
357
358         return result;
359 }
360 #endif  /* 0 */
361
362 static COMPRE_T *
363 clone_compre(COMPRE_T *src)
364 {
365         COMPRE_T *result = xnew_and_zero(COMPRE_T);
366
367         /* alloc and clone fastmap */
368         result->fastmap = (char*)xmalloc_atomic(REGEXP_FASTMAP_SIZE);
369         memcpy(result->fastmap, src->fastmap, REGEXP_FASTMAP_SIZE);
370
371         /* alloc and clone buffer */
372         result->buffer = (unsigned char *)xmalloc_atomic(src->allocated);
373         memcpy(result->buffer, src->buffer, src->allocated);
374         result->allocated = src->allocated;
375         result->used = src->used;
376
377         result->syntax = src->syntax;
378         result->translate = src->translate;
379         result->re_nsub = src->re_nsub;
380         result->re_ngroups = src->re_ngroups;
381
382         result->can_be_null = src->can_be_null;
383         result->regs_allocated = src->regs_allocated;
384         result->fastmap_accurate = src->fastmap_accurate;
385
386         result->no_sub = src->no_sub;
387         result->not_bol = src->not_bol;
388         result->not_eol = src->not_eol;
389         result->newline_anchor = src->newline_anchor;
390
391         /* alloc and clone ext_to_int_register */
392         result->external_to_internal_register =
393                 (int*)xmalloc_atomic(
394                         sizeof(int) * src->external_to_internal_register_size);
395         memcpy(result->external_to_internal_register,
396                src->external_to_internal_register,
397                sizeof(int)*src->external_to_internal_register_size);
398         result->external_to_internal_register_size =
399                 src->external_to_internal_register_size;
400
401         return result;
402 }
403
404 static inline void
405 free_compre(COMPRE_T *buf)
406 {
407         if (buf->buffer) {
408                 xfree(buf->buffer);
409                 buf->buffer = NULL;
410         }
411         if (buf->fastmap) {
412                 xfree(buf->fastmap);
413                 buf->fastmap = NULL;
414         }
415         if (buf->external_to_internal_register) {
416                 xfree(buf->external_to_internal_register);
417                 buf->external_to_internal_register = NULL;
418         }
419         xfree(buf);
420 }
421
422 static Lisp_Object
423 cache_regexp(Lisp_Object regexp, COMPRE_T *buf)
424 {
425         Lisp_Object rc;
426         COMPRE_T *resbuf;
427
428         if (buf == NULL)
429                 return Qnil;
430
431         resbuf = clone_compre(buf);
432         COMPRE_PUT(rc, resbuf);
433         XCOMPRE_PUT(regexp, rc);
434
435         REGEXP_DEBUG_COMPRE_C("caching 0x%08x into 0x%08x\n",
436                               (unsigned int)resbuf, (unsigned int)rc);
437         return rc;
438 }
439
440 DEFUN("compile-regexp", Fcompile_regexp, 1, 1, 0, /*
441 Forcibly compile REGEXP and store the result in object-plist.
442 */
443       (regexp))
444 {
445         CHECK_STRING(regexp);
446
447         XCOMPRE_REM(regexp);
448         compile_pattern(regexp, &search_regs, Qnil, 0, ERROR_ME);
449
450         return regexp;
451 }
452
453 DEFUN("defregexp", Fdefregexp, 2, UNEVALLED, 0, /*
454 \(defregexp SYMBOL REGEXP DOCSTRING\)
455 Like `defconst' but for forcing compiled regexps.
456
457 The same restrictions that apply to `defconst' apply here in regard
458 to user variables.  You shouldn't use this for regular expressions
459 that a user might want to customise.  Instead, use `defcustom' with
460 :type 'regexp.
461 */
462       (args))
463 {
464         /* This function can GC */
465         Lisp_Object sym = XCAR(args);
466         Lisp_Object pat = Feval(XCAR(args = XCDR(args)));
467         struct gcpro gcpro1;
468
469         CHECK_STRING(pat);
470
471         GCPRO1(pat);
472         pat = Fcompile_regexp(pat);
473         Fset_default(sym, pat);
474         UNGCPRO;
475
476         if (!NILP(args = XCDR(args))) {
477                 Lisp_Object doc = XCAR(args);
478                 Fput(sym, Qvariable_documentation, doc);
479                 if (!NILP(args = XCDR(args)))
480                         error("too many arguments");
481         }
482 #ifdef I18N3
483         if (!NILP(Vfile_domain))
484                 Fput(sym, Qvariable_domain, Vfile_domain);
485 #endif  /* I18N3 */
486
487         LOADHIST_ATTACH(sym);
488         return sym;
489 }
490 #endif  /* EF_USE_COMPRE */
491
492 /* Error condition used for failing searches */
493 Lisp_Object Qsearch_failed;
494
495 static Lisp_Object signal_failure(Lisp_Object arg)
496 {
497         for (;;)
498                 Fsignal(Qsearch_failed, list1(arg));
499         return Qnil;            /* Not reached. */
500 }
501
502 /* Convert the search registers from Bytinds to Bufpos's.  Needs to be
503    done after each regexp match that uses the search regs.
504
505    We could get a potential speedup by not converting the search registers
506    until it's really necessary, e.g. when match-data or replace-match is
507    called.  However, this complexifies the code a lot (e.g. the buffer
508    could have changed and the Bytinds stored might be invalid) and is
509    probably not a great time-saver. */
510
511 static void fixup_search_regs_for_buffer(struct buffer *buf)
512 {
513         int i;
514         int num_regs = search_regs.num_regs;
515
516         for (i = 0; i < num_regs; i++) {
517                 if (search_regs.start[i] >= 0)
518                         search_regs.start[i] =
519                             bytind_to_bufpos(buf, search_regs.start[i]);
520                 if (search_regs.end[i] >= 0)
521                         search_regs.end[i] =
522                             bytind_to_bufpos(buf, search_regs.end[i]);
523         }
524 }
525
526 /* Similar but for strings. */
527 static void fixup_search_regs_for_string(Lisp_Object string)
528 {
529         int i;
530         int num_regs = search_regs.num_regs;
531
532         /* #### bytecount_to_charcount() is not that efficient.  This function
533            could be faster if it did its own conversion (using INC_CHARPTR()
534            and such), because the register ends are likely to be somewhat ordered.
535            (Even if not, you could sort them.)
536
537            Think about this if this function is a time hog, which it's probably
538            not. */
539         for (i = 0; i < num_regs; i++) {
540                 if (search_regs.start[i] > 0) {
541                         search_regs.start[i] =
542                             bytecount_to_charcount(XSTRING_DATA(string),
543                                                    search_regs.start[i]);
544                 }
545                 if (search_regs.end[i] > 0) {
546                         search_regs.end[i] =
547                             bytecount_to_charcount(XSTRING_DATA(string),
548                                                    search_regs.end[i]);
549                 }
550         }
551 }
552 \f
553 static Lisp_Object
554 looking_at_1(Lisp_Object string, struct buffer *buf, int posix)
555 {
556         /* This function has been Mule-ized, except for the trt table handling. */
557         Lisp_Object val;
558         Bytind p1, p2;
559         Bytecount s1, s2;
560         REGISTER int i;
561         struct re_pattern_buffer *bufp;
562
563         if (running_asynch_code)
564                 save_search_regs();
565
566         CHECK_STRING(string);
567         bufp = compile_pattern(string, &search_regs,
568                                (!NILP(buf->case_fold_search)
569                                 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
570                                posix, ERROR_ME);
571
572         QUIT;
573
574         /* Get pointers and sizes of the two strings
575            that make up the visible portion of the buffer. */
576
577         p1 = BI_BUF_BEGV(buf);
578         p2 = BI_BUF_CEILING_OF(buf, p1);
579         s1 = p2 - p1;
580         s2 = BI_BUF_ZV(buf) - p2;
581
582         regex_match_object = Qnil;
583         regex_emacs_buffer = buf;
584         i = re_match_2(bufp, (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
585                        s1, (char *)BI_BUF_BYTE_ADDRESS(buf, p2), s2,
586                        BI_BUF_PT(buf) - BI_BUF_BEGV(buf), &search_regs,
587                        BI_BUF_ZV(buf) - BI_BUF_BEGV(buf));
588
589         if (i == -2)
590                 matcher_overflow();
591
592         val = (0 <= i ? Qt : Qnil);
593         if (NILP(val))
594                 return Qnil;
595         {
596                 int num_regs = search_regs.num_regs;
597                 for (i = 0; i < num_regs; i++)
598                         if (search_regs.start[i] >= 0) {
599                                 search_regs.start[i] += BI_BUF_BEGV(buf);
600                                 search_regs.end[i] += BI_BUF_BEGV(buf);
601                         }
602         }
603         XSETBUFFER(last_thing_searched, buf);
604         fixup_search_regs_for_buffer(buf);
605         return val;
606 }
607
608 DEFUN("looking-at", Flooking_at, 1, 2, 0,       /*
609 Return t if text after point matches regular expression REGEXP.
610 This function modifies the match data that `match-beginning',
611 `match-end' and `match-data' access; save and restore the match
612 data if you want to preserve them.
613
614 Optional argument BUFFER defaults to the current buffer.
615 */
616       (regexp, buffer))
617 {
618         return looking_at_1(regexp, decode_buffer(buffer, 0), 0);
619 }
620
621 DEFUN("posix-looking-at", Fposix_looking_at, 1, 2, 0,   /*
622 Return t if text after point matches regular expression REGEXP.
623 Find the longest match, in accord with Posix regular expression rules.
624 This function modifies the match data that `match-beginning',
625 `match-end' and `match-data' access; save and restore the match
626 data if you want to preserve them.
627
628 Optional argument BUFFER defaults to the current buffer.
629 */
630       (regexp, buffer))
631 {
632         return looking_at_1(regexp, decode_buffer(buffer, 0), 1);
633 }
634 \f
635 static Lisp_Object
636 string_match_1(Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
637                struct buffer *buf, int posix)
638 {
639         /* This function has been Mule-ized, except for the trt table handling. */
640         Bytecount val;
641         Charcount s;
642         struct re_pattern_buffer *bufp;
643
644         if (running_asynch_code)
645                 save_search_regs();
646
647         CHECK_STRING(regexp);
648         CHECK_STRING(string);
649
650         if (NILP(start))
651                 s = 0;
652         else {
653                 Charcount len = XSTRING_CHAR_LENGTH(string);
654
655                 CHECK_INT(start);
656                 s = XINT(start);
657                 if (s < 0 && -s <= len)
658                         s = len + s;
659                 else if (0 > s || s > len)
660                         args_out_of_range(string, start);
661         }
662
663         bufp = compile_pattern(regexp, &search_regs,
664                                (!NILP(buf->case_fold_search)
665                                 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
666                                0, ERROR_ME);
667         QUIT;
668         {
669                 Bytecount bis = charcount_to_bytecount(XSTRING_DATA(string), s);
670                 regex_match_object = string;
671                 regex_emacs_buffer = buf;
672                 val = re_search(bufp, (char *)XSTRING_DATA(string),
673                                 XSTRING_LENGTH(string), bis,
674                                 XSTRING_LENGTH(string) - bis, &search_regs);
675         }
676         if (val == -2)
677                 matcher_overflow();
678         if (val < 0)
679                 return Qnil;
680         last_thing_searched = Qt;
681         fixup_search_regs_for_string(string);
682         return make_int(bytecount_to_charcount(XSTRING_DATA(string), val));
683 }
684
685 DEFUN("string-match", Fstring_match, 2, 4, 0,   /*
686 Return index of start of first match for REGEXP in STRING, or nil.
687 If third arg START is non-nil, start search at that index in STRING.
688 For index of first char beyond the match, do (match-end 0).
689 `match-end' and `match-beginning' also give indices of substrings
690 matched by parenthesis constructs in the pattern.
691
692 Optional arg BUFFER controls how case folding is done (according to
693 the value of `case-fold-search' in that buffer and that buffer's case
694 tables) and defaults to the current buffer.
695 */
696       (regexp, string, start, buffer))
697 {
698         return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
699                               0);
700 }
701
702 DEFUN("posix-string-match", Fposix_string_match, 2, 4, 0,       /*
703 Return index of start of first match for REGEXP in STRING, or nil.
704 Find the longest match, in accord with Posix regular expression rules.
705 If third arg START is non-nil, start search at that index in STRING.
706 For index of first char beyond the match, do (match-end 0).
707 `match-end' and `match-beginning' also give indices of substrings
708 matched by parenthesis constructs in the pattern.
709
710 Optional arg BUFFER controls how case folding is done (according to
711 the value of `case-fold-search' in that buffer and that buffer's case
712 tables) and defaults to the current buffer.
713 */
714       (regexp, string, start, buffer))
715 {
716         return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
717                               1);
718 }
719
720 /* Match REGEXP against STRING, searching all of STRING,
721    and return the index of the match, or negative on failure.
722    This does not clobber the match data. */
723
724 Bytecount
725 fast_string_match(Lisp_Object regexp, const Bufbyte * nonreloc,
726                   Lisp_Object reloc, Bytecount offset,
727                   Bytecount length, int case_fold_search,
728                   Error_behavior errb, int no_quit)
729 {
730         /* This function has been Mule-ized, except for the trt table handling. */
731         Bytecount val;
732         const Bufbyte *newnonreloc = (const Bufbyte*)nonreloc;
733         struct re_pattern_buffer *bufp;
734
735         bufp = compile_pattern(regexp, 0,
736                                (case_fold_search
737                                 ? XCASE_TABLE_DOWNCASE(current_buffer->
738                                                        case_table)
739                                 : Qnil), 0, errb);
740         if (!bufp)
741                 return -1;      /* will only do this when errb != ERROR_ME */
742         if (!no_quit)
743                 QUIT;
744         else
745                 no_quit_in_re_search = 1;
746
747         fixup_internal_substring(nonreloc, reloc, offset, &length);
748
749
750         
751         if (!NILP(reloc)) {
752                 if (no_quit) {
753                         newnonreloc = XSTRING_DATA(reloc);
754                 } else {
755                         /* QUIT could relocate RELOC.  Therefore we must
756                            alloca() and copy.  No way around this except some
757                            serious rewriting of re_search(). */
758                         /* yeah, let's rewrite this bugger, the warning
759                            hereafter is inevitable too */
760                         if ( length < 0)
761                                 /* By this point
762                                    fixup_internal_substring should
763                                    have updated length, if it didn't
764                                    return with failure...
765                                 */
766                                 return -1;
767                         newnonreloc = alloca(length);
768                         memcpy((void*)newnonreloc, (void*)XSTRING_DATA(reloc), length);
769                 }
770         }
771
772         /* #### evil current-buffer dependency */
773         regex_match_object = reloc;
774         regex_emacs_buffer = current_buffer;
775         val = re_search(bufp, (const char*)newnonreloc + offset, length, 0,
776                         length, 0);
777
778         no_quit_in_re_search = 0;
779         return val;
780 }
781
782 Bytecount fast_lisp_string_match(Lisp_Object regex, Lisp_Object string)
783 {
784         return fast_string_match(regex, 0, string, 0, -1, 0, ERROR_ME, 0);
785 }
786 \f
787 #ifdef REGION_CACHE_NEEDS_WORK
788 /* The newline cache: remembering which sections of text have no newlines.  */
789
790 /* If the user has requested newline caching, make sure it's on.
791    Otherwise, make sure it's off.
792    This is our cheezy way of associating an action with the change of
793    state of a buffer-local variable.  */
794 static void newline_cache_on_off(struct buffer *buf)
795 {
796         if (NILP(buf->cache_long_line_scans)) {
797                 /* It should be off.  */
798                 if (buf->newline_cache) {
799                         free_region_cache(buf->newline_cache);
800                         buf->newline_cache = 0;
801                 }
802         } else {
803                 /* It should be on.  */
804                 if (buf->newline_cache == 0)
805                         buf->newline_cache = new_region_cache();
806         }
807 }
808 #endif
809 \f
810 /* Search in BUF for COUNT instances of the character TARGET between
811    START and END.
812
813    If COUNT is positive, search forwards; END must be >= START.
814    If COUNT is negative, search backwards for the -COUNTth instance;
815       END must be <= START.
816    If COUNT is zero, do anything you please; run rogue, for all I care.
817
818    If END is zero, use BEGV or ZV instead, as appropriate for the
819    direction indicated by COUNT.
820
821    If we find COUNT instances, set *SHORTAGE to zero, and return the
822    position after the COUNTth match.  Note that for reverse motion
823    this is not the same as the usual convention for Emacs motion commands.
824
825    If we don't find COUNT instances before reaching END, set *SHORTAGE
826    to the number of TARGETs left unfound, and return END.
827
828    If ALLOW_QUIT is non-zero, call QUIT periodically. */
829
830 static Bytind
831 bi_scan_buffer(struct buffer *buf, Emchar target, Bytind st, Bytind en,
832                EMACS_INT count, EMACS_INT * shortage, int allow_quit)
833 {
834         /* This function has been Mule-ized. */
835         Bytind lim = en > 0 ? en :
836             ((count > 0) ? BI_BUF_ZV(buf) : BI_BUF_BEGV(buf));
837
838         /* #### newline cache stuff in this function not yet ported */
839
840         assert(count != 0);
841
842         if (shortage)
843                 *shortage = 0;
844
845         if (count > 0) {
846 #ifdef MULE
847                 /* Due to the Mule representation of characters in a buffer,
848                    we can simply search for characters in the range 0 - 127
849                    directly.  For other characters, we do it the "hard" way.
850                    Note that this way works for all characters but the other
851                    way is faster. */
852                 if (target >= 0200) {
853                         while (st < lim && count > 0) {
854                                 if (BI_BUF_FETCH_CHAR(buf, st) == target)
855                                         count--;
856                                 INC_BYTIND(buf, st);
857                         }
858                 } else
859 #endif
860                 {
861                         while (st < lim && count > 0) {
862                                 Bytind _ceil_;
863                                 Bufbyte *bufptr;
864
865                                 _ceil_ = BI_BUF_CEILING_OF(buf, st);
866                                 _ceil_ = min(lim, _ceil_);
867                                 bufptr =
868                                     (Bufbyte *)
869                                     memchr(BI_BUF_BYTE_ADDRESS(buf, st),
870                                            (int)target, _ceil_ - st);
871                                 if (bufptr) {
872                                         count--;
873                                         st = BI_BUF_PTR_BYTE_POS(buf,
874                                                                  bufptr) + 1;
875                                 } else
876                                         st = _ceil_;
877                         }
878                 }
879
880                 if (shortage)
881                         *shortage = count;
882                 if (allow_quit)
883                         QUIT;
884                 return st;
885         } else {
886 #ifdef MULE
887                 if (target >= 0200) {
888                         while (st > lim && count < 0) {
889                                 DEC_BYTIND(buf, st);
890                                 if (BI_BUF_FETCH_CHAR(buf, st) == target)
891                                         count++;
892                         }
893                 } else
894 #endif
895                 {
896                         while (st > lim && count < 0) {
897                                 Bytind _floor_;
898                                 Bufbyte *bufptr;
899                                 Bufbyte *floorptr;
900
901                                 _floor_ = BI_BUF_FLOOR_OF(buf, st);
902                                 _floor_ = max(lim, _floor_);
903                                 /* No memrchr() ... */
904                                 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE(buf, st);
905                                 floorptr = BI_BUF_BYTE_ADDRESS(buf, _floor_);
906                                 while (bufptr >= floorptr) {
907                                         st--;
908                                         /* At this point, both ST and BUFPTR
909                                            refer to the same character.  When
910                                            the loop terminates, ST will always
911                                            point to the last character we
912                                            tried. */
913                                         if (*(unsigned char *)bufptr ==
914                                             (unsigned char)target) {
915                                                 count++;
916                                                 break;
917                                         }
918                                         bufptr--;
919                                 }
920                         }
921                 }
922
923                 if (shortage)
924                         *shortage = -count;
925                 if (allow_quit)
926                         QUIT;
927                 if (count)
928                         return st;
929                 else {
930                         /* We found the character we were looking for; we have to return
931                            the position *after* it due to the strange way that the return
932                            value is defined. */
933                         INC_BYTIND(buf, st);
934                         return st;
935                 }
936         }
937 }
938
939 Bufpos
940 scan_buffer(struct buffer * buf, Emchar target, Bufpos start, Bufpos end,
941             EMACS_INT count, EMACS_INT * shortage, int allow_quit)
942 {
943         Bytind bi_retval;
944         Bytind bi_start, bi_end;
945
946         bi_start = bufpos_to_bytind(buf, start);
947         if (end)
948                 bi_end = bufpos_to_bytind(buf, end);
949         else
950                 bi_end = 0;
951         bi_retval = bi_scan_buffer(buf, target, bi_start, bi_end, count,
952                                    shortage, allow_quit);
953         return bytind_to_bufpos(buf, bi_retval);
954 }
955
956 Bytind bi_find_next_newline_no_quit(struct buffer * buf, Bytind from, int count)
957 {
958         return bi_scan_buffer(buf, '\n', from, 0, count, 0, 0);
959 }
960
961 Bufpos find_next_newline_no_quit(struct buffer * buf, Bufpos from, int count)
962 {
963         return scan_buffer(buf, '\n', from, 0, count, 0, 0);
964 }
965
966 Bufpos find_next_newline(struct buffer * buf, Bufpos from, int count)
967 {
968         return scan_buffer(buf, '\n', from, 0, count, 0, 1);
969 }
970
971 Bytind
972 bi_find_next_emchar_in_string(Lisp_String * str, Emchar target, Bytind st,
973                               EMACS_INT count)
974 {
975         /* This function has been Mule-ized. */
976         Bytind lim = string_length(str) - 1;
977         Bufbyte *s = string_data(str);
978
979         assert(count >= 0);
980
981 #ifdef MULE
982         /* Due to the Mule representation of characters in a buffer,
983            we can simply search for characters in the range 0 - 127
984            directly.  For other characters, we do it the "hard" way.
985            Note that this way works for all characters but the other
986            way is faster. */
987         if (target >= 0200) {
988                 while (st < lim && count > 0) {
989                         if (string_char(str, st) == target)
990                                 count--;
991                         INC_CHARBYTIND(s, st);
992                 }
993         } else
994 #endif
995         {
996                 while (st < lim && count > 0) {
997                         Bufbyte *bufptr =
998                             (Bufbyte *) memchr(charptr_n_addr(s, st),
999                                                (int)target, lim - st);
1000                         if (bufptr) {
1001                                 count--;
1002                                 st = (Bytind) (bufptr - s) + 1;
1003                         } else
1004                                 st = lim;
1005                 }
1006         }
1007         return st;
1008 }
1009
1010 /* Like find_next_newline, but returns position before the newline,
1011    not after, and only search up to TO.  This isn't just
1012    find_next_newline (...)-1, because you might hit TO.  */
1013 Bufpos
1014 find_before_next_newline(struct buffer * buf, Bufpos from, Bufpos to, int count)
1015 {
1016         EMACS_INT shortage;
1017         Bufpos pos = scan_buffer(buf, '\n', from, to, count, &shortage, 1);
1018
1019         if (shortage == 0)
1020                 pos--;
1021
1022         return pos;
1023 }
1024 \f
1025 /* This function synched with FSF 21.1 */
1026 static Lisp_Object
1027 skip_chars(struct buffer *buf, int forwardp, int syntaxp,
1028            Lisp_Object string, Lisp_Object lim)
1029 {
1030         /* This function has been Mule-ized. */
1031         REGISTER Bufbyte *p, *pend;
1032         REGISTER Emchar c;
1033         /* We store the first 256 chars in an array here and the rest in
1034            a range table. */
1035         unsigned char fastmap[REGEXP_FASTMAP_SIZE];
1036         int negate = 0;
1037         REGISTER int i;
1038 #ifndef emacs
1039         Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
1040 #endif
1041         Bufpos limit;
1042
1043         if (NILP(lim))
1044                 limit = forwardp ? BUF_ZV(buf) : BUF_BEGV(buf);
1045         else {
1046                 CHECK_INT_COERCE_MARKER(lim);
1047                 limit = XINT(lim);
1048
1049                 /* In any case, don't allow scan outside bounds of buffer.  */
1050                 if (limit > BUF_ZV(buf))
1051                         limit = BUF_ZV(buf);
1052                 if (limit < BUF_BEGV(buf))
1053                         limit = BUF_BEGV(buf);
1054         }
1055
1056         CHECK_STRING(string);
1057         p = XSTRING_DATA(string);
1058         pend = p + XSTRING_LENGTH(string);
1059         memset(fastmap, 0, sizeof(fastmap));
1060
1061         Fclear_range_table(Vskip_chars_range_table);
1062
1063         if (p != pend && *p == '^') {
1064                 negate = 1;
1065                 p++;
1066         }
1067
1068         /* Find the characters specified and set their elements of fastmap.
1069            If syntaxp, each character counts as itself.
1070            Otherwise, handle backslashes and ranges specially  */
1071
1072         while (p != pend) {
1073                 c = charptr_emchar(p);
1074                 INC_CHARPTR(p);
1075                 if (syntaxp) {
1076                         if (c < REGEXP_FASTMAP_SIZE
1077                             && syntax_spec_code[c] < (unsigned char)Smax)
1078                                 fastmap[c] = 1;
1079                         else
1080                                 signal_simple_error("Invalid syntax designator",
1081                                                     make_char(c));
1082                 } else {
1083                         if (c == '\\') {
1084                                 if (p == pend)
1085                                         break;
1086                                 c = charptr_emchar(p);
1087                                 INC_CHARPTR(p);
1088                         }
1089                         if (p != pend && *p == '-') {
1090                                 Emchar cend;
1091
1092                                 /* Skip over the dash.  */
1093                                 p++;
1094                                 if (p == pend)
1095                                         break;
1096                                 cend = charptr_emchar(p);
1097                                 while (c <= cend && c < REGEXP_FASTMAP_SIZE) {
1098                                         fastmap[c] = 1;
1099                                         c++;
1100                                 }
1101                                 if (c <= cend)
1102                                         Fput_range_table(make_int(c),
1103                                                          make_int(cend), Qt,
1104                                                          Vskip_chars_range_table);
1105                                 INC_CHARPTR(p);
1106                         } else {
1107                                 if (c < REGEXP_FASTMAP_SIZE)
1108                                         fastmap[c] = 1;
1109                                 else
1110                                         Fput_range_table(make_int(c),
1111                                                          make_int(c), Qt,
1112                                                          Vskip_chars_range_table);
1113                         }
1114                 }
1115         }
1116
1117         /* #### Not in FSF 21.1 */
1118         if (syntaxp && fastmap['-'] != 0)
1119                 fastmap[' '] = 1;
1120
1121         /* If ^ was the first character, complement the fastmap.
1122            We don't complement the range table, however; we just use negate
1123            in the comparisons below. */
1124
1125         if (negate)
1126                 for (i = 0; i < (int)(sizeof fastmap); i++)
1127                         fastmap[i] ^= 1;
1128
1129         {
1130                 Bufpos start_point = BUF_PT(buf);
1131                 Bufpos pos = start_point;
1132                 Bytind pos_byte = BI_BUF_PT(buf);
1133
1134                 if (syntaxp) {
1135                         SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos,
1136                                                       forwardp ? 1 : -1);
1137                         /* All syntax designators are normal chars so nothing strange
1138                            to worry about */
1139                         if (forwardp) {
1140                                 if (pos < limit)
1141                                         while (fastmap[(unsigned char)
1142                                                        syntax_code_spec
1143                                                        [(int)SYNTAX_FROM_CACHE
1144                                                         (syntax_table,
1145                                                          BI_BUF_FETCH_CHAR(buf,
1146                                                                            pos_byte))]])
1147                                         {
1148                                                 pos++;
1149                                                 INC_BYTIND(buf, pos_byte);
1150                                                 if (pos >= limit)
1151                                                         break;
1152                                                 UPDATE_SYNTAX_CACHE_FORWARD
1153                                                     (pos);
1154                                         }
1155                         } else {
1156                                 while (pos > limit) {
1157                                         Bufpos savepos = pos_byte;
1158                                         pos--;
1159                                         DEC_BYTIND(buf, pos_byte);
1160                                         UPDATE_SYNTAX_CACHE_BACKWARD(pos);
1161                                         if (!fastmap[(unsigned char)
1162                                                      syntax_code_spec
1163                                                      [(int)SYNTAX_FROM_CACHE
1164                                                       (syntax_table,
1165                                                        BI_BUF_FETCH_CHAR(buf,
1166                                                                          pos_byte))]])
1167                                         {
1168                                                 pos++;
1169                                                 pos_byte = savepos;
1170                                                 break;
1171                                         }
1172                                 }
1173                         }
1174                 } else {
1175                         if (forwardp) {
1176                                 while (pos < limit) {
1177                                         Emchar ch =
1178                                             BI_BUF_FETCH_CHAR(buf, pos_byte);
1179                                         if ((ch <
1180                                              REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1181                                             : (NILP
1182                                                (Fget_range_table
1183                                                 (make_int(ch),
1184                                                  Vskip_chars_range_table, Qnil))
1185                                                == negate)) {
1186                                                 pos++;
1187                                                 INC_BYTIND(buf, pos_byte);
1188                                         } else
1189                                                 break;
1190                                 }
1191                         } else {
1192                                 while (pos > limit) {
1193                                         Bufpos prev_pos_byte = pos_byte;
1194                                         Emchar ch;
1195
1196                                         DEC_BYTIND(buf, prev_pos_byte);
1197                                         ch = BI_BUF_FETCH_CHAR(buf,
1198                                                                prev_pos_byte);
1199                                         if ((ch <
1200                                              REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1201                                             : (NILP
1202                                                (Fget_range_table
1203                                                 (make_int(ch),
1204                                                  Vskip_chars_range_table, Qnil))
1205                                                == negate)) {
1206                                                 pos--;
1207                                                 pos_byte = prev_pos_byte;
1208                                         } else
1209                                                 break;
1210                                 }
1211                         }
1212                 }
1213                 QUIT;
1214                 BOTH_BUF_SET_PT(buf, pos, pos_byte);
1215                 return make_int(BUF_PT(buf) - start_point);
1216         }
1217 }
1218
1219 DEFUN("skip-chars-forward", Fskip_chars_forward, 1, 3, 0,       /*
1220 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
1221 STRING is like the inside of a `[...]' in a regular expression
1222 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
1223 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1224 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1225 Returns the distance traveled, either zero or positive.
1226
1227 Optional argument BUFFER defaults to the current buffer.
1228 */
1229       (string, limit, buffer))
1230 {
1231         return skip_chars(decode_buffer(buffer, 0), 1, 0, string, limit);
1232 }
1233
1234 DEFUN("skip-chars-backward", Fskip_chars_backward, 1, 3, 0,     /*
1235 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1236 See `skip-chars-forward' for details.
1237 Returns the distance traveled, either zero or negative.
1238
1239 Optional argument BUFFER defaults to the current buffer.
1240 */
1241       (string, limit, buffer))
1242 {
1243         return skip_chars(decode_buffer(buffer, 0), 0, 0, string, limit);
1244 }
1245
1246 DEFUN("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0,     /*
1247 Move point forward across chars in specified syntax classes.
1248 SYNTAX is a string of syntax code characters.
1249 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1250 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1251 This function returns the distance traveled, either zero or positive.
1252
1253 Optional argument BUFFER defaults to the current buffer.
1254 */
1255       (syntax, limit, buffer))
1256 {
1257         return skip_chars(decode_buffer(buffer, 0), 1, 1, syntax, limit);
1258 }
1259
1260 DEFUN("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0,   /*
1261 Move point backward across chars in specified syntax classes.
1262 SYNTAX is a string of syntax code characters.
1263 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1264 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1265 This function returns the distance traveled, either zero or negative.
1266
1267 Optional argument BUFFER defaults to the current buffer.
1268 */
1269       (syntax, limit, buffer))
1270 {
1271         return skip_chars(decode_buffer(buffer, 0), 0, 1, syntax, limit);
1272 }
1273 \f
1274 /* Subroutines of Lisp buffer search functions. */
1275
1276 static Lisp_Object
1277 search_command(Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1278                Lisp_Object count, Lisp_Object buffer, int direction,
1279                int RE, int posix)
1280 {
1281         /* This function has been Mule-ized, except for the trt table handling. */
1282         REGISTER Bufpos np;
1283         Bufpos lim;
1284         EMACS_INT n = direction;
1285         struct buffer *buf;
1286
1287         if (!NILP(count)) {
1288                 CHECK_INT(count);
1289                 n *= XINT(count);
1290         }
1291
1292         buf = decode_buffer(buffer, 0);
1293         CHECK_STRING(string);
1294         if (NILP(limit))
1295                 lim = n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
1296         else {
1297                 CHECK_INT_COERCE_MARKER(limit);
1298                 lim = XINT(limit);
1299                 if (n > 0 ? lim < BUF_PT(buf) : lim > BUF_PT(buf))
1300                         error("Invalid search limit (wrong side of point)");
1301                 if (lim > BUF_ZV(buf))
1302                         lim = BUF_ZV(buf);
1303                 if (lim < BUF_BEGV(buf))
1304                         lim = BUF_BEGV(buf);
1305         }
1306
1307         np = search_buffer(buf, string, BUF_PT(buf), lim, n, RE,
1308                            (!NILP(buf->case_fold_search)
1309                             ? XCASE_TABLE_CANON(buf->case_table)
1310                             : Qnil), (!NILP(buf->case_fold_search)
1311                                       ? XCASE_TABLE_EQV(buf->case_table)
1312                                       : Qnil), posix);
1313
1314         if (np <= 0) {
1315                 if (NILP(noerror))
1316                         return signal_failure(string);
1317                 if (!EQ(noerror, Qt)) {
1318                         if (lim < BUF_BEGV(buf) || lim > BUF_ZV(buf))
1319                                 abort();
1320                         BUF_SET_PT(buf, lim);
1321                         return Qnil;
1322 #if 0                           /* This would be clean, but maybe programs depend on
1323                                    a value of nil here.  */
1324                         np = lim;
1325 #endif
1326                 } else
1327                         return Qnil;
1328         }
1329
1330         if (np < BUF_BEGV(buf) || np > BUF_ZV(buf))
1331                 abort();
1332
1333         BUF_SET_PT(buf, np);
1334
1335         return make_int(np);
1336 }
1337 \f
1338 static int trivial_regexp_p(Lisp_Object regexp)
1339 {
1340         /* This function has been Mule-ized. */
1341         Bytecount len = XSTRING_LENGTH(regexp);
1342         Bufbyte *s = XSTRING_DATA(regexp);
1343         while (--len >= 0) {
1344                 switch (*s++) {
1345                         /* ']' doesn't appear here because it's only special after ] */
1346                 case '.':
1347                 case '*':
1348                 case '+':
1349                 case '?':
1350                 case '[':
1351                 case '^':
1352                 case '$':
1353                         return 0;
1354                 case '\\':
1355                         if (--len < 0)
1356                                 return 0;
1357                         switch (*s++) {
1358                         case '|':
1359                         case '(':
1360                         case ')':
1361                         case '`':
1362                         case '\'':
1363                         case 'b':
1364                         case 'B':
1365                         case '<':
1366                         case '>':
1367                         case 'w':
1368                         case 'W':
1369                         case 's':
1370                         case 'S':
1371                         case '=':
1372                         case '{':
1373                         case '}':
1374 #ifdef MULE
1375                                 /* 97/2/25 jhod Added for category matches */
1376                         case 'c':
1377                         case 'C':
1378 #endif                          /* MULE */
1379                         case '1':
1380                         case '2':
1381                         case '3':
1382                         case '4':
1383                         case '5':
1384                         case '6':
1385                         case '7':
1386                         case '8':
1387                         case '9':
1388                                 return 0;
1389                         default:
1390                                 break;
1391                         }
1392                 default:
1393                         break;
1394                 }
1395         }
1396         return 1;
1397 }
1398
1399 /* Search for the n'th occurrence of STRING in BUF,
1400    starting at position BUFPOS and stopping at position BUFLIM,
1401    treating PAT as a literal string if RE is false or as
1402    a regular expression if RE is true.
1403
1404    If N is positive, searching is forward and BUFLIM must be greater
1405    than BUFPOS.
1406    If N is negative, searching is backward and BUFLIM must be less
1407    than BUFPOS.
1408
1409    Returns -x if only N-x occurrences found (x > 0),
1410    or else the position at the beginning of the Nth occurrence
1411    (if searching backward) or the end (if searching forward).
1412
1413    POSIX is nonzero if we want full backtracking (POSIX style)
1414    for this pattern.  0 means backtrack only enough to get a valid match.  */
1415 static Bufpos
1416 search_buffer(struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1417               Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1418               Lisp_Object inverse_trt, int posix)
1419 {
1420         /* This function has been Mule-ized, except for the trt table handling. */
1421         Bytecount len = XSTRING_LENGTH(string);
1422         Bufbyte *base_pat = XSTRING_DATA(string);
1423         REGISTER EMACS_INT i, j;
1424         Bytind p1, p2;
1425         Bytecount s1, s2;
1426         Bytind pos, lim;
1427
1428         if (running_asynch_code)
1429                 save_search_regs();
1430
1431         /* Null string is found at starting position.  */
1432         if (len == 0) {
1433                 set_search_regs(buf, bufpos, 0);
1434                 clear_unused_search_regs(&search_regs, 0);
1435                 return bufpos;
1436         }
1437
1438         /* Searching 0 times means noop---don't move, don't touch registers.  */
1439         if (n == 0)
1440                 return bufpos;
1441
1442         pos = bufpos_to_bytind(buf, bufpos);
1443         lim = bufpos_to_bytind(buf, buflim);
1444         if (RE && !trivial_regexp_p(string)) {
1445                 struct re_pattern_buffer *bufp;
1446
1447                 bufp = compile_pattern(string, &search_regs, trt, posix,
1448                                        ERROR_ME);
1449
1450                 /* Get pointers and sizes of the two strings
1451                    that make up the visible portion of the buffer. */
1452
1453                 p1 = BI_BUF_BEGV(buf);
1454                 p2 = BI_BUF_CEILING_OF(buf, p1);
1455                 s1 = p2 - p1;
1456                 s2 = BI_BUF_ZV(buf) - p2;
1457                 regex_match_object = Qnil;
1458
1459                 while (n < 0) {
1460                         Bytecount val;
1461                         QUIT;
1462                         regex_emacs_buffer = buf;
1463                         val = re_search_2(bufp,
1464                                           (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1465                                           s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1466                                                                           p2),
1467                                           s2, pos - BI_BUF_BEGV(buf), lim - pos,
1468                                           &search_regs, pos - BI_BUF_BEGV(buf));
1469
1470                         if (val == -2) {
1471                                 matcher_overflow();
1472                         }
1473                         if (val >= 0) {
1474                                 int num_regs = search_regs.num_regs;
1475                                 j = BI_BUF_BEGV(buf);
1476                                 for (i = 0; i < num_regs; i++)
1477                                         if (search_regs.start[i] >= 0) {
1478                                                 search_regs.start[i] += j;
1479                                                 search_regs.end[i] += j;
1480                                         }
1481                                 /* re_match (called from re_search et al) does this for us */
1482                                 /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1483                                 XSETBUFFER(last_thing_searched, buf);
1484                                 /* Set pos to the new position. */
1485                                 pos = search_regs.start[0];
1486                                 fixup_search_regs_for_buffer(buf);
1487                                 /* And bufpos too. */
1488                                 bufpos = search_regs.start[0];
1489                         } else {
1490                                 return n;
1491                         }
1492                         n++;
1493                 }
1494                 while (n > 0) {
1495                         Bytecount val;
1496                         QUIT;
1497                         regex_emacs_buffer = buf;
1498                         val = re_search_2(bufp,
1499                                           (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1500                                           s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1501                                                                           p2),
1502                                           s2, pos - BI_BUF_BEGV(buf), lim - pos,
1503                                           &search_regs, lim - BI_BUF_BEGV(buf));
1504                         if (val == -2) {
1505                                 matcher_overflow();
1506                         }
1507                         if (val >= 0) {
1508                                 int num_regs = search_regs.num_regs;
1509                                 j = BI_BUF_BEGV(buf);
1510                                 for (i = 0; i < num_regs; i++)
1511                                         if (search_regs.start[i] >= 0) {
1512                                                 search_regs.start[i] += j;
1513                                                 search_regs.end[i] += j;
1514                                         }
1515                                 /* re_match (called from re_search et al) does this for us */
1516                                 /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1517                                 XSETBUFFER(last_thing_searched, buf);
1518                                 /* Set pos to the new position. */
1519                                 pos = search_regs.end[0];
1520                                 fixup_search_regs_for_buffer(buf);
1521                                 /* And bufpos too. */
1522                                 bufpos = search_regs.end[0];
1523                         } else {
1524                                 return 0 - n;
1525                         }
1526                         n--;
1527                 }
1528                 return bufpos;
1529         } else {                /* non-RE case */
1530
1531                 int charset_base = -1;
1532                 int boyer_moore_ok = 1;
1533                 Bufbyte *pat = 0;
1534                 Bufbyte *patbuf = alloca_array(Bufbyte, len * MAX_EMCHAR_LEN);
1535                 pat = patbuf;
1536 #ifdef MULE
1537                 while (len > 0) {
1538                         Bufbyte tmp_str[MAX_EMCHAR_LEN];
1539                         Emchar c, translated, inverse;
1540                         Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1541
1542                         /* If we got here and the RE flag is set, it's because
1543                            we're dealing with a regexp known to be trivial, so the
1544                            backslash just quotes the next character.  */
1545                         if (RE && *base_pat == '\\') {
1546                                 len--;
1547                                 base_pat++;
1548                         }
1549                         c = charptr_emchar(base_pat);
1550                         translated = TRANSLATE(trt, c);
1551                         inverse = TRANSLATE(inverse_trt, c);
1552
1553                         orig_bytelen = charcount_to_bytecount(base_pat, 1);
1554                         inv_bytelen = set_charptr_emchar(tmp_str, inverse);
1555                         new_bytelen = set_charptr_emchar(tmp_str, translated);
1556
1557                         if (new_bytelen != orig_bytelen
1558                             || inv_bytelen != orig_bytelen)
1559                                 boyer_moore_ok = 0;
1560                         if (translated != c || inverse != c) {
1561                                 /* Keep track of which character set row
1562                                    contains the characters that need translation.  */
1563                                 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1564                                 if (charset_base == -1)
1565                                         charset_base = charset_base_code;
1566                                 else if (charset_base != charset_base_code)
1567                                         /* If two different rows appear, needing translation,
1568                                            then we cannot use boyer_moore search.  */
1569                                         boyer_moore_ok = 0;
1570                         }
1571                         memcpy(pat, tmp_str, new_bytelen);
1572                         pat += new_bytelen;
1573                         base_pat += orig_bytelen;
1574                         len -= orig_bytelen;
1575                 }
1576 #else                           /* not MULE */
1577                 while (--len >= 0) {
1578                         /* If we got here and the RE flag is set, it's because
1579                            we're dealing with a regexp known to be trivial, so the
1580                            backslash just quotes the next character.  */
1581                         if (RE && *base_pat == '\\') {
1582                                 len--;
1583                                 base_pat++;
1584                         }
1585                         *pat++ = TRANSLATE(trt, *base_pat++);
1586                 }
1587 #endif                          /* MULE */
1588                 len = pat - patbuf;
1589                 pat = base_pat = patbuf;
1590                 if (boyer_moore_ok)
1591                         return boyer_moore(buf, base_pat, len, pos, lim, n,
1592                                            trt, inverse_trt, charset_base);
1593                 else
1594                         return simple_search(buf, base_pat, len, pos, lim, n,
1595                                              trt);
1596         }
1597 }
1598
1599 /* Do a simple string search N times for the string PAT,
1600    whose length is LEN/LEN_BYTE,
1601    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1602    TRT is the translation table.
1603
1604    Return the character position where the match is found.
1605    Otherwise, if M matches remained to be found, return -M.
1606
1607    This kind of search works regardless of what is in PAT and
1608    regardless of what is in TRT.  It is used in cases where
1609    boyer_moore cannot work.  */
1610
1611 static Bufpos
1612 simple_search(struct buffer *buf, Bufbyte * base_pat, Bytecount len_byte,
1613               Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1614 {
1615         int forward = n > 0;
1616         Bytecount buf_len = 0;  /* Shut up compiler. */
1617
1618         if (lim > idx) {
1619                 while (n > 0) {
1620                         while (1) {
1621                                 Bytecount this_len = len_byte;
1622                                 Bytind this_idx = idx;
1623                                 const Bufbyte *p = base_pat;
1624                                 if (idx >= lim)
1625                                         goto stop;
1626
1627                                 while (this_len > 0) {
1628                                         Emchar pat_ch, buf_ch;
1629                                         Bytecount pat_len;
1630
1631                                         pat_ch = charptr_emchar(p);
1632                                         buf_ch =
1633                                             BI_BUF_FETCH_CHAR(buf, this_idx);
1634
1635                                         buf_ch = TRANSLATE(trt, buf_ch);
1636
1637                                         if (buf_ch != pat_ch)
1638                                                 break;
1639
1640                                         pat_len = charcount_to_bytecount(p, 1);
1641                                         p += pat_len;
1642                                         this_len -= pat_len;
1643                                         INC_BYTIND(buf, this_idx);
1644                                 }
1645                                 if (this_len == 0) {
1646                                         buf_len = this_idx - idx;
1647                                         idx = this_idx;
1648                                         break;
1649                                 }
1650                                 INC_BYTIND(buf, idx);
1651                         }
1652                         n--;
1653                 }
1654         } else {
1655                 while (n < 0) {
1656                         while (1) {
1657                                 Bytecount this_len = len_byte;
1658                                 Bytind this_idx = idx;
1659                                 const Bufbyte *p = base_pat + len_byte;
1660
1661                                 if (idx <= lim) {
1662                                         goto stop;
1663                                 }
1664
1665                                 while (this_len > 0) {
1666                                         Emchar pat_ch, buf_ch;
1667
1668                                         DEC_CHARPTR(p);
1669                                         DEC_BYTIND(buf, this_idx);
1670                                         pat_ch = charptr_emchar(p);
1671                                         buf_ch =
1672                                             BI_BUF_FETCH_CHAR(buf, this_idx);
1673
1674                                         buf_ch = TRANSLATE(trt, buf_ch);
1675
1676                                         if (buf_ch != pat_ch)
1677                                                 break;
1678
1679                                         this_len -=
1680                                             charcount_to_bytecount(p, 1);
1681                                 }
1682                                 if (this_len == 0) {
1683                                         buf_len = idx - this_idx;
1684                                         idx = this_idx;
1685                                         break;
1686                                 }
1687                                 DEC_BYTIND(buf, idx);
1688                         }
1689                         n++;
1690                 }
1691         }
1692 stop:
1693         if (n == 0) {
1694                 Bufpos beg, end, retval;
1695                 if (forward) {
1696                         beg = bytind_to_bufpos(buf, idx - buf_len);
1697                         retval = end = bytind_to_bufpos(buf, idx);
1698                 } else {
1699                         retval = beg = bytind_to_bufpos(buf, idx);
1700                         end = bytind_to_bufpos(buf, idx + buf_len);
1701                 }
1702                 set_search_regs(buf, beg, end - beg);
1703                 clear_unused_search_regs(&search_regs, 0);
1704
1705                 return retval;
1706         } else if (n > 0) {
1707                 return -n;
1708         } else {
1709                 return n;
1710         }
1711 }
1712
1713 /* Do Boyer-Moore search N times for the string PAT,
1714    whose length is LEN/LEN_BYTE,
1715    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1716    DIRECTION says which direction we search in.
1717    TRT and INVERSE_TRT are translation tables.
1718
1719    This kind of search works if all the characters in PAT that have
1720    nontrivial translation are the same aside from the last byte.  This
1721    makes it possible to translate just the last byte of a character,
1722    and do so after just a simple test of the context.
1723
1724    If that criterion is not satisfied, do not call this function.  */
1725
1726 static Bufpos
1727 boyer_moore(struct buffer *buf, Bufbyte * base_pat, Bytecount len,
1728             Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1729             Lisp_Object inverse_trt, int charset_base)
1730 {
1731         /* #### Someone really really really needs to comment the workings
1732            of this junk somewhat better.
1733
1734            BTW "BM" stands for Boyer-Moore, which is one of the standard
1735            string-searching algorithms.  It's the best string-searching
1736            algorithm out there, provided that:
1737
1738            a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1739            uses hashing, is much much easier to code but not as fast.)
1740            b) You can freely move backwards in the string that you're
1741            searching through.
1742
1743            As the comment below tries to explain (but garbles in typical
1744            programmer-ese), the idea is that you don't have to do a
1745            string match at every successive position in the text.  For
1746            example, let's say the pattern is "a very long string".  We
1747            compare the last character in the string (`g') with the
1748            corresponding character in the text.  If it mismatches, and
1749            it is, say, `z', then we can skip forward by the entire
1750            length of the pattern because `z' does not occur anywhere
1751            in the pattern.  If the mismatching character does occur
1752            in the pattern, we can usually still skip forward by more
1753            than one: e.g. if it is `l', then we can skip forward
1754            by the length of the substring "ong string" -- i.e. the
1755            largest end section of the pattern that does not contain
1756            the mismatched character.  So what we do is compute, for
1757            each possible character, the distance we can skip forward
1758            (the "stride") and use it in the string matching.  This
1759            is what the BM_tab holds. */
1760         REGISTER EMACS_INT *BM_tab;
1761         EMACS_INT *BM_tab_base;
1762         REGISTER Bytecount dirlen;
1763         EMACS_INT infinity;
1764         Bytind limit;
1765         Bytecount stride_for_teases = 0;
1766         REGISTER EMACS_INT i, j;
1767         Bufbyte *pat, *pat_end;
1768         REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1769         Bufbyte simple_translate[REGEXP_FASTMAP_SIZE];
1770         REGISTER int direction = ((n > 0) ? 1 : -1);
1771 #ifdef MULE
1772         Bufbyte translate_prev_byte = 0;
1773         Bufbyte translate_anteprev_byte = 0;
1774 #endif
1775 #ifdef C_ALLOCA
1776         EMACS_INT BM_tab_space[REGEXP_FASTMAP_SIZE];
1777         BM_tab = &BM_tab_space[0];
1778 #else
1779         BM_tab = alloca_array(EMACS_INT, 256);
1780 #endif
1781
1782         /* The general approach is that we are going to maintain that we
1783            know the first (closest to the present position, in whatever
1784            direction we're searching) character that could possibly be
1785            the last (furthest from present position) character of a
1786            valid match.  We advance the state of our knowledge by
1787            looking at that character and seeing whether it indeed
1788            matches the last character of the pattern.  If it does, we
1789            take a closer look.  If it does not, we move our pointer (to
1790            putative last characters) as far as is logically possible.
1791            This amount of movement, which I call a stride, will be the
1792            length of the pattern if the actual character appears nowhere
1793            in the pattern, otherwise it will be the distance from the
1794            last occurrence of that character to the end of the pattern.
1795            As a coding trick, an enormous stride is coded into the table
1796            for characters that match the last character.  This allows
1797            use of only a single test, a test for having gone past the
1798            end of the permissible match region, to test for both
1799            possible matches (when the stride goes past the end
1800            immediately) and failure to match (where you get nudged past
1801            the end one stride at a time).
1802
1803            Here we make a "mickey mouse" BM table.  The stride of the
1804            search is determined only by the last character of the
1805            putative match.  If that character does not match, we will
1806            stride the proper distance to propose a match that
1807            superimposes it on the last instance of a character that
1808            matches it (per trt), or misses it entirely if there is
1809            none. */
1810
1811         dirlen = len * direction;
1812         infinity = dirlen - (lim + pos + len + len) * direction;
1813         /* Record position after the end of the pattern.  */
1814         pat_end = base_pat + len;
1815         if (direction < 0)
1816                 base_pat = pat_end - 1;
1817         BM_tab_base = BM_tab;
1818         BM_tab += REGEXP_FASTMAP_SIZE;
1819         j = dirlen;             /* to get it in a register */
1820         /* A character that does not appear in the pattern induces a
1821            stride equal to the pattern length. */
1822         while (BM_tab_base != BM_tab) {
1823                 *--BM_tab = j;
1824                 *--BM_tab = j;
1825                 *--BM_tab = j;
1826                 *--BM_tab = j;
1827         }
1828         /* We use this for translation, instead of TRT itself.  We
1829            fill this in to handle the characters that actually occur
1830            in the pattern.  Others don't matter anyway!  */
1831         xzero(simple_translate);
1832         for (i = 0; i < REGEXP_FASTMAP_SIZE; i++)
1833                 simple_translate[i] = (Bufbyte) i;
1834         i = 0;
1835         while (i != infinity) {
1836                 Bufbyte *ptr = base_pat + i;
1837                 i += direction;
1838                 if (i == dirlen)
1839                         i = infinity;
1840                 if (!NILP(trt)) {
1841 #ifdef MULE
1842                         Emchar ch, untranslated;
1843                         int this_translated = 1;
1844
1845                         /* Is *PTR the last byte of a character?  */
1846                         if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P(ptr[1])) {
1847                                 Bufbyte *charstart = ptr;
1848                                 while (!BUFBYTE_FIRST_BYTE_P(*charstart))
1849                                         charstart--;
1850                                 untranslated = charptr_emchar(charstart);
1851                                 if (charset_base ==
1852                                     (untranslated & ~CHAR_FIELD3_MASK)) {
1853                                         ch = TRANSLATE(trt, untranslated);
1854                                         if (!BUFBYTE_FIRST_BYTE_P(*ptr)) {
1855                                                 translate_prev_byte = ptr[-1];
1856                                                 if (!BUFBYTE_FIRST_BYTE_P
1857                                                     (translate_prev_byte))
1858                                                         translate_anteprev_byte
1859                                                             = ptr[-2];
1860                                         }
1861                                 } else {
1862                                         this_translated = 0;
1863                                         ch = *ptr;
1864                                 }
1865                         } else {
1866                                 ch = *ptr;
1867                                 this_translated = 0;
1868                         }
1869                         if (ch > REGEXP_FASTMAP_SIZE)
1870                                 j = ((unsigned char)ch | 0200);
1871                         else
1872                                 j = (unsigned char)ch;
1873
1874                         if (i == infinity)
1875                                 stride_for_teases = BM_tab[j];
1876                         BM_tab[j] = dirlen - i;
1877                         /* A translation table is accompanied by its inverse --
1878                            see comment following downcase_table for details */
1879                         if (this_translated) {
1880                                 Emchar starting_ch = ch;
1881                                 EMACS_INT starting_j = j;
1882                                 while (1) {
1883                                         ch = TRANSLATE(inverse_trt, ch);
1884                                         if (ch > REGEXP_FASTMAP_SIZE)
1885                                                 j = ((unsigned char)ch | 0200);
1886                                         else
1887                                                 j = (unsigned char)ch;
1888
1889                                         /* For all the characters that map into CH,
1890                                            set up simple_translate to map the last byte
1891                                            into STARTING_J.  */
1892                                         simple_translate[j] = starting_j;
1893                                         if (ch == starting_ch)
1894                                                 break;
1895                                         BM_tab[j] = dirlen - i;
1896                                 }
1897                         }
1898 #else
1899                         EMACS_INT k;
1900                         j = *ptr;
1901                         k = (j = TRANSLATE(trt, j));
1902                         if (i == infinity)
1903                                 stride_for_teases = BM_tab[j];
1904                         BM_tab[j] = dirlen - i;
1905                         /* A translation table is accompanied by its inverse --
1906                            see comment following downcase_table for details */
1907
1908                         while ((j = TRANSLATE(inverse_trt, j)) != k) {
1909                                 simple_translate[j] = (Bufbyte) k;
1910                                 BM_tab[j] = dirlen - i;
1911                         }
1912 #endif
1913                 } else {
1914                         j = *ptr;
1915
1916                         if (i == infinity)
1917                                 stride_for_teases = BM_tab[j];
1918                         BM_tab[j] = dirlen - i;
1919                 }
1920                 /* stride_for_teases tells how much to stride if we get a
1921                    match on the far character but are subsequently
1922                    disappointed, by recording what the stride would have been
1923                    for that character if the last character had been
1924                    different. */
1925         }
1926         infinity = dirlen - infinity;
1927         pos += dirlen - ((direction > 0) ? direction : 0);
1928         /* loop invariant - pos points at where last char (first char if
1929            reverse) of pattern would align in a possible match.  */
1930         while (n != 0) {
1931                 Bytind tail_end;
1932                 Bufbyte *tail_end_ptr;
1933                 /* It's been reported that some (broken) compiler thinks
1934                    that Boolean expressions in an arithmetic context are
1935                    unsigned.  Using an explicit ?1:0 prevents this.  */
1936                 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1937                         return n * (0 - direction);
1938                 /* First we do the part we can by pointers (maybe
1939                    nothing) */
1940                 QUIT;
1941                 pat = base_pat;
1942                 limit = pos - dirlen + direction;
1943                 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1944                    have changed.  See buffer.h. */
1945                 limit = ((direction > 0)
1946                          ? BI_BUF_CEILING_OF(buf, limit) - 1
1947                          : BI_BUF_FLOOR_OF(buf, limit + 1));
1948                 /* LIMIT is now the last (not beyond-last!) value POS can
1949                    take on without hitting edge of buffer or the gap.  */
1950                 limit = ((direction > 0)
1951                          ? min(lim - 1, min(limit, pos + 20000))
1952                          : max(lim, max(limit, pos - 20000)));
1953                 tail_end = BI_BUF_CEILING_OF(buf, pos);
1954                 tail_end_ptr = BI_BUF_BYTE_ADDRESS(buf, tail_end);
1955
1956                 if ((limit - pos) * direction > 20) {
1957                         p_limit = BI_BUF_BYTE_ADDRESS(buf, limit);
1958                         ptr2 = (cursor = BI_BUF_BYTE_ADDRESS(buf, pos));
1959                         /* In this loop, pos + cursor - ptr2 is the surrogate
1960                            for pos */
1961                         while (1) {     /* use one cursor setting as long as i can */
1962                                 if (direction > 0) {    /* worth duplicating */
1963                                         /* Use signed comparison if appropriate to make
1964                                            cursor+infinity sure to be > p_limit.
1965                                            Assuming that the buffer lies in a range of
1966                                            addresses that are all "positive" (as ints)
1967                                            or all "negative", either kind of comparison
1968                                            will work as long as we don't step by
1969                                            infinity.  So pick the kind that works when
1970                                            we do step by infinity.  */
1971                                         if ((EMACS_INT) (p_limit + infinity) >
1972                                             (EMACS_INT) p_limit)
1973                                                 while ((EMACS_INT) cursor <=
1974                                                        (EMACS_INT) p_limit)
1975                                                         cursor +=
1976                                                             BM_tab[*cursor];
1977                                         else
1978                                                 while ((EMACS_UINT) cursor <=
1979                                                        (EMACS_UINT) p_limit)
1980                                                         cursor +=
1981                                                             BM_tab[*cursor];
1982                                 } else {
1983                                         if ((EMACS_INT) (p_limit + infinity) <
1984                                             (EMACS_INT) p_limit)
1985                                                 while ((EMACS_INT) cursor >=
1986                                                        (EMACS_INT) p_limit)
1987                                                         cursor +=
1988                                                             BM_tab[*cursor];
1989                                         else
1990                                                 while ((EMACS_UINT) cursor >=
1991                                                        (EMACS_UINT) p_limit)
1992                                                         cursor +=
1993                                                             BM_tab[*cursor];
1994                                 }
1995                                 /* If you are here, cursor is beyond the end of the
1996                                    searched region.  This can happen if you match on
1997                                    the far character of the pattern, because the
1998                                    "stride" of that character is infinity, a number
1999                                    able to throw you well beyond the end of the
2000                                    search.  It can also happen if you fail to match
2001                                    within the permitted region and would otherwise
2002                                    try a character beyond that region */
2003                                 if ((cursor - p_limit) * direction <= len)
2004                                         break;  /* a small overrun is genuine */
2005                                 cursor -= infinity;     /* large overrun = hit */
2006                                 i = dirlen - direction;
2007                                 if (!NILP(trt)) {
2008                                         while ((i -=
2009                                                 direction) + direction != 0) {
2010 #ifdef MULE
2011                                                 Emchar ch;
2012                                                 cursor -= direction;
2013                                                 /* Translate only the last byte of a character.  */
2014                                                 if ((cursor == tail_end_ptr
2015                                                      ||
2016                                                      BUFBYTE_FIRST_BYTE_P(cursor
2017                                                                           [1]))
2018                                                     &&
2019                                                     (BUFBYTE_FIRST_BYTE_P
2020                                                      (cursor[0])
2021                                                      || (translate_prev_byte ==
2022                                                          cursor[-1]
2023                                                          &&
2024                                                          (BUFBYTE_FIRST_BYTE_P
2025                                                           (translate_prev_byte)
2026                                                           ||
2027                                                           translate_anteprev_byte
2028                                                           == cursor[-2]))))
2029                                                         ch = simple_translate
2030                                                             [*cursor];
2031                                                 else
2032                                                         ch = *cursor;
2033                                                 if (pat[i] != ch)
2034                                                         break;
2035 #else
2036                                                 if (pat[i] !=
2037                                                     TRANSLATE(trt,
2038                                                               *(cursor -=
2039                                                                 direction)))
2040                                                         break;
2041 #endif
2042                                         }
2043                                 } else {
2044                                         while ((i -=
2045                                                 direction) + direction != 0)
2046                                                 if (pat[i] !=
2047                                                     *(cursor -= direction))
2048                                                         break;
2049                                 }
2050                                 cursor += dirlen - i - direction;       /* fix cursor */
2051                                 if (i + direction == 0) {
2052                                         cursor -= direction;
2053
2054                                         {
2055                                                 Bytind bytstart =
2056                                                     (pos + cursor - ptr2 +
2057                                                      ((direction > 0)
2058                                                       ? 1 - len : 0));
2059                                                 Bufpos bufstart =
2060                                                     bytind_to_bufpos(buf,
2061                                                                      bytstart);
2062                                                 Bufpos bufend =
2063                                                     bytind_to_bufpos(buf,
2064                                                                      bytstart +
2065                                                                      len);
2066
2067                                                 set_search_regs(buf, bufstart,
2068                                                                 bufend -
2069                                                                 bufstart);
2070                                                 clear_unused_search_regs
2071                                                     (&search_regs, 0);
2072                                         }
2073
2074                                         if ((n -= direction) != 0)
2075                                                 cursor += dirlen;       /* to resume search */
2076                                         else
2077                                                 return ((direction > 0)
2078                                                         ? search_regs.
2079                                                         end[0] : search_regs.
2080                                                         start[0]);
2081                                 } else
2082                                         cursor += stride_for_teases;    /* <sigh> we lose -  */
2083                         }
2084                         pos += cursor - ptr2;
2085                 } else
2086                         /* Now we'll pick up a clump that has to be done the hard
2087                            way because it covers a discontinuity */
2088                 {
2089                         /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
2090                            have changed.  See buffer.h. */
2091                         limit = ((direction > 0)
2092                                  ? BI_BUF_CEILING_OF(buf, pos - dirlen + 1) - 1
2093                                  : BI_BUF_FLOOR_OF(buf, pos - dirlen));
2094                         limit = ((direction > 0)
2095                                  ? min(limit + len, lim - 1)
2096                                  : max(limit - len, lim));
2097                         /* LIMIT is now the last value POS can have
2098                            and still be valid for a possible match.  */
2099                         while (1) {
2100                                 /* This loop can be coded for space rather than
2101                                    speed because it will usually run only once.
2102                                    (the reach is at most len + 21, and typically
2103                                    does not exceed len) */
2104                                 while ((limit - pos) * direction >= 0)
2105                                         /* *not* BI_BUF_FETCH_CHAR.  We are working here
2106                                            with bytes, not characters. */
2107                                         pos +=
2108                                             BM_tab[*BI_BUF_BYTE_ADDRESS
2109                                                    (buf, pos)];
2110                                 /* now run the same tests to distinguish going off
2111                                    the end, a match or a phony match. */
2112                                 if ((pos - limit) * direction <= len)
2113                                         break;  /* ran off the end */
2114                                 /* Found what might be a match.
2115                                    Set POS back to last (first if reverse) char pos.  */
2116                                 pos -= infinity;
2117                                 i = dirlen - direction;
2118                                 while ((i -= direction) + direction != 0) {
2119 #ifdef MULE
2120                                         Emchar ch;
2121                                         Bufbyte *ptr;
2122 #endif
2123                                         pos -= direction;
2124 #ifdef MULE
2125                                         ptr = BI_BUF_BYTE_ADDRESS(buf, pos);
2126                                         if ((ptr == tail_end_ptr
2127                                              || BUFBYTE_FIRST_BYTE_P(ptr[1]))
2128                                             && (BUFBYTE_FIRST_BYTE_P(ptr[0])
2129                                                 || (translate_prev_byte ==
2130                                                     ptr[-1]
2131                                                     &&
2132                                                     (BUFBYTE_FIRST_BYTE_P
2133                                                      (translate_prev_byte)
2134                                                      || translate_anteprev_byte
2135                                                      == ptr[-2]))))
2136                                                 ch = simple_translate[*ptr];
2137                                         else
2138                                                 ch = *ptr;
2139                                         if (pat[i] != ch)
2140                                                 break;
2141
2142 #else
2143                                         if (pat[i] != TRANSLATE(trt,
2144                                                                 *BI_BUF_BYTE_ADDRESS
2145                                                                 (buf, pos)))
2146                                                 break;
2147 #endif
2148                                 }
2149                                 /* Above loop has moved POS part or all the way back
2150                                    to the first char pos (last char pos if reverse).
2151                                    Set it once again at the last (first if reverse)
2152                                    char.  */
2153                                 pos += dirlen - i - direction;
2154                                 if (i + direction == 0) {
2155                                         pos -= direction;
2156
2157                                         {
2158                                                 Bytind bytstart = (pos +
2159                                                                    ((direction >
2160                                                                      0)
2161                                                                     ? 1 -
2162                                                                     len : 0));
2163                                                 Bufpos bufstart =
2164                                                     bytind_to_bufpos(buf,
2165                                                                      bytstart);
2166                                                 Bufpos bufend =
2167                                                     bytind_to_bufpos(buf,
2168                                                                      bytstart +
2169                                                                      len);
2170
2171                                                 set_search_regs(buf, bufstart,
2172                                                                 bufend -
2173                                                                 bufstart);
2174                                                 clear_unused_search_regs
2175                                                     (&search_regs, 0);
2176                                         }
2177
2178                                         if ((n -= direction) != 0)
2179                                                 pos += dirlen;  /* to resume search */
2180                                         else
2181                                                 return ((direction > 0)
2182                                                         ? search_regs.
2183                                                         end[0] : search_regs.
2184                                                         start[0]);
2185                                 } else
2186                                         pos += stride_for_teases;
2187                         }
2188                 }
2189                 /* We have done one clump.  Can we continue? */
2190                 if ((lim - pos) * direction < 0)
2191                         return (0 - n) * direction;
2192         }
2193         return bytind_to_bufpos(buf, pos);
2194 }
2195
2196 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
2197    buffer for a match just found.  */
2198
2199 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len)
2200 {
2201         /* This function has been Mule-ized. */
2202         /* Make sure we have registers in which to store
2203            the match position.  */
2204         if (search_regs.num_regs == 0) {
2205                 search_regs.start = xnew_atomic(regoff_t);
2206                 search_regs.end = xnew_atomic(regoff_t);
2207                 search_regs.num_regs = 1;
2208         }
2209
2210         search_regs.start[0] = beg;
2211         search_regs.end[0] = beg + len;
2212         XSETBUFFER(last_thing_searched, buf);
2213 }
2214
2215 /* Clear unused search registers so match data will be null.
2216    REGP is a pointer to the register structure to clear, usually the global
2217    search_regs.
2218    NO_SUB is the number of subexpressions to allow for.  (Does not count
2219    the whole match, ie, for a string search NO_SUB == 0.)
2220    It is an error if NO_SUB > REGP.num_regs - 1. */
2221
2222 static void clear_unused_search_regs(struct re_registers *regp, int no_sub)
2223 {
2224         /* This function has been Mule-ized. */
2225         int i;
2226
2227         assert(no_sub >= 0 && no_sub < regp->num_regs);
2228         for (i = no_sub + 1; i < regp->num_regs; i++)
2229                 regp->start[i] = regp->end[i] = -1;
2230 }
2231 \f
2232 /* Given a string of words separated by word delimiters,
2233    compute a regexp that matches those exact words
2234    separated by arbitrary punctuation.  */
2235
2236 static Lisp_Object wordify(Lisp_Object buffer, Lisp_Object string)
2237 {
2238         Charcount i, len;
2239         EMACS_INT punct_count = 0, word_count = 0;
2240         struct buffer *buf = decode_buffer(buffer, 0);
2241         Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2242
2243         CHECK_STRING(string);
2244         len = XSTRING_CHAR_LENGTH(string);
2245
2246         for (i = 0; i < len; i++)
2247                 if (!WORD_SYNTAX_P
2248                     (syntax_table, string_char(XSTRING(string), i))) {
2249                         punct_count++;
2250                         if (i > 0 && WORD_SYNTAX_P(syntax_table,
2251                                                    string_char(XSTRING(string),
2252                                                                i - 1)))
2253                                 word_count++;
2254                 }
2255         if (WORD_SYNTAX_P(syntax_table, string_char(XSTRING(string), len - 1)))
2256                 word_count++;
2257         if (!word_count)
2258                 return build_string("");
2259
2260         {
2261                 /* The following value is an upper bound on the amount of storage we
2262                    need.  In non-Mule, it is exact. */
2263                 Bufbyte *storage =
2264                     (Bufbyte *) alloca(XSTRING_LENGTH(string) - punct_count +
2265                                        5 * (word_count - 1) + 4);
2266                 Bufbyte *o = storage;
2267
2268                 *o++ = '\\';
2269                 *o++ = 'b';
2270
2271                 for (i = 0; i < len; i++) {
2272                         Emchar ch = string_char(XSTRING(string), i);
2273
2274                         if (WORD_SYNTAX_P(syntax_table, ch))
2275                                 o += set_charptr_emchar(o, ch);
2276                         else if (i > 0
2277                                  && WORD_SYNTAX_P(syntax_table,
2278                                                   string_char(XSTRING(string),
2279                                                               i - 1))
2280                                  && --word_count) {
2281                                 *o++ = '\\';
2282                                 *o++ = 'W';
2283                                 *o++ = '\\';
2284                                 *o++ = 'W';
2285                                 *o++ = '*';
2286                         }
2287                 }
2288
2289                 *o++ = '\\';
2290                 *o++ = 'b';
2291
2292                 return make_string(storage, o - storage);
2293         }
2294 }
2295 \f
2296 DEFUN("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ",  /*
2297 Search backward from point for STRING.
2298 Set point to the beginning of the occurrence found, and return point.
2299
2300 Optional second argument LIMIT bounds the search; it is a buffer
2301 position.  The match found must not extend before that position.
2302 The value nil is equivalent to (point-min).
2303
2304 Optional third argument NOERROR, if t, means just return nil (no
2305 error) if the search fails.  If neither nil nor t, set point to LIMIT
2306 and return nil.
2307
2308 Optional fourth argument COUNT is a repeat count--search for
2309 successive occurrences.
2310
2311 Optional fifth argument BUFFER specifies the buffer to search in and
2312 defaults to the current buffer.
2313
2314 See also the functions `match-beginning', `match-end' and `replace-match'.
2315 */
2316       (string, limit, noerror, count, buffer))
2317 {
2318         return search_command(string, limit, noerror, count, buffer, -1, 0, 0);
2319 }
2320
2321 DEFUN("search-forward", Fsearch_forward, 1, 5, "sSearch: ",     /*
2322 Search forward from point for STRING.
2323 Set point to the end of the occurrence found, and return point.
2324
2325 Optional second argument LIMIT bounds the search; it is a buffer
2326 position.  The match found must not extend after that position.  The
2327 value nil is equivalent to (point-max).
2328
2329 Optional third argument NOERROR, if t, means just return nil (no
2330 error) if the search fails.  If neither nil nor t, set point to LIMIT
2331 and return nil.
2332
2333 Optional fourth argument COUNT is a repeat count--search for
2334 successive occurrences.
2335
2336 Optional fifth argument BUFFER specifies the buffer to search in and
2337 defaults to the current buffer.
2338
2339 See also the functions `match-beginning', `match-end' and `replace-match'.
2340 */
2341       (string, limit, noerror, count, buffer))
2342 {
2343         return search_command(string, limit, noerror, count, buffer, 1, 0, 0);
2344 }
2345
2346 DEFUN("word-search-backward", Fword_search_backward, 1, 5, "sWord search backward: ",   /*
2347 Search backward from point for STRING, ignoring differences in punctuation.
2348 Set point to the beginning of the occurrence found, and return point.
2349
2350 Optional second argument LIMIT bounds the search; it is a buffer
2351 position.  The match found must not extend before that position.
2352 The value nil is equivalent to (point-min).
2353
2354 Optional third argument NOERROR, if t, means just return nil (no
2355 error) if the search fails.  If neither nil nor t, set point to LIMIT
2356 and return nil.
2357
2358 Optional fourth argument COUNT is a repeat count--search for
2359 successive occurrences.
2360
2361 Optional fifth argument BUFFER specifies the buffer to search in and
2362 defaults to the current buffer.
2363
2364 See also the functions `match-beginning', `match-end' and `replace-match'.
2365 */
2366       (string, limit, noerror, count, buffer))
2367 {
2368         return search_command(wordify(buffer, string), limit, noerror, count,
2369                               buffer, -1, 1, 0);
2370 }
2371
2372 DEFUN("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ",      /*
2373 Search forward from point for STRING, ignoring differences in punctuation.
2374 Set point to the end of the occurrence found, and return point.
2375
2376 Optional second argument LIMIT bounds the search; it is a buffer
2377 position.  The match found must not extend after that position.  The
2378 value nil is equivalent to (point-max).
2379
2380 Optional third argument NOERROR, if t, means just return nil (no
2381 error) if the search fails.  If neither nil nor t, set point to LIMIT
2382 and return nil.
2383
2384 Optional fourth argument COUNT is a repeat count--search for
2385 successive occurrences.
2386
2387 Optional fifth argument BUFFER specifies the buffer to search in and
2388 defaults to the current buffer.
2389
2390 See also the functions `match-beginning', `match-end' and `replace-match'.
2391 */
2392       (string, limit, noerror, count, buffer))
2393 {
2394         return search_command(wordify(buffer, string), limit, noerror, count,
2395                               buffer, 1, 1, 0);
2396 }
2397
2398 DEFUN("re-search-backward", Fre_search_backward, 1, 5, "sRE search backward: ", /*
2399 Search backward from point for match for regular expression REGEXP.
2400 Set point to the beginning of the match, and return point.
2401 The match found is the one starting last in the buffer
2402 and yet ending before the origin of the search.
2403
2404 Optional second argument LIMIT bounds the search; it is a buffer
2405 position.  The match found must not extend before that position.
2406 The value nil is equivalent to (point-min).
2407
2408 Optional third argument NOERROR, if t, means just return nil (no
2409 error) if the search fails.  If neither nil nor t, set point to LIMIT
2410 and return nil.
2411
2412 Optional fourth argument COUNT is a repeat count--search for
2413 successive occurrences.
2414
2415 Optional fifth argument BUFFER specifies the buffer to search in and
2416 defaults to the current buffer.
2417
2418 See also the functions `match-beginning', `match-end' and `replace-match'.
2419 */
2420       (regexp, limit, noerror, count, buffer))
2421 {
2422         return search_command(regexp, limit, noerror, count, buffer, -1, 1, 0);
2423 }
2424
2425 DEFUN("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ",    /*
2426 Search forward from point for regular expression REGEXP.
2427 Set point to the end of the occurrence found, and return point.
2428
2429 Optional second argument LIMIT bounds the search; it is a buffer
2430 position.  The match found must not extend after that position.  The
2431 value nil is equivalent to (point-max).
2432
2433 Optional third argument NOERROR, if t, means just return nil (no
2434 error) if the search fails.  If neither nil nor t, set point to LIMIT
2435 and return nil.
2436
2437 Optional fourth argument COUNT is a repeat count--search for
2438 successive occurrences.
2439
2440 Optional fifth argument BUFFER specifies the buffer to search in and
2441 defaults to the current buffer.
2442
2443 See also the functions `match-beginning', `match-end' and `replace-match'.
2444 */
2445       (regexp, limit, noerror, count, buffer))
2446 {
2447         return search_command(regexp, limit, noerror, count, buffer, 1, 1, 0);
2448 }
2449
2450 DEFUN("posix-search-backward", Fposix_search_backward, 1, 5, "sPosix search backward: ",        /*
2451 Search backward from point for match for regular expression REGEXP.
2452 Find the longest match in accord with Posix regular expression rules.
2453 Set point to the beginning of the match, and return point.
2454 The match found is the one starting last in the buffer
2455 and yet ending before the origin of the search.
2456
2457 Optional second argument LIMIT bounds the search; it is a buffer
2458 position.  The match found must not extend before that position.
2459 The value nil is equivalent to (point-min).
2460
2461 Optional third argument NOERROR, if t, means just return nil (no
2462 error) if the search fails.  If neither nil nor t, set point to LIMIT
2463 and return nil.
2464
2465 Optional fourth argument COUNT is a repeat count--search for
2466 successive occurrences.
2467
2468 Optional fifth argument BUFFER specifies the buffer to search in and
2469 defaults to the current buffer.
2470
2471 See also the functions `match-beginning', `match-end' and `replace-match'.
2472 */
2473       (regexp, limit, noerror, count, buffer))
2474 {
2475         return search_command(regexp, limit, noerror, count, buffer, -1, 1, 1);
2476 }
2477
2478 DEFUN("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ",   /*
2479 Search forward from point for regular expression REGEXP.
2480 Find the longest match in accord with Posix regular expression rules.
2481 Set point to the end of the occurrence found, and return point.
2482
2483 Optional second argument LIMIT bounds the search; it is a buffer
2484 position.  The match found must not extend after that position.  The
2485 value nil is equivalent to (point-max).
2486
2487 Optional third argument NOERROR, if t, means just return nil (no
2488 error) if the search fails.  If neither nil nor t, set point to LIMIT
2489 and return nil.
2490
2491 Optional fourth argument COUNT is a repeat count--search for
2492 successive occurrences.
2493
2494 Optional fifth argument BUFFER specifies the buffer to search in and
2495 defaults to the current buffer.
2496
2497 See also the functions `match-beginning', `match-end' and `replace-match'.
2498 */
2499       (regexp, limit, noerror, count, buffer))
2500 {
2501         return search_command(regexp, limit, noerror, count, buffer, 1, 1, 1);
2502 }
2503 \f
2504 static Lisp_Object free_created_dynarrs(Lisp_Object cons)
2505 {
2506         Dynarr_free(get_opaque_ptr(XCAR(cons)));
2507         Dynarr_free(get_opaque_ptr(XCDR(cons)));
2508         free_opaque_ptr(XCAR(cons));
2509         free_opaque_ptr(XCDR(cons));
2510         free_cons(XCONS(cons));
2511         return Qnil;
2512 }
2513
2514 DEFUN("replace-match", Freplace_match, 1, 5, 0, /*
2515 Replace text matched by last search with REPLACEMENT.
2516 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2517 Otherwise maybe capitalize the whole text, or maybe just word initials,
2518 based on the replaced text.
2519 If the replaced text has only capital letters
2520 and has at least one multiletter word, convert REPLACEMENT to all caps.
2521 If the replaced text has at least one word starting with a capital letter,
2522 then capitalize each word in REPLACEMENT.
2523
2524 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2525 Otherwise treat `\\' as special:
2526 `\\&' in REPLACEMENT means substitute original matched text.
2527 `\\N' means substitute what matched the Nth `\\(...\\)'.
2528 If Nth parens didn't match, substitute nothing.
2529 `\\\\' means insert one `\\'.
2530 `\\u' means upcase the next character.
2531 `\\l' means downcase the next character.
2532 `\\U' means begin upcasing all following characters.
2533 `\\L' means begin downcasing all following characters.
2534 `\\E' means terminate the effect of any `\\U' or `\\L'.
2535 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2536 all other case changes that may be made in the replaced text.
2537 FIXEDCASE and LITERAL are optional arguments.
2538 Leaves point at end of replacement text.
2539
2540 The optional fourth argument STRING can be a string to modify.
2541 In that case, this function creates and returns a new string
2542 which is made by replacing the part of STRING that was matched.
2543 When fourth argument is a string, fifth argument STRBUFFER specifies
2544 the buffer to be used for syntax-table and case-table lookup and
2545 defaults to the current buffer.  When fourth argument is not a string,
2546 the buffer that the match occurred in has automatically been remembered
2547 and you do not need to specify it.
2548
2549 When fourth argument is nil, STRBUFFER specifies a subexpression of
2550 the match.  It says to replace just that subexpression instead of the
2551 whole match.  This is useful only after a regular expression search or
2552 match since only regular expressions have distinguished subexpressions.
2553 */
2554       (replacement, fixedcase, literal, string, strbuffer))
2555 {
2556         /* This function has been Mule-ized. */
2557         /* This function can GC */
2558         enum { nochange, all_caps, cap_initial } case_action;
2559         Bufpos pos, last;
2560         int some_multiletter_word;
2561         int some_lowercase;
2562         int some_uppercase;
2563         int some_nonuppercase_initial;
2564         Emchar c, prevc;
2565         Charcount inslen;
2566         struct buffer *buf;
2567         Lisp_Char_Table *syntax_table;
2568         int mc_count;
2569         Lisp_Object buffer;
2570         int_dynarr *ul_action_dynarr = 0;
2571         int_dynarr *ul_pos_dynarr = 0;
2572         int sub = 0;
2573         int speccount;
2574
2575         CHECK_STRING(replacement);
2576
2577         if (!NILP(string)) {
2578                 CHECK_STRING(string);
2579                 if (!EQ(last_thing_searched, Qt))
2580                         error("last thing matched was not a string");
2581                 /* If the match data
2582                    were abstracted into a special "match data" type instead
2583                    of the typical half-assed "let the implementation be
2584                    visible" form it's in, we could extend it to include
2585                    the last string matched and the buffer used for that
2586                    matching.  But of course we can't change it as it is. */
2587                 buf = decode_buffer(strbuffer, 0);
2588                 XSETBUFFER(buffer, buf);
2589         } else {
2590                 if (!NILP(strbuffer)) {
2591                         CHECK_INT(strbuffer);
2592                         sub = XINT(strbuffer);
2593                         if (sub < 0 || sub >= (int)search_regs.num_regs)
2594                                 args_out_of_range(strbuffer,
2595                                                   make_int(search_regs.
2596                                                            num_regs));
2597                 }
2598                 if (!BUFFERP(last_thing_searched))
2599                         error("last thing matched was not a buffer");
2600                 buffer = last_thing_searched;
2601                 buf = XBUFFER(buffer);
2602         }
2603
2604         syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2605
2606         case_action = nochange; /* We tried an initialization */
2607         /* but some C compilers blew it */
2608
2609         if (search_regs.num_regs == 0)
2610                 error("replace-match called before any match found");
2611
2612         if (NILP(string)) {
2613                 if (search_regs.start[sub] < BUF_BEGV(buf)
2614                     || search_regs.start[sub] > search_regs.end[sub]
2615                     || search_regs.end[sub] > BUF_ZV(buf))
2616                         args_out_of_range(make_int(search_regs.start[sub]),
2617                                           make_int(search_regs.end[sub]));
2618         } else {
2619                 if (search_regs.start[0] < 0
2620                     || search_regs.start[0] > search_regs.end[0]
2621                     || search_regs.end[0] > XSTRING_CHAR_LENGTH(string))
2622                         args_out_of_range(make_int(search_regs.start[0]),
2623                                           make_int(search_regs.end[0]));
2624         }
2625
2626         if (NILP(fixedcase)) {
2627                 /* Decide how to casify by examining the matched text. */
2628
2629                 last = search_regs.end[sub];
2630                 prevc = '\n';
2631                 case_action = all_caps;
2632
2633                 /* some_multiletter_word is set nonzero if any original word
2634                    is more than one letter long. */
2635                 some_multiletter_word = 0;
2636                 some_lowercase = 0;
2637                 some_nonuppercase_initial = 0;
2638                 some_uppercase = 0;
2639
2640                 for (pos = search_regs.start[sub]; pos < last; pos++) {
2641                         if (NILP(string))
2642                                 c = BUF_FETCH_CHAR(buf, pos);
2643                         else
2644                                 c = string_char(XSTRING(string), pos);
2645
2646                         if (LOWERCASEP(buf, c)) {
2647                                 /* Cannot be all caps if any original char is lower case */
2648
2649                                 some_lowercase = 1;
2650                                 if (!WORD_SYNTAX_P(syntax_table, prevc))
2651                                         some_nonuppercase_initial = 1;
2652                                 else
2653                                         some_multiletter_word = 1;
2654                         } else if (!NOCASEP(buf, c)) {
2655                                 some_uppercase = 1;
2656                                 if (!WORD_SYNTAX_P(syntax_table, prevc)) ;
2657                                 else
2658                                         some_multiletter_word = 1;
2659                         } else {
2660                                 /* If the initial is a caseless word constituent,
2661                                    treat that like a lowercase initial.  */
2662                                 if (!WORD_SYNTAX_P(syntax_table, prevc))
2663                                         some_nonuppercase_initial = 1;
2664                         }
2665
2666                         prevc = c;
2667                 }
2668
2669                 /* Convert to all caps if the old text is all caps
2670                    and has at least one multiletter word.  */
2671                 if (!some_lowercase && some_multiletter_word)
2672                         case_action = all_caps;
2673                 /* Capitalize each word, if the old text has all capitalized words.  */
2674                 else if (!some_nonuppercase_initial && some_multiletter_word)
2675                         case_action = cap_initial;
2676                 else if (!some_nonuppercase_initial && some_uppercase)
2677                         /* Should x -> yz, operating on X, give Yz or YZ?
2678                            We'll assume the latter.  */
2679                         case_action = all_caps;
2680                 else
2681                         case_action = nochange;
2682         }
2683
2684         /* Do replacement in a string.  */
2685         if (!NILP(string)) {
2686                 Lisp_Object before, after;
2687
2688                 speccount = specpdl_depth();
2689                 before =
2690                     Fsubstring(string, Qzero, make_int(search_regs.start[0]));
2691                 after = Fsubstring(string, make_int(search_regs.end[0]), Qnil);
2692
2693                 /* Do case substitution into REPLACEMENT if desired.  */
2694                 if (NILP(literal)) {
2695                         Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2696                         Charcount strpos;
2697                         /* XEmacs change: rewrote this loop somewhat to make it
2698                            cleaner.  Also added \U, \E, etc. */
2699                         Charcount literal_start = 0;
2700                         /* We build up the substituted string in ACCUM.  */
2701                         Lisp_Object accum;
2702
2703                         accum = Qnil;
2704
2705                         /* OK, the basic idea here is that we scan through the
2706                            replacement string until we find a backslash, which
2707                            represents a substring of the original string to be
2708                            substituted.  We then append onto ACCUM the literal
2709                            text before the backslash (LASTPOS marks the
2710                            beginning of this) followed by the substring of the
2711                            original string that needs to be inserted. */
2712                         for (strpos = 0; strpos < stlen; strpos++) {
2713                                 /* If LITERAL_END is set, we've encountered a backslash
2714                                    (the end of literal text to be inserted). */
2715                                 Charcount literal_end = -1;
2716                                 /* If SUBSTART is set, we need to also insert the
2717                                    text from SUBSTART to SUBEND in the original string. */
2718                                 Charcount substart = -1;
2719                                 Charcount subend = -1;
2720
2721                                 c = string_char(XSTRING(replacement), strpos);
2722                                 if (c == '\\' && strpos < stlen - 1) {
2723                                         c = string_char(XSTRING(replacement),
2724                                                         ++strpos);
2725                                         if (c == '&') {
2726                                                 literal_end = strpos - 1;
2727                                                 substart = search_regs.start[0];
2728                                                 subend = search_regs.end[0];
2729                                         } else if (c >= '1' && c <= '9' &&
2730                                                    c <=
2731                                                    search_regs.num_regs + '0') {
2732                                                 if (search_regs.
2733                                                     start[c - '0'] >= 0) {
2734                                                         literal_end =
2735                                                             strpos - 1;
2736                                                         substart =
2737                                                             search_regs.
2738                                                             start[c - '0'];
2739                                                         subend =
2740                                                             search_regs.end[c -
2741                                                                             '0'];
2742                                                 }
2743                                         } else if (c == 'U' || c == 'u'
2744                                                    || c == 'L' || c == 'l'
2745                                                    || c == 'E') {
2746                                                 /* Keep track of all case changes requested, but don't
2747                                                    make them now.  Do them later so we override
2748                                                    everything else. */
2749                                                 if (!ul_pos_dynarr) {
2750                                                         ul_pos_dynarr =
2751                                                             Dynarr_new(int);
2752                                                         ul_action_dynarr =
2753                                                             Dynarr_new(int);
2754                                                         record_unwind_protect
2755                                                             (free_created_dynarrs,
2756                                                              noseeum_cons
2757                                                              (make_opaque_ptr
2758                                                               (ul_pos_dynarr),
2759                                                               make_opaque_ptr
2760                                                               (ul_action_dynarr)));
2761                                                 }
2762                                                 literal_end = strpos - 1;
2763                                                 Dynarr_add(ul_pos_dynarr,
2764                                                            (!NILP(accum)
2765                                                             ?
2766                                                             XSTRING_CHAR_LENGTH
2767                                                             (accum)
2768                                                             : 0) +
2769                                                            (literal_end -
2770                                                             literal_start));
2771                                                 Dynarr_add(ul_action_dynarr, c);
2772                                         } else if (c == '\\')
2773                                                 /* So we get just one backslash. */
2774                                                 literal_end = strpos;
2775                                 }
2776                                 if (literal_end >= 0) {
2777                                         Lisp_Object literal_text = Qnil;
2778                                         Lisp_Object substring = Qnil;
2779                                         if (literal_end != literal_start)
2780                                                 literal_text =
2781                                                     Fsubstring(replacement,
2782                                                                make_int
2783                                                                (literal_start),
2784                                                                make_int
2785                                                                (literal_end));
2786                                         if (substart >= 0 && subend != substart)
2787                                                 substring = Fsubstring(string,
2788                                                                        make_int
2789                                                                        (substart),
2790                                                                        make_int
2791                                                                        (subend));
2792                                         if (!NILP(literal_text)
2793                                             || !NILP(substring))
2794                                                 accum =
2795                                                     concat3(accum, literal_text,
2796                                                             substring);
2797                                         literal_start = strpos + 1;
2798                                 }
2799                         }
2800
2801                         if (strpos != literal_start)
2802                                 /* some literal text at end to be inserted */
2803                                 replacement =
2804                                     concat2(accum,
2805                                             Fsubstring(replacement,
2806                                                        make_int(literal_start),
2807                                                        make_int(strpos)));
2808                         else
2809                                 replacement = accum;
2810                 }
2811
2812                 /* replacement can be nil. */
2813                 if (NILP(replacement))
2814                         replacement = build_string("");
2815
2816                 if (case_action == all_caps)
2817                         replacement = Fupcase(replacement, buffer);
2818                 else if (case_action == cap_initial)
2819                         replacement = Fupcase_initials(replacement, buffer);
2820
2821                 /* Now finally, we need to process the \U's, \E's, etc. */
2822                 if (ul_pos_dynarr) {
2823                         int i = 0;
2824                         int cur_action = 'E';
2825                         Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2826                         Charcount strpos;
2827
2828                         for (strpos = 0; strpos < stlen; strpos++) {
2829                                 Emchar curchar =
2830                                     string_char(XSTRING(replacement), strpos);
2831                                 Emchar newchar = -1;
2832                                 if (i < Dynarr_length(ul_pos_dynarr) &&
2833                                     strpos == Dynarr_at(ul_pos_dynarr, i)) {
2834                                         int new_action =
2835                                             Dynarr_at(ul_action_dynarr, i);
2836                                         i++;
2837                                         if (new_action == 'u')
2838                                                 newchar = UPCASE(buf, curchar);
2839                                         else if (new_action == 'l')
2840                                                 newchar =
2841                                                     DOWNCASE(buf, curchar);
2842                                         else
2843                                                 cur_action = new_action;
2844                                 }
2845                                 if (newchar == -1) {
2846                                         if (cur_action == 'U')
2847                                                 newchar = UPCASE(buf, curchar);
2848                                         else if (cur_action == 'L')
2849                                                 newchar =
2850                                                     DOWNCASE(buf, curchar);
2851                                         else
2852                                                 newchar = curchar;
2853                                 }
2854                                 if (newchar != curchar)
2855                                         set_string_char(XSTRING(replacement),
2856                                                         strpos, newchar);
2857                         }
2858                 }
2859
2860                 /* frees the Dynarrs if necessary. */
2861                 unbind_to(speccount, Qnil);
2862                 return concat3(before, replacement, after);
2863         }
2864
2865         mc_count = begin_multiple_change(buf, search_regs.start[sub],
2866                                          search_regs.end[sub]);
2867
2868         /* begin_multiple_change() records an unwind-protect, so we need to
2869            record this value now. */
2870         speccount = specpdl_depth();
2871
2872         /* We insert the replacement text before the old text, and then
2873            delete the original text.  This means that markers at the
2874            beginning or end of the original will float to the corresponding
2875            position in the replacement.  */
2876         BUF_SET_PT(buf, search_regs.start[sub]);
2877         if (!NILP(literal))
2878                 Finsert(1, &replacement);
2879         else {
2880                 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2881                 Charcount strpos;
2882                 struct gcpro gcpro1;
2883                 GCPRO1(replacement);
2884                 for (strpos = 0; strpos < stlen; strpos++) {
2885                         /* on the first iteration assert(offset==0),
2886                            exactly complementing BUF_SET_PT() above.
2887                            During the loop, it keeps track of the amount inserted.
2888                          */
2889                         Charcount offset = BUF_PT(buf) - search_regs.start[sub];
2890
2891                         c = string_char(XSTRING(replacement), strpos);
2892                         if (c == '\\' && strpos < stlen - 1) {
2893                                 /* XXX FIXME: replacing just a substring non-literally
2894                                    using backslash refs to the match looks dangerous.  But
2895                                    <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2896                                    <duwe@caldera.de> claims Finsert_buffer_substring already
2897                                    handles this correctly.
2898                                  */
2899                                 c = string_char(XSTRING(replacement), ++strpos);
2900                                 if (c == '&')
2901                                         Finsert_buffer_substring
2902                                             (buffer,
2903                                              make_int(search_regs.start[0] +
2904                                                       offset),
2905                                              make_int(search_regs.end[0] +
2906                                                       offset));
2907                                 else if (c >= '1' && c <= '9'
2908                                          && c <= search_regs.num_regs + '0') {
2909                                         if (search_regs.start[c - '0'] >= 1)
2910                                                 Finsert_buffer_substring
2911                                                     (buffer,
2912                                                      make_int(search_regs.
2913                                                               start[c - '0'] +
2914                                                               offset),
2915                                                      make_int(search_regs.
2916                                                               end[c - '0'] +
2917                                                               offset));
2918                                 } else if (c == 'U' || c == 'u' || c == 'L'
2919                                            || c == 'l' || c == 'E') {
2920                                         /* Keep track of all case changes requested, but don't
2921                                            make them now.  Do them later so we override
2922                                            everything else. */
2923                                         if (!ul_pos_dynarr) {
2924                                                 ul_pos_dynarr = Dynarr_new(int);
2925                                                 ul_action_dynarr =
2926                                                     Dynarr_new(int);
2927                                                 record_unwind_protect
2928                                                     (free_created_dynarrs,
2929                                                      Fcons(make_opaque_ptr
2930                                                            (ul_pos_dynarr),
2931                                                            make_opaque_ptr
2932                                                            (ul_action_dynarr)));
2933                                         }
2934                                         Dynarr_add(ul_pos_dynarr, BUF_PT(buf));
2935                                         Dynarr_add(ul_action_dynarr, c);
2936                                 } else
2937                                         buffer_insert_emacs_char(buf, c);
2938                         } else
2939                                 buffer_insert_emacs_char(buf, c);
2940                 }
2941                 UNGCPRO;
2942         }
2943
2944         inslen = BUF_PT(buf) - (search_regs.start[sub]);
2945         buffer_delete_range(buf, search_regs.start[sub] + inslen,
2946                             search_regs.end[sub] + inslen, 0);
2947
2948         if (case_action == all_caps)
2949                 Fupcase_region(make_int(BUF_PT(buf) - inslen),
2950                                make_int(BUF_PT(buf)), buffer);
2951         else if (case_action == cap_initial)
2952                 Fupcase_initials_region(make_int(BUF_PT(buf) - inslen),
2953                                         make_int(BUF_PT(buf)), buffer);
2954
2955         /* Now go through and make all the case changes that were requested
2956            in the replacement string. */
2957         if (ul_pos_dynarr) {
2958                 Bufpos eend = BUF_PT(buf);
2959                 int i = 0;
2960                 int cur_action = 'E';
2961
2962                 for (pos = BUF_PT(buf) - inslen; pos < eend; pos++) {
2963                         Emchar curchar = BUF_FETCH_CHAR(buf, pos);
2964                         Emchar newchar = -1;
2965                         if (i < Dynarr_length(ul_pos_dynarr) &&
2966                             pos == Dynarr_at(ul_pos_dynarr, i)) {
2967                                 int new_action = Dynarr_at(ul_action_dynarr, i);
2968                                 i++;
2969                                 if (new_action == 'u')
2970                                         newchar = UPCASE(buf, curchar);
2971                                 else if (new_action == 'l')
2972                                         newchar = DOWNCASE(buf, curchar);
2973                                 else
2974                                         cur_action = new_action;
2975                         }
2976                         if (newchar == -1) {
2977                                 if (cur_action == 'U')
2978                                         newchar = UPCASE(buf, curchar);
2979                                 else if (cur_action == 'L')
2980                                         newchar = DOWNCASE(buf, curchar);
2981                                 else
2982                                         newchar = curchar;
2983                         }
2984                         if (newchar != curchar)
2985                                 buffer_replace_char(buf, pos, newchar, 0, 0);
2986                 }
2987         }
2988
2989         /* frees the Dynarrs if necessary. */
2990         unbind_to(speccount, Qnil);
2991         end_multiple_change(buf, mc_count);
2992
2993         return Qnil;
2994 }
2995 \f
2996 static Lisp_Object match_limit(Lisp_Object num, int beginningp)
2997 {
2998         /* This function has been Mule-ized. */
2999         int n;
3000
3001         CHECK_INT(num);
3002         n = XINT(num);
3003         if (n < 0 || search_regs.num_regs <= 0)
3004                 args_out_of_range(num, make_int(search_regs.num_regs));
3005         if (n >= search_regs.num_regs || search_regs.start[n] < 0)
3006                 return Qnil;
3007         return make_int(beginningp ? search_regs.start[n] : search_regs.end[n]);
3008 }
3009
3010 DEFUN("match-beginning", Fmatch_beginning, 1, 1, 0,     /*
3011 Return position of start of text matched by last regexp search.
3012 NUM, specifies which parenthesized expression in the last regexp.
3013 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3014 Zero means the entire text matched by the whole regexp or whole string.
3015 */
3016       (num))
3017 {
3018         return match_limit(num, 1);
3019 }
3020
3021 DEFUN("match-end", Fmatch_end, 1, 1, 0, /*
3022 Return position of end of text matched by last regexp search.
3023 NUM specifies which parenthesized expression in the last regexp.
3024 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3025 Zero means the entire text matched by the whole regexp or whole string.
3026 */
3027       (num))
3028 {
3029         return match_limit(num, 0);
3030 }
3031
3032 DEFUN("match-data", Fmatch_data, 0, 2, 0,       /*
3033 Return a list containing all info on what the last regexp search matched.
3034 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
3035 All the elements are markers or nil (nil if the Nth pair didn't match)
3036 if the last match was on a buffer; integers or nil if a string was matched.
3037 Use `store-match-data' to reinstate the data in this list.
3038
3039 If INTEGERS (the optional first argument) is non-nil, always use integers
3040 \(rather than markers) to represent buffer positions.
3041 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
3042 to hold all the values, and if INTEGERS is non-nil, no consing is done.
3043 */
3044       (integers, reuse))
3045 {
3046         /* This function has been Mule-ized. */
3047         Lisp_Object tail, prev;
3048         Lisp_Object *data;
3049         int i;
3050         Charcount len;
3051
3052         if (NILP(last_thing_searched))
3053                 /*error ("match-data called before any match found"); */
3054                 return Qnil;
3055
3056         data = alloca_array(Lisp_Object, 2 * search_regs.num_regs);
3057
3058         len = -1;
3059         for (i = 0; i < search_regs.num_regs; i++) {
3060                 Bufpos start = search_regs.start[i];
3061                 if (start >= 0) {
3062                         if (EQ(last_thing_searched, Qt)
3063                             || !NILP(integers)) {
3064                                 data[2 * i] = make_int(start);
3065                                 data[2 * i + 1] = make_int(search_regs.end[i]);
3066                         } else if (BUFFERP(last_thing_searched)) {
3067                                 data[2 * i] = Fmake_marker();
3068                                 Fset_marker(data[2 * i],
3069                                             make_int(start),
3070                                             last_thing_searched);
3071                                 data[2 * i + 1] = Fmake_marker();
3072                                 Fset_marker(data[2 * i + 1],
3073                                             make_int(search_regs.end[i]),
3074                                             last_thing_searched);
3075                         } else
3076                                 /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
3077                                 abort();
3078
3079                         len = i;
3080                 } else
3081                         data[2 * i] = data[2 * i + 1] = Qnil;
3082         }
3083         if (!CONSP(reuse))
3084                 return Flist(2 * len + 2, data);
3085
3086         /* If REUSE is a list, store as many value elements as will fit
3087            into the elements of REUSE.  */
3088         for (prev = Qnil, i = 0, tail = reuse; CONSP(tail);
3089              i++, tail = XCDR(tail)) {
3090                 if (i < 2 * len + 2)
3091                         XCAR(tail) = data[i];
3092                 else
3093                         XCAR(tail) = Qnil;
3094                 prev = tail;
3095         }
3096
3097         /* If we couldn't fit all value elements into REUSE,
3098            cons up the rest of them and add them to the end of REUSE.  */
3099         if (i < 2 * len + 2)
3100                 XCDR(prev) = Flist(2 * len + 2 - i, data + i);
3101
3102         return reuse;
3103 }
3104
3105 DEFUN("store-match-data", Fstore_match_data, 1, 1, 0,   /*
3106 Set internal data on last search match from elements of LIST.
3107 LIST should have been created by calling `match-data' previously.
3108 */
3109       (list))
3110 {
3111         /* This function has been Mule-ized. */
3112         REGISTER int i;
3113         REGISTER Lisp_Object marker;
3114         int num_regs;
3115         int length;
3116
3117         if (running_asynch_code)
3118                 save_search_regs();
3119
3120         CONCHECK_LIST(list);
3121
3122         /* Unless we find a marker with a buffer in LIST, assume that this
3123            match data came from a string.  */
3124         last_thing_searched = Qt;
3125
3126         /* Allocate registers if they don't already exist.  */
3127         length = XINT(Flength(list)) / 2;
3128         num_regs = search_regs.num_regs;
3129
3130         if (length > num_regs) {
3131                 if (search_regs.num_regs == 0) {
3132                         search_regs.start = xnew_atomic_array(regoff_t, length);
3133                         search_regs.end = xnew_atomic_array(regoff_t, length);
3134                 } else {
3135                         XREALLOC_ARRAY(search_regs.start, regoff_t, length);
3136                         XREALLOC_ARRAY(search_regs.end, regoff_t, length);
3137                 }
3138
3139                 search_regs.num_regs = length;
3140         }
3141
3142         for (i = 0; i < num_regs; i++) {
3143                 marker = Fcar(list);
3144                 if (NILP(marker)) {
3145                         search_regs.start[i] = -1;
3146                         list = Fcdr(list);
3147                 } else {
3148                         if (MARKERP(marker)) {
3149                                 if (XMARKER(marker)->buffer == 0)
3150                                         marker = Qzero;
3151                                 else
3152                                         XSETBUFFER(last_thing_searched,
3153                                                    XMARKER(marker)->buffer);
3154                         }
3155
3156                         CHECK_INT_COERCE_MARKER(marker);
3157                         search_regs.start[i] = XINT(marker);
3158                         list = Fcdr(list);
3159
3160                         marker = Fcar(list);
3161                         if (MARKERP(marker) && XMARKER(marker)->buffer == 0)
3162                                 marker = Qzero;
3163
3164                         CHECK_INT_COERCE_MARKER(marker);
3165                         search_regs.end[i] = XINT(marker);
3166                 }
3167                 list = Fcdr(list);
3168         }
3169
3170         return Qnil;
3171 }
3172
3173 /* If non-zero the match data have been saved in saved_search_regs
3174    during the execution of a sentinel or filter. */
3175 static int search_regs_saved;
3176 static struct re_registers saved_search_regs;
3177
3178 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3179    if asynchronous code (filter or sentinel) is running. */
3180 static void save_search_regs(void)
3181 {
3182         if (!search_regs_saved) {
3183                 saved_search_regs.num_regs = search_regs.num_regs;
3184                 saved_search_regs.start = search_regs.start;
3185                 saved_search_regs.end = search_regs.end;
3186                 search_regs.num_regs = 0;
3187                 search_regs.start = 0;
3188                 search_regs.end = 0;
3189
3190                 search_regs_saved = 1;
3191         }
3192 }
3193
3194 /* Called upon exit from filters and sentinels. */
3195 void restore_match_data(void)
3196 {
3197         if (search_regs_saved) {
3198                 if (search_regs.num_regs > 0) {
3199                         xfree(search_regs.start);
3200                         xfree(search_regs.end);
3201                 }
3202                 search_regs.num_regs = saved_search_regs.num_regs;
3203                 search_regs.start = saved_search_regs.start;
3204                 search_regs.end = saved_search_regs.end;
3205
3206                 search_regs_saved = 0;
3207         }
3208 }
3209
3210 /* Quote a string to inactivate reg-expr chars */
3211
3212 DEFUN("regexp-quote", Fregexp_quote, 1, 1, 0,   /*
3213 Return a regexp string which matches exactly STRING and nothing else.
3214 */
3215       (string))
3216 {
3217         REGISTER Bufbyte *in, *out, *end;
3218         REGISTER Bufbyte *temp;
3219
3220         CHECK_STRING(string);
3221
3222         temp = (Bufbyte *) alloca(XSTRING_LENGTH(string) * 2);
3223
3224         /* Now copy the data into the new string, inserting escapes. */
3225
3226         in = XSTRING_DATA(string);
3227         end = in + XSTRING_LENGTH(string);
3228         out = temp;
3229
3230         while (in < end) {
3231                 Emchar c = charptr_emchar(in);
3232
3233                 if (c == '[' || c == ']'
3234                     || c == '*' || c == '.' || c == '\\'
3235                     || c == '?' || c == '+' || c == '^' || c == '$')
3236                         *out++ = '\\';
3237                 out += set_charptr_emchar(out, c);
3238                 INC_CHARPTR(in);
3239         }
3240
3241         return make_string(temp, out - temp);
3242 }
3243
3244 DEFUN("set-word-regexp", Fset_word_regexp, 1, 1, 0,     /*
3245 Set the regexp to be used to match a word in regular-expression searching.
3246 #### Not yet implemented.  Currently does nothing.
3247 #### Do not use this yet.  Its calling interface is likely to change.
3248 */
3249       (regexp))
3250 {
3251         return Qnil;
3252 }
3253 \f
3254 /************************************************************************/
3255 /*                            initialization                            */
3256 /************************************************************************/
3257
3258 void syms_of_search(void)
3259 {
3260
3261         DEFERROR_STANDARD(Qsearch_failed, Qinvalid_operation);
3262         DEFERROR_STANDARD(Qinvalid_regexp, Qsyntax_error);
3263
3264 #ifdef EF_USE_COMPRE
3265         defsymbol(&Qcompre, "compre");
3266         defsymbol(&Qcomprep, "comprep");
3267
3268         DEFSUBR(Fcompile_regexp);
3269         DEFSUBR(Fdefregexp);
3270 #endif
3271
3272         DEFSUBR(Flooking_at);
3273         DEFSUBR(Fposix_looking_at);
3274         DEFSUBR(Fstring_match);
3275         DEFSUBR(Fposix_string_match);
3276         DEFSUBR(Fskip_chars_forward);
3277         DEFSUBR(Fskip_chars_backward);
3278         DEFSUBR(Fskip_syntax_forward);
3279         DEFSUBR(Fskip_syntax_backward);
3280         DEFSUBR(Fsearch_forward);
3281         DEFSUBR(Fsearch_backward);
3282         DEFSUBR(Fword_search_forward);
3283         DEFSUBR(Fword_search_backward);
3284         DEFSUBR(Fre_search_forward);
3285         DEFSUBR(Fre_search_backward);
3286         DEFSUBR(Fposix_search_forward);
3287         DEFSUBR(Fposix_search_backward);
3288         DEFSUBR(Freplace_match);
3289         DEFSUBR(Fmatch_beginning);
3290         DEFSUBR(Fmatch_end);
3291         DEFSUBR(Fmatch_data);
3292         DEFSUBR(Fstore_match_data);
3293         DEFSUBR(Fregexp_quote);
3294         DEFSUBR(Fset_word_regexp);
3295 }
3296
3297 void reinit_vars_of_search(void)
3298 {
3299         int i;
3300
3301         last_thing_searched = Qnil;
3302         staticpro_nodump(&last_thing_searched);
3303
3304         for (i = 0; i < REGEXP_CACHE_SIZE; ++i) {
3305                 searchbufs[i].buf.allocated = 100;
3306                 searchbufs[i].buf.buffer = (unsigned char *)xmalloc_atomic(100);
3307                 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3308                 searchbufs[i].regexp = Qnil;
3309                 staticpro_nodump(&searchbufs[i].regexp);
3310                 searchbufs[i].next =
3311                         (i == REGEXP_CACHE_SIZE - 1 ? 0 : &searchbufs[i + 1]);
3312         }
3313         searchbuf_head = &searchbufs[0];
3314 }
3315
3316 void vars_of_search(void)
3317 {
3318         reinit_vars_of_search();
3319
3320         DEFVAR_LISP("forward-word-regexp", &Vforward_word_regexp        /*
3321 *Regular expression to be used in `forward-word'.
3322 #### Not yet implemented.
3323                                                                          */ );
3324         Vforward_word_regexp = Qnil;
3325
3326         DEFVAR_LISP("backward-word-regexp", &Vbackward_word_regexp      /*
3327 *Regular expression to be used in `backward-word'.
3328 #### Not yet implemented.
3329                                                                          */ );
3330         Vbackward_word_regexp = Qnil;
3331 }
3332
3333 void complex_vars_of_search(void)
3334 {
3335         Vskip_chars_range_table = Fmake_range_table();
3336         staticpro(&Vskip_chars_range_table);
3337 }