cgit.sxemacs.org Git - sxemacs/blob - src/search.c

   1 /* String search routines for SXEmacs.
   2    Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
   3    Copyright (C) 1995 Sun Microsystems, Inc.
   4
   5 This file is part of SXEmacs
   6
   7 SXEmacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 SXEmacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  19
  20
  21 /* Synched up with: FSF 19.29, except for region-cache stuff. */
  22
  23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
  24
  25 /* This file has been Mule-ized except for the TRT stuff. */
  26
  27 #include <config.h>
  28 #include "lisp.h"
  29
  30 #include "buffer.h"
  31 #include "ui/insdel.h"
  32 #include "opaque.h"
  33 #ifdef REGION_CACHE_NEEDS_WORK
  34 #include "region-cache.h"
  35 #endif
  36 #include "syntax.h"
  37
  38 #include <sys/types.h>
  39 #include "regex.h"
  40 #include "casetab.h"
  41 #include "chartab.h"
  42
  43 #include "dynacat.h"
  44
  45 #define TRANSLATE(table, pos)   \
  46  (!NILP (table) ? TRT_TABLE_OF (table, (Emchar) pos) : pos)
  47 \f
  48 #include "elhash.h"
  49 #define REGEXP_CACHE_SIZE 0x80
  50 #define REGEXP_CACHE_HASH_MASK (REGEXP_CACHE_SIZE-1)
  51 #define REGEXP_FASTMAP_SIZE 0400
  52
  53 #define __REGEXP_DEBUG__(args...)       fprintf(stderr, "REGEXP " args)
  54 #ifndef REGEXP_DEBUG_FLAG
  55 #define REGEXP_DEBUG(args...)
  56 #else
  57 #define REGEXP_DEBUG(args...)           __REGEXP_DEBUG__(args)
  58 #endif
  59 #define REGEXP_DEBUG_COMPRE(args...)    REGEXP_DEBUG("[compre]: " args)
  60 #define REGEXP_DEBUG_COMPRE_C(args...)  REGEXP_DEBUG("[compre/cache]: " args)
  61 #define REGEXP_DEBUG_COMPRE_H(args...)  REGEXP_DEBUG("[compre/hash]: " args)
  62 #define REGEXP_CRITICAL(args...)        __REGEXP_DEBUG__("CRITICAL: " args)
  63
  64
  65 /* If the regexp is non-nil, then the buffer contains the compiled form
  66    of that regexp, suitable for searching.  */
  67 struct regexp_cache {
  68         struct regexp_cache *next;
  69         Lisp_Object regexp;
  70         struct re_pattern_buffer buf;
  71         char fastmap[REGEXP_FASTMAP_SIZE];
  72         /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  73         char posix;
  74 };
  75
  76 /* The instances of that struct.  */
  77 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  78
  79 /* The head of the linked list; points to the most recently used buffer.  */
  80 static struct regexp_cache *searchbuf_head;
  81
  82 /* Every call to re_match, etc., must pass &search_regs as the regs
  83    argument unless you can show it is unnecessary (i.e., if re_match
  84    is certainly going to be called again before region-around-match
  85    can be called).
  86
  87    Since the registers are now dynamically allocated, we need to make
  88    sure not to refer to the Nth register before checking that it has
  89    been allocated by checking search_regs.num_regs.
  90
  91    The regex code keeps track of whether it has allocated the search
  92    buffer using bits in the re_pattern_buffer.  This means that whenever
  93    you compile a new pattern, it completely forgets whether it has
  94    allocated any registers, and will allocate new registers the next
  95    time you call a searching or matching function.  Therefore, we need
  96    to call re_set_registers after compiling a new pattern or after
  97    setting the match registers, so that the regex functions will be
  98    able to free or re-allocate it properly.  */
  99
 100 /* Note: things get trickier under Mule because the values returned from
 101    the regexp routines are in Bytinds but we need them to be in Bufpos's.
 102    We take the easy way out for the moment and just convert them immediately.
 103    We could be more clever by not converting them until necessary, but
 104    that gets real ugly real fast since the buffer might have changed and
 105    the positions might be out of sync or out of range.
 106    */
 107 static struct re_registers search_regs;
 108
 109 /* The buffer in which the last search was performed, or
 110    Qt if the last search was done in a string;
 111    Qnil if no searching has been done yet.  */
 112 static Lisp_Object last_thing_searched;
 113
 114 /* error condition signalled when regexp compile_pattern fails */
 115
 116 Lisp_Object Qinvalid_regexp;
 117
 118 /* Regular expressions used in forward/backward-word */
 119 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
 120
 121 /* range table for use with skip_chars.  Only needed for Mule. */
 122 Lisp_Object Vskip_chars_range_table;
 123
 124 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len);
 125 static void clear_unused_search_regs(struct re_registers *regp, int no_sub);
 126 static void save_search_regs(void);
 127 static Bufpos simple_search(struct buffer *buf, Bufbyte * base_pat,
 128                             Bytecount len, Bytind pos, Bytind lim,
 129                             EMACS_INT n, Lisp_Object trt);
 130 static Bufpos boyer_moore(struct buffer *buf, Bufbyte * base_pat,
 131                           Bytecount len, Bytind pos, Bytind lim,
 132                           EMACS_INT n, Lisp_Object trt,
 133                           Lisp_Object inverse_trt, int charset_base);
 134 static Bufpos search_buffer(struct buffer *buf, Lisp_Object str,
 135                             Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
 136                             Lisp_Object trt, Lisp_Object inverse_trt,
 137                             int posix);
 138
 139 static void matcher_overflow(void)
 140 {
 141         error("Stack overflow in regexp matcher");
 142 }
 143
 144 #ifdef EF_USE_COMPRE
 145 EXFUN(Fget, 3);
 146 EXFUN(Fput, 3);
 147 EXFUN(Fremprop, 2);
 148 #define COMPRE_T                struct re_pattern_buffer
 149 #define COMPREP(obj)                                    \
 150         (DYNACATP(obj) && EQ(XDYNACAT_TYPE(obj), Qcompre))
 151 #define COMPRE_GET(obj) (COMPRE_T*)XDYNACAT(obj)->ptr
 152 #define COMPRE_PUT(obj, rec)    do {                            \
 153                 obj = make_dynacat((void*)rec);                 \
 154                 set_dynacat_printer(obj, compre_prfun);         \
 155                 set_dynacat_finaliser(obj, compre_finfun);      \
 156                 XDYNACAT_TYPE(obj) = Qcompre;                   \
 157         } while (0)
 158 #define XCOMPRE_GET(obj)        XSTRING(obj)->compre
 159 #define XCOMPRE_PUT(obj, b)     XSTRING(obj)->compre = b
 160 #define XCOMPRE_REM(obj)        XSTRING(obj)->compre = Qnil
 161
 162 /* idenitifier for hard-cached rexp */
 163 Lisp_Object Qcompre, Qcomprep;
 164 static Lisp_Object cache_regexp(Lisp_Object, struct re_pattern_buffer*);
 165 static COMPRE_T* make_compre(void)
 166 #if defined(__GNUC__)
 167         __attribute__((unused))
 168 #endif
 169         ;
 170 static COMPRE_T* clone_compre(COMPRE_T*);
 171 static inline void free_compre(COMPRE_T*);
 172 #endif
 173
 174 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 175    PATTERN is the pattern to compile.
 176    CP is the place to put the result.
 177    TRANSLATE is a translation table for ignoring case, or NULL for none.
 178    REGP is the structure that says where to store the "register"
 179    values that will result from matching this pattern.
 180    If it is 0, we should compile the pattern not to record any
 181    subexpression bounds.
 182    POSIX is nonzero if we want full backtracking (POSIX style)
 183    for this pattern.  0 means backtrack only enough to get a valid match.  */
 184
 185 static int
 186 compile_pattern_1(struct regexp_cache *cp, Lisp_Object pattern,
 187                   Lisp_Object translate, struct re_registers *regp, int posix,
 188                   Error_behavior errb)
 189 {
 190         const char *val;
 191         reg_syntax_t old;
 192
 193         cp->regexp = Qnil;
 194         cp->buf.translate = translate;
 195         cp->posix = posix;
 196         old = re_set_syntax(RE_SYNTAX_EMACS
 197                             | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 198         val = (const char *)
 199             re_compile_pattern((char *)XSTRING_DATA(pattern),
 200                                XSTRING_LENGTH(pattern), &cp->buf);
 201         re_set_syntax(old);
 202         if (val) {
 203                 maybe_signal_error(Qinvalid_regexp, list1(build_string(val)),
 204                                    Qsearch, errb);
 205                 return 0;
 206         }
 207
 208         cp->regexp = Fcopy_sequence(pattern);
 209         return 1;
 210 }
 211
 212 #ifdef EF_USE_COMPRE
 213 /* extremely simple hash table, no collision management is performed */
 214 static struct re_pattern_buffer *
 215 try_pattern_from_cache(Lisp_Object pattern,
 216                        struct re_registers *regp,
 217                        Lisp_Object translate,
 218                        int posix,
 219                        Error_behavior errb)
 220 {
 221         struct regexp_cache *cp;
 222         hcode_t pathash;
 223         int place;
 224
 225         pathash = internal_hash(pattern, 0);
 226         place = (int)((pathash>>4) & REGEXP_CACHE_HASH_MASK);
 227         cp = &(searchbufs[place]);
 228         REGEXP_DEBUG_COMPRE_H("trying hash place: %d.\n", place);
 229
 230         if (!NILP(Fstring_equal(cp->regexp, pattern))
 231             && EQ(cp->buf.translate, translate) && cp->posix == posix) {
 232                 /* found the pig */
 233                 REGEXP_DEBUG_COMPRE_H("match: %d.\n", place);
 234                 ;
 235         } else {
 236                 /* collision or empty slot, dont care just put the
 237                  * new regexp there */
 238                 if (!compile_pattern_1(
 239                             cp, pattern, translate, regp, posix, errb))
 240                         return 0;
 241                 REGEXP_DEBUG_COMPRE_H("collision: %d.\n", place);
 242         }
 243
 244         return &cp->buf;
 245 }
 246 #else  /* !EF_USE_COMPRE */
 247 static struct re_pattern_buffer *
 248 try_pattern_from_cache(Lisp_Object pattern,
 249                        struct re_registers *regp,
 250                        Lisp_Object translate,
 251                        int posix,
 252                        Error_behavior errb)
 253 {
 254         struct regexp_cache *cp, **cpp;
 255
 256         for (cpp = &searchbuf_head;; cpp = &cp->next) {
 257                 cp = *cpp;
 258                 if (!NILP(Fstring_equal(cp->regexp, pattern))
 259                     && EQ(cp->buf.translate, translate)
 260                     && cp->posix == posix)
 261                         break;
 262
 263                 /* If we're at the end of the cache, compile
 264                  * into the last cell.  */
 265                 if (cp->next == 0) {
 266                         if (!compile_pattern_1(
 267                                     cp, pattern, translate, regp, posix, errb))
 268                                 return 0;
 269                         break;
 270                 }
 271         }
 272
 273         /* When we get here, cp (aka *cpp) contains the compiled pattern,
 274            either because we found it in the cache or because we just compiled it.
 275            Move it to the front of the queue to mark it as most recently used.  */
 276         *cpp = cp->next;
 277         cp->next = searchbuf_head;
 278         searchbuf_head = cp;
 279
 280         return &cp->buf;
 281 }
 282 #endif  /* EF_USE_COMPRE */
 283
 284 /* Compile a regexp if necessary, but first check to see if there's one in
 285    the cache.
 286    PATTERN is the pattern to compile.
 287    TRANSLATE is a translation table for ignoring case, or NULL for none.
 288    REGP is the structure that says where to store the "register"
 289    values that will result from matching this pattern.
 290    If it is 0, we should compile the pattern not to record any
 291    subexpression bounds.
 292    POSIX is nonzero if we want full backtracking (POSIX style)
 293    for this pattern.  0 means backtrack only enough to get a valid match.  */
 294
 295 struct re_pattern_buffer *
 296 compile_pattern(Lisp_Object pattern,
 297                 struct re_registers *regp,
 298                 Lisp_Object translate,
 299                 int posix,
 300                 Error_behavior errb)
 301 {
 302         struct re_pattern_buffer *result = NULL;
 303
 304 #ifdef EF_USE_COMPRE
 305         Lisp_Object rc = XCOMPRE_GET(pattern);
 306
 307         if (!NILP(rc) && COMPREP(rc) &&
 308             (result = COMPRE_GET(rc)) &&
 309             result->re_ngroups >= 0 && result->re_ngroups < 256) {
 310                 REGEXP_DEBUG_COMPRE_C("using cache: 0x%x.\n",
 311                                       (unsigned int)result);
 312                 ;
 313         } else {
 314 #endif
 315                 result = try_pattern_from_cache(
 316                         pattern, regp, translate, posix, errb);
 317 #ifdef EF_USE_COMPRE
 318                 cache_regexp(pattern, result);
 319         }
 320 #endif
 321
 322
 323         /* Advise the searching functions about the space we have allocated
 324            for register data.  */
 325         if (regp)
 326                 re_set_registers(result, regp, regp->num_regs,
 327                                  regp->start, regp->end);
 328
 329         return result;
 330 }
 331
 332 #ifdef EF_USE_COMPRE
 333 static void
 334 compre_prfun(Lisp_Object obj, Lisp_Object pcfun, int escflag)
 335 {
 336         char buf[18];
 337         write_c_string("#<compiled regexp ", pcfun);
 338         snprintf(buf, 17, "%lx>",
 339                  (long unsigned int)((COMPRE_GET(obj))->buffer));
 340         write_c_string(buf, pcfun);
 341
 342         if (escflag);
 343 }
 344
 345 static void
 346 compre_finfun(Lisp_Object obj, int unused)
 347 {
 348         REGEXP_DEBUG_COMPRE_C("0x%lx@0x%lx will pass away\n",
 349                               (long unsigned int)COMPRE_GET(obj),
 350                               (long unsigned int)obj);
 351         free_compre(COMPRE_GET(obj));
 352 }
 353
 354 #if 1
 355 static COMPRE_T *
 356 make_compre(void)
 357 {
 358         COMPRE_T *result = xnew_and_zero(COMPRE_T);
 359
 360         result->fastmap = xmalloc_atomic(REGEXP_FASTMAP_SIZE);
 361
 362         return result;
 363 }
 364 #endif  /* 0 */
 365
 366 static COMPRE_T *
 367 clone_compre(COMPRE_T *src)
 368 {
 369         COMPRE_T *result = xnew_and_zero(COMPRE_T);
 370
 371         /* alloc and clone fastmap */
 372         result->fastmap = (char*)xmalloc_atomic(REGEXP_FASTMAP_SIZE);
 373         memcpy(result->fastmap, src->fastmap, REGEXP_FASTMAP_SIZE);
 374
 375         /* alloc and clone buffer */
 376         result->buffer = (unsigned char *)xmalloc_atomic(src->allocated);
 377         memcpy(result->buffer, src->buffer, src->allocated);
 378         result->allocated = src->allocated;
 379         result->used = src->used;
 380
 381         result->syntax = src->syntax;
 382         result->translate = src->translate;
 383         result->re_nsub = src->re_nsub;
 384         result->re_ngroups = src->re_ngroups;
 385
 386         result->can_be_null = src->can_be_null;
 387         result->regs_allocated = src->regs_allocated;
 388         result->fastmap_accurate = src->fastmap_accurate;
 389
 390         result->no_sub = src->no_sub;
 391         result->not_bol = src->not_bol;
 392         result->not_eol = src->not_eol;
 393         result->newline_anchor = src->newline_anchor;
 394
 395         /* alloc and clone ext_to_int_register */
 396         result->external_to_internal_register =
 397                 (int*)xmalloc_atomic(
 398                         sizeof(int) * src->external_to_internal_register_size);
 399         memcpy(result->external_to_internal_register,
 400                src->external_to_internal_register,
 401                sizeof(int)*src->external_to_internal_register_size);
 402         result->external_to_internal_register_size =
 403                 src->external_to_internal_register_size;
 404
 405         return result;
 406 }
 407
 408 static inline void
 409 free_compre(COMPRE_T *buf)
 410 {
 411         if (buf->buffer) {
 412                 xfree(buf->buffer);
 413                 buf->buffer = NULL;
 414         }
 415         if (buf->fastmap) {
 416                 xfree(buf->fastmap);
 417                 buf->fastmap = NULL;
 418         }
 419         if (buf->external_to_internal_register) {
 420                 xfree(buf->external_to_internal_register);
 421                 buf->external_to_internal_register = NULL;
 422         }
 423         xfree(buf);
 424 }
 425
 426 static Lisp_Object
 427 cache_regexp(Lisp_Object regexp, COMPRE_T *buf)
 428 {
 429         Lisp_Object rc;
 430         COMPRE_T *resbuf;
 431
 432         if (buf == NULL)
 433                 return Qnil;
 434
 435         resbuf = clone_compre(buf);
 436         COMPRE_PUT(rc, resbuf);
 437         XCOMPRE_PUT(regexp, rc);
 438
 439         REGEXP_DEBUG_COMPRE_C("caching 0x%08x into 0x%08x\n",
 440                               (unsigned int)resbuf, (unsigned int)rc);
 441         return rc;
 442 }
 443
 444 DEFUN("compile-regexp", Fcompile_regexp, 1, 1, 0, /*
 445 Forcibly compile REGEXP and store the result in object-plist.
 446 */
 447       (regexp))
 448 {
 449         CHECK_STRING(regexp);
 450
 451         XCOMPRE_REM(regexp);
 452         compile_pattern(regexp, &search_regs, Qnil, 0, ERROR_ME);
 453
 454         return regexp;
 455 }
 456
 457 DEFUN("defregexp", Fdefregexp, 2, UNEVALLED, 0, /*
 458 \(defregexp SYMBOL REGEXP DOCSTRING\)
 459 Like `defconst' but for forcing compiled regexps.
 460
 461 The same restrictions that apply to `defconst' apply here in regard
 462 to user variables.  You shouldn't use this for regular expressions
 463 that a user might want to customise.  Instead, use `defcustom' with
 464 :type 'regexp.
 465 */
 466       (args))
 467 {
 468         /* This function can GC */
 469         Lisp_Object sym = XCAR(args);
 470         Lisp_Object pat = Feval(XCAR(args = XCDR(args)));
 471         struct gcpro gcpro1;
 472
 473         CHECK_STRING(pat);
 474
 475         GCPRO1(pat);
 476         pat = Fcompile_regexp(pat);
 477         Fset_default(sym, pat);
 478         UNGCPRO;
 479
 480         if (!NILP(args = XCDR(args))) {
 481                 Lisp_Object doc = XCAR(args);
 482                 Fput(sym, Qvariable_documentation, doc);
 483                 if (!NILP(args = XCDR(args)))
 484                         error("too many arguments");
 485         }
 486 #ifdef I18N3
 487         if (!NILP(Vfile_domain))
 488                 Fput(sym, Qvariable_domain, Vfile_domain);
 489 #endif  /* I18N3 */
 490
 491         LOADHIST_ATTACH(sym);
 492         return sym;
 493 }
 494 #endif  /* EF_USE_COMPRE */
 495
 496 /* Error condition used for failing searches */
 497 Lisp_Object Qsearch_failed;
 498
 499 static Lisp_Object signal_failure(Lisp_Object arg)
 500 {
 501         for (;;)
 502                 Fsignal(Qsearch_failed, list1(arg));
 503         return Qnil;            /* Not reached. */
 504 }
 505
 506 /* Convert the search registers from Bytinds to Bufpos's.  Needs to be
 507    done after each regexp match that uses the search regs.
 508
 509    We could get a potential speedup by not converting the search registers
 510    until it's really necessary, e.g. when match-data or replace-match is
 511    called.  However, this complexifies the code a lot (e.g. the buffer
 512    could have changed and the Bytinds stored might be invalid) and is
 513    probably not a great time-saver. */
 514
 515 static void fixup_search_regs_for_buffer(struct buffer *buf)
 516 {
 517         int i;
 518         int num_regs = search_regs.num_regs;
 519
 520         for (i = 0; i < num_regs; i++) {
 521                 if (search_regs.start[i] >= 0)
 522                         search_regs.start[i] =
 523                             bytind_to_bufpos(buf, search_regs.start[i]);
 524                 if (search_regs.end[i] >= 0)
 525                         search_regs.end[i] =
 526                             bytind_to_bufpos(buf, search_regs.end[i]);
 527         }
 528 }
 529
 530 /* Similar but for strings. */
 531 static void fixup_search_regs_for_string(Lisp_Object string)
 532 {
 533         int i;
 534         int num_regs = search_regs.num_regs;
 535
 536         /* #### bytecount_to_charcount() is not that efficient.  This function
 537            could be faster if it did its own conversion (using INC_CHARPTR()
 538            and such), because the register ends are likely to be somewhat ordered.
 539            (Even if not, you could sort them.)
 540
 541            Think about this if this function is a time hog, which it's probably
 542            not. */
 543         for (i = 0; i < num_regs; i++) {
 544                 if (search_regs.start[i] > 0) {
 545                         search_regs.start[i] =
 546                             bytecount_to_charcount(XSTRING_DATA(string),
 547                                                    search_regs.start[i]);
 548                 }
 549                 if (search_regs.end[i] > 0) {
 550                         search_regs.end[i] =
 551                             bytecount_to_charcount(XSTRING_DATA(string),
 552                                                    search_regs.end[i]);
 553                 }
 554         }
 555 }
 556 \f
 557 static Lisp_Object
 558 looking_at_1(Lisp_Object string, struct buffer *buf, int posix)
 559 {
 560         /* This function has been Mule-ized, except for the trt table handling. */
 561         Lisp_Object val;
 562         Bytind p1, p2;
 563         Bytecount s1, s2;
 564         REGISTER int i;
 565         struct re_pattern_buffer *bufp;
 566
 567         if (running_asynch_code)
 568                 save_search_regs();
 569
 570         CHECK_STRING(string);
 571         bufp = compile_pattern(string, &search_regs,
 572                                (!NILP(buf->case_fold_search)
 573                                 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
 574                                posix, ERROR_ME);
 575
 576         QUIT;
 577
 578         /* Get pointers and sizes of the two strings
 579            that make up the visible portion of the buffer. */
 580
 581         p1 = BI_BUF_BEGV(buf);
 582         p2 = BI_BUF_CEILING_OF(buf, p1);
 583         s1 = p2 - p1;
 584         s2 = BI_BUF_ZV(buf) - p2;
 585
 586         regex_match_object = Qnil;
 587         regex_emacs_buffer = buf;
 588         i = re_match_2(bufp, (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
 589                        s1, (char *)BI_BUF_BYTE_ADDRESS(buf, p2), s2,
 590                        BI_BUF_PT(buf) - BI_BUF_BEGV(buf), &search_regs,
 591                        BI_BUF_ZV(buf) - BI_BUF_BEGV(buf));
 592
 593         if (i == -2)
 594                 matcher_overflow();
 595
 596         val = (0 <= i ? Qt : Qnil);
 597         if (NILP(val))
 598                 return Qnil;
 599         {
 600                 int num_regs = search_regs.num_regs;
 601                 for (i = 0; i < num_regs; i++)
 602                         if (search_regs.start[i] >= 0) {
 603                                 search_regs.start[i] += BI_BUF_BEGV(buf);
 604                                 search_regs.end[i] += BI_BUF_BEGV(buf);
 605                         }
 606         }
 607         XSETBUFFER(last_thing_searched, buf);
 608         fixup_search_regs_for_buffer(buf);
 609         return val;
 610 }
 611
 612 DEFUN("looking-at", Flooking_at, 1, 2, 0,       /*
 613 Return t if text after point matches regular expression REGEXP.
 614 This function modifies the match data that `match-beginning',
 615 `match-end' and `match-data' access; save and restore the match
 616 data if you want to preserve them.
 617
 618 Optional argument BUFFER defaults to the current buffer.
 619 */
 620       (regexp, buffer))
 621 {
 622         return looking_at_1(regexp, decode_buffer(buffer, 0), 0);
 623 }
 624
 625 DEFUN("posix-looking-at", Fposix_looking_at, 1, 2, 0,   /*
 626 Return t if text after point matches regular expression REGEXP.
 627 Find the longest match, in accord with Posix regular expression rules.
 628 This function modifies the match data that `match-beginning',
 629 `match-end' and `match-data' access; save and restore the match
 630 data if you want to preserve them.
 631
 632 Optional argument BUFFER defaults to the current buffer.
 633 */
 634       (regexp, buffer))
 635 {
 636         return looking_at_1(regexp, decode_buffer(buffer, 0), 1);
 637 }
 638 \f
 639 static Lisp_Object
 640 string_match_1(Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
 641                struct buffer *buf, int posix)
 642 {
 643         /* This function has been Mule-ized, except for the trt table handling. */
 644         Bytecount val;
 645         Charcount s;
 646         struct re_pattern_buffer *bufp;
 647
 648         if (running_asynch_code)
 649                 save_search_regs();
 650
 651         CHECK_STRING(regexp);
 652         CHECK_STRING(string);
 653
 654         if (NILP(start))
 655                 s = 0;
 656         else {
 657                 Charcount len = XSTRING_CHAR_LENGTH(string);
 658
 659                 CHECK_INT(start);
 660                 s = XINT(start);
 661                 if (s < 0 && -s <= len)
 662                         s = len + s;
 663                 else if (0 > s || s > len)
 664                         args_out_of_range(string, start);
 665         }
 666
 667         bufp = compile_pattern(regexp, &search_regs,
 668                                (!NILP(buf->case_fold_search)
 669                                 ? XCASE_TABLE_DOWNCASE(buf->case_table) : Qnil),
 670                                0, ERROR_ME);
 671         QUIT;
 672         {
 673                 Bytecount bis = charcount_to_bytecount(XSTRING_DATA(string), s);
 674                 regex_match_object = string;
 675                 regex_emacs_buffer = buf;
 676                 val = re_search(bufp, (char *)XSTRING_DATA(string),
 677                                 XSTRING_LENGTH(string), bis,
 678                                 XSTRING_LENGTH(string) - bis, &search_regs);
 679         }
 680         if (val == -2)
 681                 matcher_overflow();
 682         if (val < 0)
 683                 return Qnil;
 684         last_thing_searched = Qt;
 685         fixup_search_regs_for_string(string);
 686         return make_int(bytecount_to_charcount(XSTRING_DATA(string), val));
 687 }
 688
 689 DEFUN("string-match", Fstring_match, 2, 4, 0,   /*
 690 Return index of start of first match for REGEXP in STRING, or nil.
 691 If third arg START is non-nil, start search at that index in STRING.
 692 For index of first char beyond the match, do (match-end 0).
 693 `match-end' and `match-beginning' also give indices of substrings
 694 matched by parenthesis constructs in the pattern.
 695
 696 Optional arg BUFFER controls how case folding is done (according to
 697 the value of `case-fold-search' in that buffer and that buffer's case
 698 tables) and defaults to the current buffer.
 699 */
 700       (regexp, string, start, buffer))
 701 {
 702         return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
 703                               0);
 704 }
 705
 706 DEFUN("posix-string-match", Fposix_string_match, 2, 4, 0,       /*
 707 Return index of start of first match for REGEXP in STRING, or nil.
 708 Find the longest match, in accord with Posix regular expression rules.
 709 If third arg START is non-nil, start search at that index in STRING.
 710 For index of first char beyond the match, do (match-end 0).
 711 `match-end' and `match-beginning' also give indices of substrings
 712 matched by parenthesis constructs in the pattern.
 713
 714 Optional arg BUFFER controls how case folding is done (according to
 715 the value of `case-fold-search' in that buffer and that buffer's case
 716 tables) and defaults to the current buffer.
 717 */
 718       (regexp, string, start, buffer))
 719 {
 720         return string_match_1(regexp, string, start, decode_buffer(buffer, 0),
 721                               1);
 722 }
 723
 724 /* Match REGEXP against STRING, searching all of STRING,
 725    and return the index of the match, or negative on failure.
 726    This does not clobber the match data. */
 727
 728 Bytecount
 729 fast_string_match(Lisp_Object regexp, const Bufbyte * nonreloc,
 730                   Lisp_Object reloc, Bytecount offset,
 731                   Bytecount length, int case_fold_search,
 732                   Error_behavior errb, int no_quit)
 733 {
 734         /* This function has been Mule-ized, except for the trt table handling. */
 735         Bytecount val;
 736         const Bufbyte *newnonreloc = (const Bufbyte*)nonreloc;
 737         struct re_pattern_buffer *bufp;
 738
 739         bufp = compile_pattern(regexp, 0,
 740                                (case_fold_search
 741                                 ? XCASE_TABLE_DOWNCASE(current_buffer->
 742                                                        case_table)
 743                                 : Qnil), 0, errb);
 744         if (!bufp)
 745                 return -1;      /* will only do this when errb != ERROR_ME */
 746         if (!no_quit)
 747                 QUIT;
 748         else
 749                 no_quit_in_re_search = 1;
 750
 751         fixup_internal_substring(nonreloc, reloc, offset, &length);
 752
 753         if (!NILP(reloc)) {
 754                 if (no_quit) {
 755                         newnonreloc = XSTRING_DATA(reloc);
 756                 } else {
 757                         /* QUIT could relocate RELOC.  Therefore we must
 758                            alloca() and copy.  No way around this except some
 759                            serious rewriting of re_search(). */
 760                         /* yeah, let's rewrite this bugger, the warning
 761                            hereafter is inevitable too */
 762                         newnonreloc = alloca(length);
 763                         memcpy((void*)newnonreloc, (void*)XSTRING_DATA(reloc), length);
 764                 }
 765         }
 766
 767         /* #### evil current-buffer dependency */
 768         regex_match_object = reloc;
 769         regex_emacs_buffer = current_buffer;
 770         val = re_search(bufp, (const char*)newnonreloc + offset, length, 0,
 771                         length, 0);
 772
 773         no_quit_in_re_search = 0;
 774         return val;
 775 }
 776
 777 Bytecount fast_lisp_string_match(Lisp_Object regex, Lisp_Object string)
 778 {
 779         return fast_string_match(regex, 0, string, 0, -1, 0, ERROR_ME, 0);
 780 }
 781 \f
 782 #ifdef REGION_CACHE_NEEDS_WORK
 783 /* The newline cache: remembering which sections of text have no newlines.  */
 784
 785 /* If the user has requested newline caching, make sure it's on.
 786    Otherwise, make sure it's off.
 787    This is our cheezy way of associating an action with the change of
 788    state of a buffer-local variable.  */
 789 static void newline_cache_on_off(struct buffer *buf)
 790 {
 791         if (NILP(buf->cache_long_line_scans)) {
 792                 /* It should be off.  */
 793                 if (buf->newline_cache) {
 794                         free_region_cache(buf->newline_cache);
 795                         buf->newline_cache = 0;
 796                 }
 797         } else {
 798                 /* It should be on.  */
 799                 if (buf->newline_cache == 0)
 800                         buf->newline_cache = new_region_cache();
 801         }
 802 }
 803 #endif
 804 \f
 805 /* Search in BUF for COUNT instances of the character TARGET between
 806    START and END.
 807
 808    If COUNT is positive, search forwards; END must be >= START.
 809    If COUNT is negative, search backwards for the -COUNTth instance;
 810       END must be <= START.
 811    If COUNT is zero, do anything you please; run rogue, for all I care.
 812
 813    If END is zero, use BEGV or ZV instead, as appropriate for the
 814    direction indicated by COUNT.
 815
 816    If we find COUNT instances, set *SHORTAGE to zero, and return the
 817    position after the COUNTth match.  Note that for reverse motion
 818    this is not the same as the usual convention for Emacs motion commands.
 819
 820    If we don't find COUNT instances before reaching END, set *SHORTAGE
 821    to the number of TARGETs left unfound, and return END.
 822
 823    If ALLOW_QUIT is non-zero, call QUIT periodically. */
 824
 825 static Bytind
 826 bi_scan_buffer(struct buffer *buf, Emchar target, Bytind st, Bytind en,
 827                EMACS_INT count, EMACS_INT * shortage, int allow_quit)
 828 {
 829         /* This function has been Mule-ized. */
 830         Bytind lim = en > 0 ? en :
 831             ((count > 0) ? BI_BUF_ZV(buf) : BI_BUF_BEGV(buf));
 832
 833         /* #### newline cache stuff in this function not yet ported */
 834
 835         assert(count != 0);
 836
 837         if (shortage)
 838                 *shortage = 0;
 839
 840         if (count > 0) {
 841 #ifdef MULE
 842                 /* Due to the Mule representation of characters in a buffer,
 843                    we can simply search for characters in the range 0 - 127
 844                    directly.  For other characters, we do it the "hard" way.
 845                    Note that this way works for all characters but the other
 846                    way is faster. */
 847                 if (target >= 0200) {
 848                         while (st < lim && count > 0) {
 849                                 if (BI_BUF_FETCH_CHAR(buf, st) == target)
 850                                         count--;
 851                                 INC_BYTIND(buf, st);
 852                         }
 853                 } else
 854 #endif
 855                 {
 856                         while (st < lim && count > 0) {
 857                                 Bytind _ceil_;
 858                                 Bufbyte *bufptr;
 859
 860                                 _ceil_ = BI_BUF_CEILING_OF(buf, st);
 861                                 _ceil_ = min(lim, _ceil_);
 862                                 bufptr =
 863                                     (Bufbyte *)
 864                                     memchr(BI_BUF_BYTE_ADDRESS(buf, st),
 865                                            (int)target, _ceil_ - st);
 866                                 if (bufptr) {
 867                                         count--;
 868                                         st = BI_BUF_PTR_BYTE_POS(buf,
 869                                                                  bufptr) + 1;
 870                                 } else
 871                                         st = _ceil_;
 872                         }
 873                 }
 874
 875                 if (shortage)
 876                         *shortage = count;
 877                 if (allow_quit)
 878                         QUIT;
 879                 return st;
 880         } else {
 881 #ifdef MULE
 882                 if (target >= 0200) {
 883                         while (st > lim && count < 0) {
 884                                 DEC_BYTIND(buf, st);
 885                                 if (BI_BUF_FETCH_CHAR(buf, st) == target)
 886                                         count++;
 887                         }
 888                 } else
 889 #endif
 890                 {
 891                         while (st > lim && count < 0) {
 892                                 Bytind _floor_;
 893                                 Bufbyte *bufptr;
 894                                 Bufbyte *floorptr;
 895
 896                                 _floor_ = BI_BUF_FLOOR_OF(buf, st);
 897                                 _floor_ = max(lim, _floor_);
 898                                 /* No memrchr() ... */
 899                                 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE(buf, st);
 900                                 floorptr = BI_BUF_BYTE_ADDRESS(buf, _floor_);
 901                                 while (bufptr >= floorptr) {
 902                                         st--;
 903                                         /* At this point, both ST and BUFPTR
 904                                            refer to the same character.  When
 905                                            the loop terminates, ST will always
 906                                            point to the last character we
 907                                            tried. */
 908                                         if (*(unsigned char *)bufptr ==
 909                                             (unsigned char)target) {
 910                                                 count++;
 911                                                 break;
 912                                         }
 913                                         bufptr--;
 914                                 }
 915                         }
 916                 }
 917
 918                 if (shortage)
 919                         *shortage = -count;
 920                 if (allow_quit)
 921                         QUIT;
 922                 if (count)
 923                         return st;
 924                 else {
 925                         /* We found the character we were looking for; we have to return
 926                            the position *after* it due to the strange way that the return
 927                            value is defined. */
 928                         INC_BYTIND(buf, st);
 929                         return st;
 930                 }
 931         }
 932 }
 933
 934 Bufpos
 935 scan_buffer(struct buffer * buf, Emchar target, Bufpos start, Bufpos end,
 936             EMACS_INT count, EMACS_INT * shortage, int allow_quit)
 937 {
 938         Bytind bi_retval;
 939         Bytind bi_start, bi_end;
 940
 941         bi_start = bufpos_to_bytind(buf, start);
 942         if (end)
 943                 bi_end = bufpos_to_bytind(buf, end);
 944         else
 945                 bi_end = 0;
 946         bi_retval = bi_scan_buffer(buf, target, bi_start, bi_end, count,
 947                                    shortage, allow_quit);
 948         return bytind_to_bufpos(buf, bi_retval);
 949 }
 950
 951 Bytind bi_find_next_newline_no_quit(struct buffer * buf, Bytind from, int count)
 952 {
 953         return bi_scan_buffer(buf, '\n', from, 0, count, 0, 0);
 954 }
 955
 956 Bufpos find_next_newline_no_quit(struct buffer * buf, Bufpos from, int count)
 957 {
 958         return scan_buffer(buf, '\n', from, 0, count, 0, 0);
 959 }
 960
 961 Bufpos find_next_newline(struct buffer * buf, Bufpos from, int count)
 962 {
 963         return scan_buffer(buf, '\n', from, 0, count, 0, 1);
 964 }
 965
 966 Bytind
 967 bi_find_next_emchar_in_string(Lisp_String * str, Emchar target, Bytind st,
 968                               EMACS_INT count)
 969 {
 970         /* This function has been Mule-ized. */
 971         Bytind lim = string_length(str) - 1;
 972         Bufbyte *s = string_data(str);
 973
 974         assert(count >= 0);
 975
 976 #ifdef MULE
 977         /* Due to the Mule representation of characters in a buffer,
 978            we can simply search for characters in the range 0 - 127
 979            directly.  For other characters, we do it the "hard" way.
 980            Note that this way works for all characters but the other
 981            way is faster. */
 982         if (target >= 0200) {
 983                 while (st < lim && count > 0) {
 984                         if (string_char(str, st) == target)
 985                                 count--;
 986                         INC_CHARBYTIND(s, st);
 987                 }
 988         } else
 989 #endif
 990         {
 991                 while (st < lim && count > 0) {
 992                         Bufbyte *bufptr =
 993                             (Bufbyte *) memchr(charptr_n_addr(s, st),
 994                                                (int)target, lim - st);
 995                         if (bufptr) {
 996                                 count--;
 997                                 st = (Bytind) (bufptr - s) + 1;
 998                         } else
 999                                 st = lim;
1000                 }
1001         }
1002         return st;
1003 }
1004
1005 /* Like find_next_newline, but returns position before the newline,
1006    not after, and only search up to TO.  This isn't just
1007    find_next_newline (...)-1, because you might hit TO.  */
1008 Bufpos
1009 find_before_next_newline(struct buffer * buf, Bufpos from, Bufpos to, int count)
1010 {
1011         EMACS_INT shortage;
1012         Bufpos pos = scan_buffer(buf, '\n', from, to, count, &shortage, 1);
1013
1014         if (shortage == 0)
1015                 pos--;
1016
1017         return pos;
1018 }
1019 \f
1020 /* This function synched with FSF 21.1 */
1021 static Lisp_Object
1022 skip_chars(struct buffer *buf, int forwardp, int syntaxp,
1023            Lisp_Object string, Lisp_Object lim)
1024 {
1025         /* This function has been Mule-ized. */
1026         REGISTER Bufbyte *p, *pend;
1027         REGISTER Emchar c;
1028         /* We store the first 256 chars in an array here and the rest in
1029            a range table. */
1030         unsigned char fastmap[REGEXP_FASTMAP_SIZE];
1031         int negate = 0;
1032         REGISTER int i;
1033 #ifndef emacs
1034         Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
1035 #endif
1036         Bufpos limit;
1037
1038         if (NILP(lim))
1039                 limit = forwardp ? BUF_ZV(buf) : BUF_BEGV(buf);
1040         else {
1041                 CHECK_INT_COERCE_MARKER(lim);
1042                 limit = XINT(lim);
1043
1044                 /* In any case, don't allow scan outside bounds of buffer.  */
1045                 if (limit > BUF_ZV(buf))
1046                         limit = BUF_ZV(buf);
1047                 if (limit < BUF_BEGV(buf))
1048                         limit = BUF_BEGV(buf);
1049         }
1050
1051         CHECK_STRING(string);
1052         p = XSTRING_DATA(string);
1053         pend = p + XSTRING_LENGTH(string);
1054         memset(fastmap, 0, sizeof(fastmap));
1055
1056         Fclear_range_table(Vskip_chars_range_table);
1057
1058         if (p != pend && *p == '^') {
1059                 negate = 1;
1060                 p++;
1061         }
1062
1063         /* Find the characters specified and set their elements of fastmap.
1064            If syntaxp, each character counts as itself.
1065            Otherwise, handle backslashes and ranges specially  */
1066
1067         while (p != pend) {
1068                 c = charptr_emchar(p);
1069                 INC_CHARPTR(p);
1070                 if (syntaxp) {
1071                         if (c < REGEXP_FASTMAP_SIZE
1072                             && syntax_spec_code[c] < (unsigned char)Smax)
1073                                 fastmap[c] = 1;
1074                         else
1075                                 signal_simple_error("Invalid syntax designator",
1076                                                     make_char(c));
1077                 } else {
1078                         if (c == '\\') {
1079                                 if (p == pend)
1080                                         break;
1081                                 c = charptr_emchar(p);
1082                                 INC_CHARPTR(p);
1083                         }
1084                         if (p != pend && *p == '-') {
1085                                 Emchar cend;
1086
1087                                 /* Skip over the dash.  */
1088                                 p++;
1089                                 if (p == pend)
1090                                         break;
1091                                 cend = charptr_emchar(p);
1092                                 while (c <= cend && c < REGEXP_FASTMAP_SIZE) {
1093                                         fastmap[c] = 1;
1094                                         c++;
1095                                 }
1096                                 if (c <= cend)
1097                                         Fput_range_table(make_int(c),
1098                                                          make_int(cend), Qt,
1099                                                          Vskip_chars_range_table);
1100                                 INC_CHARPTR(p);
1101                         } else {
1102                                 if (c < REGEXP_FASTMAP_SIZE)
1103                                         fastmap[c] = 1;
1104                                 else
1105                                         Fput_range_table(make_int(c),
1106                                                          make_int(c), Qt,
1107                                                          Vskip_chars_range_table);
1108                         }
1109                 }
1110         }
1111
1112         /* #### Not in FSF 21.1 */
1113         if (syntaxp && fastmap['-'] != 0)
1114                 fastmap[' '] = 1;
1115
1116         /* If ^ was the first character, complement the fastmap.
1117            We don't complement the range table, however; we just use negate
1118            in the comparisons below. */
1119
1120         if (negate)
1121                 for (i = 0; i < (int)(sizeof fastmap); i++)
1122                         fastmap[i] ^= 1;
1123
1124         {
1125                 Bufpos start_point = BUF_PT(buf);
1126                 Bufpos pos = start_point;
1127                 Bytind pos_byte = BI_BUF_PT(buf);
1128
1129                 if (syntaxp) {
1130                         SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos,
1131                                                       forwardp ? 1 : -1);
1132                         /* All syntax designators are normal chars so nothing strange
1133                            to worry about */
1134                         if (forwardp) {
1135                                 if (pos < limit)
1136                                         while (fastmap[(unsigned char)
1137                                                        syntax_code_spec
1138                                                        [(int)SYNTAX_FROM_CACHE
1139                                                         (syntax_table,
1140                                                          BI_BUF_FETCH_CHAR(buf,
1141                                                                            pos_byte))]])
1142                                         {
1143                                                 pos++;
1144                                                 INC_BYTIND(buf, pos_byte);
1145                                                 if (pos >= limit)
1146                                                         break;
1147                                                 UPDATE_SYNTAX_CACHE_FORWARD
1148                                                     (pos);
1149                                         }
1150                         } else {
1151                                 while (pos > limit) {
1152                                         Bufpos savepos = pos_byte;
1153                                         pos--;
1154                                         DEC_BYTIND(buf, pos_byte);
1155                                         UPDATE_SYNTAX_CACHE_BACKWARD(pos);
1156                                         if (!fastmap[(unsigned char)
1157                                                      syntax_code_spec
1158                                                      [(int)SYNTAX_FROM_CACHE
1159                                                       (syntax_table,
1160                                                        BI_BUF_FETCH_CHAR(buf,
1161                                                                          pos_byte))]])
1162                                         {
1163                                                 pos++;
1164                                                 pos_byte = savepos;
1165                                                 break;
1166                                         }
1167                                 }
1168                         }
1169                 } else {
1170                         if (forwardp) {
1171                                 while (pos < limit) {
1172                                         Emchar ch =
1173                                             BI_BUF_FETCH_CHAR(buf, pos_byte);
1174                                         if ((ch <
1175                                              REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1176                                             : (NILP
1177                                                (Fget_range_table
1178                                                 (make_int(ch),
1179                                                  Vskip_chars_range_table, Qnil))
1180                                                == negate)) {
1181                                                 pos++;
1182                                                 INC_BYTIND(buf, pos_byte);
1183                                         } else
1184                                                 break;
1185                                 }
1186                         } else {
1187                                 while (pos > limit) {
1188                                         Bufpos prev_pos_byte = pos_byte;
1189                                         Emchar ch;
1190
1191                                         DEC_BYTIND(buf, prev_pos_byte);
1192                                         ch = BI_BUF_FETCH_CHAR(buf,
1193                                                                prev_pos_byte);
1194                                         if ((ch <
1195                                              REGEXP_FASTMAP_SIZE) ? fastmap[ch]
1196                                             : (NILP
1197                                                (Fget_range_table
1198                                                 (make_int(ch),
1199                                                  Vskip_chars_range_table, Qnil))
1200                                                == negate)) {
1201                                                 pos--;
1202                                                 pos_byte = prev_pos_byte;
1203                                         } else
1204                                                 break;
1205                                 }
1206                         }
1207                 }
1208                 QUIT;
1209                 BOTH_BUF_SET_PT(buf, pos, pos_byte);
1210                 return make_int(BUF_PT(buf) - start_point);
1211         }
1212 }
1213
1214 DEFUN("skip-chars-forward", Fskip_chars_forward, 1, 3, 0,       /*
1215 Move point forward, stopping before a char not in STRING, or at pos LIMIT.
1216 STRING is like the inside of a `[...]' in a regular expression
1217 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
1218 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1219 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1220 Returns the distance traveled, either zero or positive.
1221
1222 Optional argument BUFFER defaults to the current buffer.
1223 */
1224       (string, limit, buffer))
1225 {
1226         return skip_chars(decode_buffer(buffer, 0), 1, 0, string, limit);
1227 }
1228
1229 DEFUN("skip-chars-backward", Fskip_chars_backward, 1, 3, 0,     /*
1230 Move point backward, stopping after a char not in STRING, or at pos LIMIT.
1231 See `skip-chars-forward' for details.
1232 Returns the distance traveled, either zero or negative.
1233
1234 Optional argument BUFFER defaults to the current buffer.
1235 */
1236       (string, limit, buffer))
1237 {
1238         return skip_chars(decode_buffer(buffer, 0), 0, 0, string, limit);
1239 }
1240
1241 DEFUN("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0,     /*
1242 Move point forward across chars in specified syntax classes.
1243 SYNTAX is a string of syntax code characters.
1244 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT.
1245 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1246 This function returns the distance traveled, either zero or positive.
1247
1248 Optional argument BUFFER defaults to the current buffer.
1249 */
1250       (syntax, limit, buffer))
1251 {
1252         return skip_chars(decode_buffer(buffer, 0), 1, 1, syntax, limit);
1253 }
1254
1255 DEFUN("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0,   /*
1256 Move point backward across chars in specified syntax classes.
1257 SYNTAX is a string of syntax code characters.
1258 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT.
1259 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1260 This function returns the distance traveled, either zero or negative.
1261
1262 Optional argument BUFFER defaults to the current buffer.
1263 */
1264       (syntax, limit, buffer))
1265 {
1266         return skip_chars(decode_buffer(buffer, 0), 0, 1, syntax, limit);
1267 }
1268 \f
1269 /* Subroutines of Lisp buffer search functions. */
1270
1271 static Lisp_Object
1272 search_command(Lisp_Object string, Lisp_Object limit, Lisp_Object noerror,
1273                Lisp_Object count, Lisp_Object buffer, int direction,
1274                int RE, int posix)
1275 {
1276         /* This function has been Mule-ized, except for the trt table handling. */
1277         REGISTER Bufpos np;
1278         Bufpos lim;
1279         EMACS_INT n = direction;
1280         struct buffer *buf;
1281
1282         if (!NILP(count)) {
1283                 CHECK_INT(count);
1284                 n *= XINT(count);
1285         }
1286
1287         buf = decode_buffer(buffer, 0);
1288         CHECK_STRING(string);
1289         if (NILP(limit))
1290                 lim = n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
1291         else {
1292                 CHECK_INT_COERCE_MARKER(limit);
1293                 lim = XINT(limit);
1294                 if (n > 0 ? lim < BUF_PT(buf) : lim > BUF_PT(buf))
1295                         error("Invalid search limit (wrong side of point)");
1296                 if (lim > BUF_ZV(buf))
1297                         lim = BUF_ZV(buf);
1298                 if (lim < BUF_BEGV(buf))
1299                         lim = BUF_BEGV(buf);
1300         }
1301
1302         np = search_buffer(buf, string, BUF_PT(buf), lim, n, RE,
1303                            (!NILP(buf->case_fold_search)
1304                             ? XCASE_TABLE_CANON(buf->case_table)
1305                             : Qnil), (!NILP(buf->case_fold_search)
1306                                       ? XCASE_TABLE_EQV(buf->case_table)
1307                                       : Qnil), posix);
1308
1309         if (np <= 0) {
1310                 if (NILP(noerror))
1311                         return signal_failure(string);
1312                 if (!EQ(noerror, Qt)) {
1313                         if (lim < BUF_BEGV(buf) || lim > BUF_ZV(buf))
1314                                 abort();
1315                         BUF_SET_PT(buf, lim);
1316                         return Qnil;
1317 #if 0                           /* This would be clean, but maybe programs depend on
1318                                    a value of nil here.  */
1319                         np = lim;
1320 #endif
1321                 } else
1322                         return Qnil;
1323         }
1324
1325         if (np < BUF_BEGV(buf) || np > BUF_ZV(buf))
1326                 abort();
1327
1328         BUF_SET_PT(buf, np);
1329
1330         return make_int(np);
1331 }
1332 \f
1333 static int trivial_regexp_p(Lisp_Object regexp)
1334 {
1335         /* This function has been Mule-ized. */
1336         Bytecount len = XSTRING_LENGTH(regexp);
1337         Bufbyte *s = XSTRING_DATA(regexp);
1338         while (--len >= 0) {
1339                 switch (*s++) {
1340                         /* ']' doesn't appear here because it's only special after ] */
1341                 case '.':
1342                 case '*':
1343                 case '+':
1344                 case '?':
1345                 case '[':
1346                 case '^':
1347                 case '$':
1348                         return 0;
1349                 case '\\':
1350                         if (--len < 0)
1351                                 return 0;
1352                         switch (*s++) {
1353                         case '|':
1354                         case '(':
1355                         case ')':
1356                         case '`':
1357                         case '\'':
1358                         case 'b':
1359                         case 'B':
1360                         case '<':
1361                         case '>':
1362                         case 'w':
1363                         case 'W':
1364                         case 's':
1365                         case 'S':
1366                         case '=':
1367                         case '{':
1368                         case '}':
1369 #ifdef MULE
1370                                 /* 97/2/25 jhod Added for category matches */
1371                         case 'c':
1372                         case 'C':
1373 #endif                          /* MULE */
1374                         case '1':
1375                         case '2':
1376                         case '3':
1377                         case '4':
1378                         case '5':
1379                         case '6':
1380                         case '7':
1381                         case '8':
1382                         case '9':
1383                                 return 0;
1384                         default:
1385                                 break;
1386                         }
1387                 default:
1388                         break;
1389                 }
1390         }
1391         return 1;
1392 }
1393
1394 /* Search for the n'th occurrence of STRING in BUF,
1395    starting at position BUFPOS and stopping at position BUFLIM,
1396    treating PAT as a literal string if RE is false or as
1397    a regular expression if RE is true.
1398
1399    If N is positive, searching is forward and BUFLIM must be greater
1400    than BUFPOS.
1401    If N is negative, searching is backward and BUFLIM must be less
1402    than BUFPOS.
1403
1404    Returns -x if only N-x occurrences found (x > 0),
1405    or else the position at the beginning of the Nth occurrence
1406    (if searching backward) or the end (if searching forward).
1407
1408    POSIX is nonzero if we want full backtracking (POSIX style)
1409    for this pattern.  0 means backtrack only enough to get a valid match.  */
1410 static Bufpos
1411 search_buffer(struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1412               Bufpos buflim, EMACS_INT n, int RE, Lisp_Object trt,
1413               Lisp_Object inverse_trt, int posix)
1414 {
1415         /* This function has been Mule-ized, except for the trt table handling. */
1416         Bytecount len = XSTRING_LENGTH(string);
1417         Bufbyte *base_pat = XSTRING_DATA(string);
1418         REGISTER EMACS_INT i, j;
1419         Bytind p1, p2;
1420         Bytecount s1, s2;
1421         Bytind pos, lim;
1422
1423         if (running_asynch_code)
1424                 save_search_regs();
1425
1426         /* Null string is found at starting position.  */
1427         if (len == 0) {
1428                 set_search_regs(buf, bufpos, 0);
1429                 clear_unused_search_regs(&search_regs, 0);
1430                 return bufpos;
1431         }
1432
1433         /* Searching 0 times means noop---don't move, don't touch registers.  */
1434         if (n == 0)
1435                 return bufpos;
1436
1437         pos = bufpos_to_bytind(buf, bufpos);
1438         lim = bufpos_to_bytind(buf, buflim);
1439         if (RE && !trivial_regexp_p(string)) {
1440                 struct re_pattern_buffer *bufp;
1441
1442                 bufp = compile_pattern(string, &search_regs, trt, posix,
1443                                        ERROR_ME);
1444
1445                 /* Get pointers and sizes of the two strings
1446                    that make up the visible portion of the buffer. */
1447
1448                 p1 = BI_BUF_BEGV(buf);
1449                 p2 = BI_BUF_CEILING_OF(buf, p1);
1450                 s1 = p2 - p1;
1451                 s2 = BI_BUF_ZV(buf) - p2;
1452                 regex_match_object = Qnil;
1453
1454                 while (n < 0) {
1455                         Bytecount val;
1456                         QUIT;
1457                         regex_emacs_buffer = buf;
1458                         val = re_search_2(bufp,
1459                                           (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1460                                           s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1461                                                                           p2),
1462                                           s2, pos - BI_BUF_BEGV(buf), lim - pos,
1463                                           &search_regs, pos - BI_BUF_BEGV(buf));
1464
1465                         if (val == -2) {
1466                                 matcher_overflow();
1467                         }
1468                         if (val >= 0) {
1469                                 int num_regs = search_regs.num_regs;
1470                                 j = BI_BUF_BEGV(buf);
1471                                 for (i = 0; i < num_regs; i++)
1472                                         if (search_regs.start[i] >= 0) {
1473                                                 search_regs.start[i] += j;
1474                                                 search_regs.end[i] += j;
1475                                         }
1476                                 /* re_match (called from re_search et al) does this for us */
1477                                 /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1478                                 XSETBUFFER(last_thing_searched, buf);
1479                                 /* Set pos to the new position. */
1480                                 pos = search_regs.start[0];
1481                                 fixup_search_regs_for_buffer(buf);
1482                                 /* And bufpos too. */
1483                                 bufpos = search_regs.start[0];
1484                         } else {
1485                                 return n;
1486                         }
1487                         n++;
1488                 }
1489                 while (n > 0) {
1490                         Bytecount val;
1491                         QUIT;
1492                         regex_emacs_buffer = buf;
1493                         val = re_search_2(bufp,
1494                                           (char *)BI_BUF_BYTE_ADDRESS(buf, p1),
1495                                           s1, (char *)BI_BUF_BYTE_ADDRESS(buf,
1496                                                                           p2),
1497                                           s2, pos - BI_BUF_BEGV(buf), lim - pos,
1498                                           &search_regs, lim - BI_BUF_BEGV(buf));
1499                         if (val == -2) {
1500                                 matcher_overflow();
1501                         }
1502                         if (val >= 0) {
1503                                 int num_regs = search_regs.num_regs;
1504                                 j = BI_BUF_BEGV(buf);
1505                                 for (i = 0; i < num_regs; i++)
1506                                         if (search_regs.start[i] >= 0) {
1507                                                 search_regs.start[i] += j;
1508                                                 search_regs.end[i] += j;
1509                                         }
1510                                 /* re_match (called from re_search et al) does this for us */
1511                                 /* clear_unused_search_regs (search_regs, bufp->no_sub);   */
1512                                 XSETBUFFER(last_thing_searched, buf);
1513                                 /* Set pos to the new position. */
1514                                 pos = search_regs.end[0];
1515                                 fixup_search_regs_for_buffer(buf);
1516                                 /* And bufpos too. */
1517                                 bufpos = search_regs.end[0];
1518                         } else {
1519                                 return 0 - n;
1520                         }
1521                         n--;
1522                 }
1523                 return bufpos;
1524         } else {                /* non-RE case */
1525
1526                 int charset_base = -1;
1527                 int boyer_moore_ok = 1;
1528                 Bufbyte *pat = 0;
1529                 Bufbyte *patbuf = alloca_array(Bufbyte, len * MAX_EMCHAR_LEN);
1530                 pat = patbuf;
1531 #ifdef MULE
1532                 while (len > 0) {
1533                         Bufbyte tmp_str[MAX_EMCHAR_LEN];
1534                         Emchar c, translated, inverse;
1535                         Bytecount orig_bytelen, new_bytelen, inv_bytelen;
1536
1537                         /* If we got here and the RE flag is set, it's because
1538                            we're dealing with a regexp known to be trivial, so the
1539                            backslash just quotes the next character.  */
1540                         if (RE && *base_pat == '\\') {
1541                                 len--;
1542                                 base_pat++;
1543                         }
1544                         c = charptr_emchar(base_pat);
1545                         translated = TRANSLATE(trt, c);
1546                         inverse = TRANSLATE(inverse_trt, c);
1547
1548                         orig_bytelen = charcount_to_bytecount(base_pat, 1);
1549                         inv_bytelen = set_charptr_emchar(tmp_str, inverse);
1550                         new_bytelen = set_charptr_emchar(tmp_str, translated);
1551
1552                         if (new_bytelen != orig_bytelen
1553                             || inv_bytelen != orig_bytelen)
1554                                 boyer_moore_ok = 0;
1555                         if (translated != c || inverse != c) {
1556                                 /* Keep track of which character set row
1557                                    contains the characters that need translation.  */
1558                                 int charset_base_code = c & ~CHAR_FIELD3_MASK;
1559                                 if (charset_base == -1)
1560                                         charset_base = charset_base_code;
1561                                 else if (charset_base != charset_base_code)
1562                                         /* If two different rows appear, needing translation,
1563                                            then we cannot use boyer_moore search.  */
1564                                         boyer_moore_ok = 0;
1565                         }
1566                         memcpy(pat, tmp_str, new_bytelen);
1567                         pat += new_bytelen;
1568                         base_pat += orig_bytelen;
1569                         len -= orig_bytelen;
1570                 }
1571 #else                           /* not MULE */
1572                 while (--len >= 0) {
1573                         /* If we got here and the RE flag is set, it's because
1574                            we're dealing with a regexp known to be trivial, so the
1575                            backslash just quotes the next character.  */
1576                         if (RE && *base_pat == '\\') {
1577                                 len--;
1578                                 base_pat++;
1579                         }
1580                         *pat++ = TRANSLATE(trt, *base_pat++);
1581                 }
1582 #endif                          /* MULE */
1583                 len = pat - patbuf;
1584                 pat = base_pat = patbuf;
1585                 if (boyer_moore_ok)
1586                         return boyer_moore(buf, base_pat, len, pos, lim, n,
1587                                            trt, inverse_trt, charset_base);
1588                 else
1589                         return simple_search(buf, base_pat, len, pos, lim, n,
1590                                              trt);
1591         }
1592 }
1593
1594 /* Do a simple string search N times for the string PAT,
1595    whose length is LEN/LEN_BYTE,
1596    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1597    TRT is the translation table.
1598
1599    Return the character position where the match is found.
1600    Otherwise, if M matches remained to be found, return -M.
1601
1602    This kind of search works regardless of what is in PAT and
1603    regardless of what is in TRT.  It is used in cases where
1604    boyer_moore cannot work.  */
1605
1606 static Bufpos
1607 simple_search(struct buffer *buf, Bufbyte * base_pat, Bytecount len_byte,
1608               Bytind idx, Bytind lim, EMACS_INT n, Lisp_Object trt)
1609 {
1610         int forward = n > 0;
1611         Bytecount buf_len = 0;  /* Shut up compiler. */
1612
1613         if (lim > idx) {
1614                 while (n > 0) {
1615                         while (1) {
1616                                 Bytecount this_len = len_byte;
1617                                 Bytind this_idx = idx;
1618                                 const Bufbyte *p = base_pat;
1619                                 if (idx >= lim)
1620                                         goto stop;
1621
1622                                 while (this_len > 0) {
1623                                         Emchar pat_ch, buf_ch;
1624                                         Bytecount pat_len;
1625
1626                                         pat_ch = charptr_emchar(p);
1627                                         buf_ch =
1628                                             BI_BUF_FETCH_CHAR(buf, this_idx);
1629
1630                                         buf_ch = TRANSLATE(trt, buf_ch);
1631
1632                                         if (buf_ch != pat_ch)
1633                                                 break;
1634
1635                                         pat_len = charcount_to_bytecount(p, 1);
1636                                         p += pat_len;
1637                                         this_len -= pat_len;
1638                                         INC_BYTIND(buf, this_idx);
1639                                 }
1640                                 if (this_len == 0) {
1641                                         buf_len = this_idx - idx;
1642                                         idx = this_idx;
1643                                         break;
1644                                 }
1645                                 INC_BYTIND(buf, idx);
1646                         }
1647                         n--;
1648                 }
1649         } else {
1650                 while (n < 0) {
1651                         while (1) {
1652                                 Bytecount this_len = len_byte;
1653                                 Bytind this_idx = idx;
1654                                 const Bufbyte *p = base_pat + len_byte;
1655
1656                                 if (idx <= lim) {
1657                                         goto stop;
1658                                 }
1659
1660                                 while (this_len > 0) {
1661                                         Emchar pat_ch, buf_ch;
1662
1663                                         DEC_CHARPTR(p);
1664                                         DEC_BYTIND(buf, this_idx);
1665                                         pat_ch = charptr_emchar(p);
1666                                         buf_ch =
1667                                             BI_BUF_FETCH_CHAR(buf, this_idx);
1668
1669                                         buf_ch = TRANSLATE(trt, buf_ch);
1670
1671                                         if (buf_ch != pat_ch)
1672                                                 break;
1673
1674                                         this_len -=
1675                                             charcount_to_bytecount(p, 1);
1676                                 }
1677                                 if (this_len == 0) {
1678                                         buf_len = idx - this_idx;
1679                                         idx = this_idx;
1680                                         break;
1681                                 }
1682                                 DEC_BYTIND(buf, idx);
1683                         }
1684                         n++;
1685                 }
1686         }
1687 stop:
1688         if (n == 0) {
1689                 Bufpos beg, end, retval;
1690                 if (forward) {
1691                         beg = bytind_to_bufpos(buf, idx - buf_len);
1692                         retval = end = bytind_to_bufpos(buf, idx);
1693                 } else {
1694                         retval = beg = bytind_to_bufpos(buf, idx);
1695                         end = bytind_to_bufpos(buf, idx + buf_len);
1696                 }
1697                 set_search_regs(buf, beg, end - beg);
1698                 clear_unused_search_regs(&search_regs, 0);
1699
1700                 return retval;
1701         } else if (n > 0) {
1702                 return -n;
1703         } else {
1704                 return n;
1705         }
1706 }
1707
1708 /* Do Boyer-Moore search N times for the string PAT,
1709    whose length is LEN/LEN_BYTE,
1710    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1711    DIRECTION says which direction we search in.
1712    TRT and INVERSE_TRT are translation tables.
1713
1714    This kind of search works if all the characters in PAT that have
1715    nontrivial translation are the same aside from the last byte.  This
1716    makes it possible to translate just the last byte of a character,
1717    and do so after just a simple test of the context.
1718
1719    If that criterion is not satisfied, do not call this function.  */
1720
1721 static Bufpos
1722 boyer_moore(struct buffer *buf, Bufbyte * base_pat, Bytecount len,
1723             Bytind pos, Bytind lim, EMACS_INT n, Lisp_Object trt,
1724             Lisp_Object inverse_trt, int charset_base)
1725 {
1726         /* #### Someone really really really needs to comment the workings
1727            of this junk somewhat better.
1728
1729            BTW "BM" stands for Boyer-Moore, which is one of the standard
1730            string-searching algorithms.  It's the best string-searching
1731            algorithm out there, provided that:
1732
1733            a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1734            uses hashing, is much much easier to code but not as fast.)
1735            b) You can freely move backwards in the string that you're
1736            searching through.
1737
1738            As the comment below tries to explain (but garbles in typical
1739            programmer-ese), the idea is that you don't have to do a
1740            string match at every successive position in the text.  For
1741            example, let's say the pattern is "a very long string".  We
1742            compare the last character in the string (`g') with the
1743            corresponding character in the text.  If it mismatches, and
1744            it is, say, `z', then we can skip forward by the entire
1745            length of the pattern because `z' does not occur anywhere
1746            in the pattern.  If the mismatching character does occur
1747            in the pattern, we can usually still skip forward by more
1748            than one: e.g. if it is `l', then we can skip forward
1749            by the length of the substring "ong string" -- i.e. the
1750            largest end section of the pattern that does not contain
1751            the mismatched character.  So what we do is compute, for
1752            each possible character, the distance we can skip forward
1753            (the "stride") and use it in the string matching.  This
1754            is what the BM_tab holds. */
1755         REGISTER EMACS_INT *BM_tab;
1756         EMACS_INT *BM_tab_base;
1757         REGISTER Bytecount dirlen;
1758         EMACS_INT infinity;
1759         Bytind limit;
1760         Bytecount stride_for_teases = 0;
1761         REGISTER EMACS_INT i, j;
1762         Bufbyte *pat, *pat_end;
1763         REGISTER Bufbyte *cursor, *p_limit, *ptr2;
1764         Bufbyte simple_translate[REGEXP_FASTMAP_SIZE];
1765         REGISTER int direction = ((n > 0) ? 1 : -1);
1766 #ifdef MULE
1767         Bufbyte translate_prev_byte = 0;
1768         Bufbyte translate_anteprev_byte = 0;
1769 #endif
1770 #ifdef C_ALLOCA
1771         EMACS_INT BM_tab_space[REGEXP_FASTMAP_SIZE];
1772         BM_tab = &BM_tab_space[0];
1773 #else
1774         BM_tab = alloca_array(EMACS_INT, 256);
1775 #endif
1776
1777         /* The general approach is that we are going to maintain that we
1778            know the first (closest to the present position, in whatever
1779            direction we're searching) character that could possibly be
1780            the last (furthest from present position) character of a
1781            valid match.  We advance the state of our knowledge by
1782            looking at that character and seeing whether it indeed
1783            matches the last character of the pattern.  If it does, we
1784            take a closer look.  If it does not, we move our pointer (to
1785            putative last characters) as far as is logically possible.
1786            This amount of movement, which I call a stride, will be the
1787            length of the pattern if the actual character appears nowhere
1788            in the pattern, otherwise it will be the distance from the
1789            last occurrence of that character to the end of the pattern.
1790            As a coding trick, an enormous stride is coded into the table
1791            for characters that match the last character.  This allows
1792            use of only a single test, a test for having gone past the
1793            end of the permissible match region, to test for both
1794            possible matches (when the stride goes past the end
1795            immediately) and failure to match (where you get nudged past
1796            the end one stride at a time).
1797
1798            Here we make a "mickey mouse" BM table.  The stride of the
1799            search is determined only by the last character of the
1800            putative match.  If that character does not match, we will
1801            stride the proper distance to propose a match that
1802            superimposes it on the last instance of a character that
1803            matches it (per trt), or misses it entirely if there is
1804            none. */
1805
1806         dirlen = len * direction;
1807         infinity = dirlen - (lim + pos + len + len) * direction;
1808         /* Record position after the end of the pattern.  */
1809         pat_end = base_pat + len;
1810         if (direction < 0)
1811                 base_pat = pat_end - 1;
1812         BM_tab_base = BM_tab;
1813         BM_tab += REGEXP_FASTMAP_SIZE;
1814         j = dirlen;             /* to get it in a register */
1815         /* A character that does not appear in the pattern induces a
1816            stride equal to the pattern length. */
1817         while (BM_tab_base != BM_tab) {
1818                 *--BM_tab = j;
1819                 *--BM_tab = j;
1820                 *--BM_tab = j;
1821                 *--BM_tab = j;
1822         }
1823         /* We use this for translation, instead of TRT itself.  We
1824            fill this in to handle the characters that actually occur
1825            in the pattern.  Others don't matter anyway!  */
1826         xzero(simple_translate);
1827         for (i = 0; i < REGEXP_FASTMAP_SIZE; i++)
1828                 simple_translate[i] = (Bufbyte) i;
1829         i = 0;
1830         while (i != infinity) {
1831                 Bufbyte *ptr = base_pat + i;
1832                 i += direction;
1833                 if (i == dirlen)
1834                         i = infinity;
1835                 if (!NILP(trt)) {
1836 #ifdef MULE
1837                         Emchar ch, untranslated;
1838                         int this_translated = 1;
1839
1840                         /* Is *PTR the last byte of a character?  */
1841                         if (pat_end - ptr == 1 || BUFBYTE_FIRST_BYTE_P(ptr[1])) {
1842                                 Bufbyte *charstart = ptr;
1843                                 while (!BUFBYTE_FIRST_BYTE_P(*charstart))
1844                                         charstart--;
1845                                 untranslated = charptr_emchar(charstart);
1846                                 if (charset_base ==
1847                                     (untranslated & ~CHAR_FIELD3_MASK)) {
1848                                         ch = TRANSLATE(trt, untranslated);
1849                                         if (!BUFBYTE_FIRST_BYTE_P(*ptr)) {
1850                                                 translate_prev_byte = ptr[-1];
1851                                                 if (!BUFBYTE_FIRST_BYTE_P
1852                                                     (translate_prev_byte))
1853                                                         translate_anteprev_byte
1854                                                             = ptr[-2];
1855                                         }
1856                                 } else {
1857                                         this_translated = 0;
1858                                         ch = *ptr;
1859                                 }
1860                         } else {
1861                                 ch = *ptr;
1862                                 this_translated = 0;
1863                         }
1864                         if (ch > REGEXP_FASTMAP_SIZE)
1865                                 j = ((unsigned char)ch | 0200);
1866                         else
1867                                 j = (unsigned char)ch;
1868
1869                         if (i == infinity)
1870                                 stride_for_teases = BM_tab[j];
1871                         BM_tab[j] = dirlen - i;
1872                         /* A translation table is accompanied by its inverse --
1873                            see comment following downcase_table for details */
1874                         if (this_translated) {
1875                                 Emchar starting_ch = ch;
1876                                 EMACS_INT starting_j = j;
1877                                 while (1) {
1878                                         ch = TRANSLATE(inverse_trt, ch);
1879                                         if (ch > REGEXP_FASTMAP_SIZE)
1880                                                 j = ((unsigned char)ch | 0200);
1881                                         else
1882                                                 j = (unsigned char)ch;
1883
1884                                         /* For all the characters that map into CH,
1885                                            set up simple_translate to map the last byte
1886                                            into STARTING_J.  */
1887                                         simple_translate[j] = starting_j;
1888                                         if (ch == starting_ch)
1889                                                 break;
1890                                         BM_tab[j] = dirlen - i;
1891                                 }
1892                         }
1893 #else
1894                         EMACS_INT k;
1895                         j = *ptr;
1896                         k = (j = TRANSLATE(trt, j));
1897                         if (i == infinity)
1898                                 stride_for_teases = BM_tab[j];
1899                         BM_tab[j] = dirlen - i;
1900                         /* A translation table is accompanied by its inverse --
1901                            see comment following downcase_table for details */
1902
1903                         while ((j = TRANSLATE(inverse_trt, j)) != k) {
1904                                 simple_translate[j] = (Bufbyte) k;
1905                                 BM_tab[j] = dirlen - i;
1906                         }
1907 #endif
1908                 } else {
1909                         j = *ptr;
1910
1911                         if (i == infinity)
1912                                 stride_for_teases = BM_tab[j];
1913                         BM_tab[j] = dirlen - i;
1914                 }
1915                 /* stride_for_teases tells how much to stride if we get a
1916                    match on the far character but are subsequently
1917                    disappointed, by recording what the stride would have been
1918                    for that character if the last character had been
1919                    different. */
1920         }
1921         infinity = dirlen - infinity;
1922         pos += dirlen - ((direction > 0) ? direction : 0);
1923         /* loop invariant - pos points at where last char (first char if
1924            reverse) of pattern would align in a possible match.  */
1925         while (n != 0) {
1926                 Bytind tail_end;
1927                 Bufbyte *tail_end_ptr;
1928                 /* It's been reported that some (broken) compiler thinks
1929                    that Boolean expressions in an arithmetic context are
1930                    unsigned.  Using an explicit ?1:0 prevents this.  */
1931                 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1932                         return n * (0 - direction);
1933                 /* First we do the part we can by pointers (maybe
1934                    nothing) */
1935                 QUIT;
1936                 pat = base_pat;
1937                 limit = pos - dirlen + direction;
1938                 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1939                    have changed.  See buffer.h. */
1940                 limit = ((direction > 0)
1941                          ? BI_BUF_CEILING_OF(buf, limit) - 1
1942                          : BI_BUF_FLOOR_OF(buf, limit + 1));
1943                 /* LIMIT is now the last (not beyond-last!) value POS can
1944                    take on without hitting edge of buffer or the gap.  */
1945                 limit = ((direction > 0)
1946                          ? min(lim - 1, min(limit, pos + 20000))
1947                          : max(lim, max(limit, pos - 20000)));
1948                 tail_end = BI_BUF_CEILING_OF(buf, pos);
1949                 tail_end_ptr = BI_BUF_BYTE_ADDRESS(buf, tail_end);
1950
1951                 if ((limit - pos) * direction > 20) {
1952                         p_limit = BI_BUF_BYTE_ADDRESS(buf, limit);
1953                         ptr2 = (cursor = BI_BUF_BYTE_ADDRESS(buf, pos));
1954                         /* In this loop, pos + cursor - ptr2 is the surrogate
1955                            for pos */
1956                         while (1) {     /* use one cursor setting as long as i can */
1957                                 if (direction > 0) {    /* worth duplicating */
1958                                         /* Use signed comparison if appropriate to make
1959                                            cursor+infinity sure to be > p_limit.
1960                                            Assuming that the buffer lies in a range of
1961                                            addresses that are all "positive" (as ints)
1962                                            or all "negative", either kind of comparison
1963                                            will work as long as we don't step by
1964                                            infinity.  So pick the kind that works when
1965                                            we do step by infinity.  */
1966                                         if ((EMACS_INT) (p_limit + infinity) >
1967                                             (EMACS_INT) p_limit)
1968                                                 while ((EMACS_INT) cursor <=
1969                                                        (EMACS_INT) p_limit)
1970                                                         cursor +=
1971                                                             BM_tab[*cursor];
1972                                         else
1973                                                 while ((EMACS_UINT) cursor <=
1974                                                        (EMACS_UINT) p_limit)
1975                                                         cursor +=
1976                                                             BM_tab[*cursor];
1977                                 } else {
1978                                         if ((EMACS_INT) (p_limit + infinity) <
1979                                             (EMACS_INT) p_limit)
1980                                                 while ((EMACS_INT) cursor >=
1981                                                        (EMACS_INT) p_limit)
1982                                                         cursor +=
1983                                                             BM_tab[*cursor];
1984                                         else
1985                                                 while ((EMACS_UINT) cursor >=
1986                                                        (EMACS_UINT) p_limit)
1987                                                         cursor +=
1988                                                             BM_tab[*cursor];
1989                                 }
1990                                 /* If you are here, cursor is beyond the end of the
1991                                    searched region.  This can happen if you match on
1992                                    the far character of the pattern, because the
1993                                    "stride" of that character is infinity, a number
1994                                    able to throw you well beyond the end of the
1995                                    search.  It can also happen if you fail to match
1996                                    within the permitted region and would otherwise
1997                                    try a character beyond that region */
1998                                 if ((cursor - p_limit) * direction <= len)
1999                                         break;  /* a small overrun is genuine */
2000                                 cursor -= infinity;     /* large overrun = hit */
2001                                 i = dirlen - direction;
2002                                 if (!NILP(trt)) {
2003                                         while ((i -=
2004                                                 direction) + direction != 0) {
2005 #ifdef MULE
2006                                                 Emchar ch;
2007                                                 cursor -= direction;
2008                                                 /* Translate only the last byte of a character.  */
2009                                                 if ((cursor == tail_end_ptr
2010                                                      ||
2011                                                      BUFBYTE_FIRST_BYTE_P(cursor
2012                                                                           [1]))
2013                                                     &&
2014                                                     (BUFBYTE_FIRST_BYTE_P
2015                                                      (cursor[0])
2016                                                      || (translate_prev_byte ==
2017                                                          cursor[-1]
2018                                                          &&
2019                                                          (BUFBYTE_FIRST_BYTE_P
2020                                                           (translate_prev_byte)
2021                                                           ||
2022                                                           translate_anteprev_byte
2023                                                           == cursor[-2]))))
2024                                                         ch = simple_translate
2025                                                             [*cursor];
2026                                                 else
2027                                                         ch = *cursor;
2028                                                 if (pat[i] != ch)
2029                                                         break;
2030 #else
2031                                                 if (pat[i] !=
2032                                                     TRANSLATE(trt,
2033                                                               *(cursor -=
2034                                                                 direction)))
2035                                                         break;
2036 #endif
2037                                         }
2038                                 } else {
2039                                         while ((i -=
2040                                                 direction) + direction != 0)
2041                                                 if (pat[i] !=
2042                                                     *(cursor -= direction))
2043                                                         break;
2044                                 }
2045                                 cursor += dirlen - i - direction;       /* fix cursor */
2046                                 if (i + direction == 0) {
2047                                         cursor -= direction;
2048
2049                                         {
2050                                                 Bytind bytstart =
2051                                                     (pos + cursor - ptr2 +
2052                                                      ((direction > 0)
2053                                                       ? 1 - len : 0));
2054                                                 Bufpos bufstart =
2055                                                     bytind_to_bufpos(buf,
2056                                                                      bytstart);
2057                                                 Bufpos bufend =
2058                                                     bytind_to_bufpos(buf,
2059                                                                      bytstart +
2060                                                                      len);
2061
2062                                                 set_search_regs(buf, bufstart,
2063                                                                 bufend -
2064                                                                 bufstart);
2065                                                 clear_unused_search_regs
2066                                                     (&search_regs, 0);
2067                                         }
2068
2069                                         if ((n -= direction) != 0)
2070                                                 cursor += dirlen;       /* to resume search */
2071                                         else
2072                                                 return ((direction > 0)
2073                                                         ? search_regs.
2074                                                         end[0] : search_regs.
2075                                                         start[0]);
2076                                 } else
2077                                         cursor += stride_for_teases;    /* <sigh> we lose -  */
2078                         }
2079                         pos += cursor - ptr2;
2080                 } else
2081                         /* Now we'll pick up a clump that has to be done the hard
2082                            way because it covers a discontinuity */
2083                 {
2084                         /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
2085                            have changed.  See buffer.h. */
2086                         limit = ((direction > 0)
2087                                  ? BI_BUF_CEILING_OF(buf, pos - dirlen + 1) - 1
2088                                  : BI_BUF_FLOOR_OF(buf, pos - dirlen));
2089                         limit = ((direction > 0)
2090                                  ? min(limit + len, lim - 1)
2091                                  : max(limit - len, lim));
2092                         /* LIMIT is now the last value POS can have
2093                            and still be valid for a possible match.  */
2094                         while (1) {
2095                                 /* This loop can be coded for space rather than
2096                                    speed because it will usually run only once.
2097                                    (the reach is at most len + 21, and typically
2098                                    does not exceed len) */
2099                                 while ((limit - pos) * direction >= 0)
2100                                         /* *not* BI_BUF_FETCH_CHAR.  We are working here
2101                                            with bytes, not characters. */
2102                                         pos +=
2103                                             BM_tab[*BI_BUF_BYTE_ADDRESS
2104                                                    (buf, pos)];
2105                                 /* now run the same tests to distinguish going off
2106                                    the end, a match or a phony match. */
2107                                 if ((pos - limit) * direction <= len)
2108                                         break;  /* ran off the end */
2109                                 /* Found what might be a match.
2110                                    Set POS back to last (first if reverse) char pos.  */
2111                                 pos -= infinity;
2112                                 i = dirlen - direction;
2113                                 while ((i -= direction) + direction != 0) {
2114 #ifdef MULE
2115                                         Emchar ch;
2116                                         Bufbyte *ptr;
2117 #endif
2118                                         pos -= direction;
2119 #ifdef MULE
2120                                         ptr = BI_BUF_BYTE_ADDRESS(buf, pos);
2121                                         if ((ptr == tail_end_ptr
2122                                              || BUFBYTE_FIRST_BYTE_P(ptr[1]))
2123                                             && (BUFBYTE_FIRST_BYTE_P(ptr[0])
2124                                                 || (translate_prev_byte ==
2125                                                     ptr[-1]
2126                                                     &&
2127                                                     (BUFBYTE_FIRST_BYTE_P
2128                                                      (translate_prev_byte)
2129                                                      || translate_anteprev_byte
2130                                                      == ptr[-2]))))
2131                                                 ch = simple_translate[*ptr];
2132                                         else
2133                                                 ch = *ptr;
2134                                         if (pat[i] != ch)
2135                                                 break;
2136
2137 #else
2138                                         if (pat[i] != TRANSLATE(trt,
2139                                                                 *BI_BUF_BYTE_ADDRESS
2140                                                                 (buf, pos)))
2141                                                 break;
2142 #endif
2143                                 }
2144                                 /* Above loop has moved POS part or all the way back
2145                                    to the first char pos (last char pos if reverse).
2146                                    Set it once again at the last (first if reverse)
2147                                    char.  */
2148                                 pos += dirlen - i - direction;
2149                                 if (i + direction == 0) {
2150                                         pos -= direction;
2151
2152                                         {
2153                                                 Bytind bytstart = (pos +
2154                                                                    ((direction >
2155                                                                      0)
2156                                                                     ? 1 -
2157                                                                     len : 0));
2158                                                 Bufpos bufstart =
2159                                                     bytind_to_bufpos(buf,
2160                                                                      bytstart);
2161                                                 Bufpos bufend =
2162                                                     bytind_to_bufpos(buf,
2163                                                                      bytstart +
2164                                                                      len);
2165
2166                                                 set_search_regs(buf, bufstart,
2167                                                                 bufend -
2168                                                                 bufstart);
2169                                                 clear_unused_search_regs
2170                                                     (&search_regs, 0);
2171                                         }
2172
2173                                         if ((n -= direction) != 0)
2174                                                 pos += dirlen;  /* to resume search */
2175                                         else
2176                                                 return ((direction > 0)
2177                                                         ? search_regs.
2178                                                         end[0] : search_regs.
2179                                                         start[0]);
2180                                 } else
2181                                         pos += stride_for_teases;
2182                         }
2183                 }
2184                 /* We have done one clump.  Can we continue? */
2185                 if ((lim - pos) * direction < 0)
2186                         return (0 - n) * direction;
2187         }
2188         return bytind_to_bufpos(buf, pos);
2189 }
2190
2191 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the
2192    buffer for a match just found.  */
2193
2194 static void set_search_regs(struct buffer *buf, Bufpos beg, Charcount len)
2195 {
2196         /* This function has been Mule-ized. */
2197         /* Make sure we have registers in which to store
2198            the match position.  */
2199         if (search_regs.num_regs == 0) {
2200                 search_regs.start = xnew_atomic(regoff_t);
2201                 search_regs.end = xnew_atomic(regoff_t);
2202                 search_regs.num_regs = 1;
2203         }
2204
2205         search_regs.start[0] = beg;
2206         search_regs.end[0] = beg + len;
2207         XSETBUFFER(last_thing_searched, buf);
2208 }
2209
2210 /* Clear unused search registers so match data will be null.
2211    REGP is a pointer to the register structure to clear, usually the global
2212    search_regs.
2213    NO_SUB is the number of subexpressions to allow for.  (Does not count
2214    the whole match, ie, for a string search NO_SUB == 0.)
2215    It is an error if NO_SUB > REGP.num_regs - 1. */
2216
2217 static void clear_unused_search_regs(struct re_registers *regp, int no_sub)
2218 {
2219         /* This function has been Mule-ized. */
2220         int i;
2221
2222         assert(no_sub >= 0 && no_sub < regp->num_regs);
2223         for (i = no_sub + 1; i < regp->num_regs; i++)
2224                 regp->start[i] = regp->end[i] = -1;
2225 }
2226 \f
2227 /* Given a string of words separated by word delimiters,
2228    compute a regexp that matches those exact words
2229    separated by arbitrary punctuation.  */
2230
2231 static Lisp_Object wordify(Lisp_Object buffer, Lisp_Object string)
2232 {
2233         Charcount i, len;
2234         EMACS_INT punct_count = 0, word_count = 0;
2235         struct buffer *buf = decode_buffer(buffer, 0);
2236         Lisp_Char_Table *syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2237
2238         CHECK_STRING(string);
2239         len = XSTRING_CHAR_LENGTH(string);
2240
2241         for (i = 0; i < len; i++)
2242                 if (!WORD_SYNTAX_P
2243                     (syntax_table, string_char(XSTRING(string), i))) {
2244                         punct_count++;
2245                         if (i > 0 && WORD_SYNTAX_P(syntax_table,
2246                                                    string_char(XSTRING(string),
2247                                                                i - 1)))
2248                                 word_count++;
2249                 }
2250         if (WORD_SYNTAX_P(syntax_table, string_char(XSTRING(string), len - 1)))
2251                 word_count++;
2252         if (!word_count)
2253                 return build_string("");
2254
2255         {
2256                 /* The following value is an upper bound on the amount of storage we
2257                    need.  In non-Mule, it is exact. */
2258                 Bufbyte *storage =
2259                     (Bufbyte *) alloca(XSTRING_LENGTH(string) - punct_count +
2260                                        5 * (word_count - 1) + 4);
2261                 Bufbyte *o = storage;
2262
2263                 *o++ = '\\';
2264                 *o++ = 'b';
2265
2266                 for (i = 0; i < len; i++) {
2267                         Emchar ch = string_char(XSTRING(string), i);
2268
2269                         if (WORD_SYNTAX_P(syntax_table, ch))
2270                                 o += set_charptr_emchar(o, ch);
2271                         else if (i > 0
2272                                  && WORD_SYNTAX_P(syntax_table,
2273                                                   string_char(XSTRING(string),
2274                                                               i - 1))
2275                                  && --word_count) {
2276                                 *o++ = '\\';
2277                                 *o++ = 'W';
2278                                 *o++ = '\\';
2279                                 *o++ = 'W';
2280                                 *o++ = '*';
2281                         }
2282                 }
2283
2284                 *o++ = '\\';
2285                 *o++ = 'b';
2286
2287                 return make_string(storage, o - storage);
2288         }
2289 }
2290 \f
2291 DEFUN("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ",  /*
2292 Search backward from point for STRING.
2293 Set point to the beginning of the occurrence found, and return point.
2294
2295 Optional second argument LIMIT bounds the search; it is a buffer
2296 position.  The match found must not extend before that position.
2297 The value nil is equivalent to (point-min).
2298
2299 Optional third argument NOERROR, if t, means just return nil (no
2300 error) if the search fails.  If neither nil nor t, set point to LIMIT
2301 and return nil.
2302
2303 Optional fourth argument COUNT is a repeat count--search for
2304 successive occurrences.
2305
2306 Optional fifth argument BUFFER specifies the buffer to search in and
2307 defaults to the current buffer.
2308
2309 See also the functions `match-beginning', `match-end' and `replace-match'.
2310 */
2311       (string, limit, noerror, count, buffer))
2312 {
2313         return search_command(string, limit, noerror, count, buffer, -1, 0, 0);
2314 }
2315
2316 DEFUN("search-forward", Fsearch_forward, 1, 5, "sSearch: ",     /*
2317 Search forward from point for STRING.
2318 Set point to the end of the occurrence found, and return point.
2319
2320 Optional second argument LIMIT bounds the search; it is a buffer
2321 position.  The match found must not extend after that position.  The
2322 value nil is equivalent to (point-max).
2323
2324 Optional third argument NOERROR, if t, means just return nil (no
2325 error) if the search fails.  If neither nil nor t, set point to LIMIT
2326 and return nil.
2327
2328 Optional fourth argument COUNT is a repeat count--search for
2329 successive occurrences.
2330
2331 Optional fifth argument BUFFER specifies the buffer to search in and
2332 defaults to the current buffer.
2333
2334 See also the functions `match-beginning', `match-end' and `replace-match'.
2335 */
2336       (string, limit, noerror, count, buffer))
2337 {
2338         return search_command(string, limit, noerror, count, buffer, 1, 0, 0);
2339 }
2340
2341 DEFUN("word-search-backward", Fword_search_backward, 1, 5, "sWord search backward: ",   /*
2342 Search backward from point for STRING, ignoring differences in punctuation.
2343 Set point to the beginning of the occurrence found, and return point.
2344
2345 Optional second argument LIMIT bounds the search; it is a buffer
2346 position.  The match found must not extend before that position.
2347 The value nil is equivalent to (point-min).
2348
2349 Optional third argument NOERROR, if t, means just return nil (no
2350 error) if the search fails.  If neither nil nor t, set point to LIMIT
2351 and return nil.
2352
2353 Optional fourth argument COUNT is a repeat count--search for
2354 successive occurrences.
2355
2356 Optional fifth argument BUFFER specifies the buffer to search in and
2357 defaults to the current buffer.
2358
2359 See also the functions `match-beginning', `match-end' and `replace-match'.
2360 */
2361       (string, limit, noerror, count, buffer))
2362 {
2363         return search_command(wordify(buffer, string), limit, noerror, count,
2364                               buffer, -1, 1, 0);
2365 }
2366
2367 DEFUN("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ",      /*
2368 Search forward from point for STRING, ignoring differences in punctuation.
2369 Set point to the end of the occurrence found, and return point.
2370
2371 Optional second argument LIMIT bounds the search; it is a buffer
2372 position.  The match found must not extend after that position.  The
2373 value nil is equivalent to (point-max).
2374
2375 Optional third argument NOERROR, if t, means just return nil (no
2376 error) if the search fails.  If neither nil nor t, set point to LIMIT
2377 and return nil.
2378
2379 Optional fourth argument COUNT is a repeat count--search for
2380 successive occurrences.
2381
2382 Optional fifth argument BUFFER specifies the buffer to search in and
2383 defaults to the current buffer.
2384
2385 See also the functions `match-beginning', `match-end' and `replace-match'.
2386 */
2387       (string, limit, noerror, count, buffer))
2388 {
2389         return search_command(wordify(buffer, string), limit, noerror, count,
2390                               buffer, 1, 1, 0);
2391 }
2392
2393 DEFUN("re-search-backward", Fre_search_backward, 1, 5, "sRE search backward: ", /*
2394 Search backward from point for match for regular expression REGEXP.
2395 Set point to the beginning of the match, and return point.
2396 The match found is the one starting last in the buffer
2397 and yet ending before the origin of the search.
2398
2399 Optional second argument LIMIT bounds the search; it is a buffer
2400 position.  The match found must not extend before that position.
2401 The value nil is equivalent to (point-min).
2402
2403 Optional third argument NOERROR, if t, means just return nil (no
2404 error) if the search fails.  If neither nil nor t, set point to LIMIT
2405 and return nil.
2406
2407 Optional fourth argument COUNT is a repeat count--search for
2408 successive occurrences.
2409
2410 Optional fifth argument BUFFER specifies the buffer to search in and
2411 defaults to the current buffer.
2412
2413 See also the functions `match-beginning', `match-end' and `replace-match'.
2414 */
2415       (regexp, limit, noerror, count, buffer))
2416 {
2417         return search_command(regexp, limit, noerror, count, buffer, -1, 1, 0);
2418 }
2419
2420 DEFUN("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ",    /*
2421 Search forward from point for regular expression REGEXP.
2422 Set point to the end of the occurrence found, and return point.
2423
2424 Optional second argument LIMIT bounds the search; it is a buffer
2425 position.  The match found must not extend after that position.  The
2426 value nil is equivalent to (point-max).
2427
2428 Optional third argument NOERROR, if t, means just return nil (no
2429 error) if the search fails.  If neither nil nor t, set point to LIMIT
2430 and return nil.
2431
2432 Optional fourth argument COUNT is a repeat count--search for
2433 successive occurrences.
2434
2435 Optional fifth argument BUFFER specifies the buffer to search in and
2436 defaults to the current buffer.
2437
2438 See also the functions `match-beginning', `match-end' and `replace-match'.
2439 */
2440       (regexp, limit, noerror, count, buffer))
2441 {
2442         return search_command(regexp, limit, noerror, count, buffer, 1, 1, 0);
2443 }
2444
2445 DEFUN("posix-search-backward", Fposix_search_backward, 1, 5, "sPosix search backward: ",        /*
2446 Search backward from point for match for regular expression REGEXP.
2447 Find the longest match in accord with Posix regular expression rules.
2448 Set point to the beginning of the match, and return point.
2449 The match found is the one starting last in the buffer
2450 and yet ending before the origin of the search.
2451
2452 Optional second argument LIMIT bounds the search; it is a buffer
2453 position.  The match found must not extend before that position.
2454 The value nil is equivalent to (point-min).
2455
2456 Optional third argument NOERROR, if t, means just return nil (no
2457 error) if the search fails.  If neither nil nor t, set point to LIMIT
2458 and return nil.
2459
2460 Optional fourth argument COUNT is a repeat count--search for
2461 successive occurrences.
2462
2463 Optional fifth argument BUFFER specifies the buffer to search in and
2464 defaults to the current buffer.
2465
2466 See also the functions `match-beginning', `match-end' and `replace-match'.
2467 */
2468       (regexp, limit, noerror, count, buffer))
2469 {
2470         return search_command(regexp, limit, noerror, count, buffer, -1, 1, 1);
2471 }
2472
2473 DEFUN("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ",   /*
2474 Search forward from point for regular expression REGEXP.
2475 Find the longest match in accord with Posix regular expression rules.
2476 Set point to the end of the occurrence found, and return point.
2477
2478 Optional second argument LIMIT bounds the search; it is a buffer
2479 position.  The match found must not extend after that position.  The
2480 value nil is equivalent to (point-max).
2481
2482 Optional third argument NOERROR, if t, means just return nil (no
2483 error) if the search fails.  If neither nil nor t, set point to LIMIT
2484 and return nil.
2485
2486 Optional fourth argument COUNT is a repeat count--search for
2487 successive occurrences.
2488
2489 Optional fifth argument BUFFER specifies the buffer to search in and
2490 defaults to the current buffer.
2491
2492 See also the functions `match-beginning', `match-end' and `replace-match'.
2493 */
2494       (regexp, limit, noerror, count, buffer))
2495 {
2496         return search_command(regexp, limit, noerror, count, buffer, 1, 1, 1);
2497 }
2498 \f
2499 static Lisp_Object free_created_dynarrs(Lisp_Object cons)
2500 {
2501         Dynarr_free(get_opaque_ptr(XCAR(cons)));
2502         Dynarr_free(get_opaque_ptr(XCDR(cons)));
2503         free_opaque_ptr(XCAR(cons));
2504         free_opaque_ptr(XCDR(cons));
2505         free_cons(XCONS(cons));
2506         return Qnil;
2507 }
2508
2509 DEFUN("replace-match", Freplace_match, 1, 5, 0, /*
2510 Replace text matched by last search with REPLACEMENT.
2511 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2512 Otherwise maybe capitalize the whole text, or maybe just word initials,
2513 based on the replaced text.
2514 If the replaced text has only capital letters
2515 and has at least one multiletter word, convert REPLACEMENT to all caps.
2516 If the replaced text has at least one word starting with a capital letter,
2517 then capitalize each word in REPLACEMENT.
2518
2519 If third arg LITERAL is non-nil, insert REPLACEMENT literally.
2520 Otherwise treat `\\' as special:
2521 `\\&' in REPLACEMENT means substitute original matched text.
2522 `\\N' means substitute what matched the Nth `\\(...\\)'.
2523 If Nth parens didn't match, substitute nothing.
2524 `\\\\' means insert one `\\'.
2525 `\\u' means upcase the next character.
2526 `\\l' means downcase the next character.
2527 `\\U' means begin upcasing all following characters.
2528 `\\L' means begin downcasing all following characters.
2529 `\\E' means terminate the effect of any `\\U' or `\\L'.
2530 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
2531 all other case changes that may be made in the replaced text.
2532 FIXEDCASE and LITERAL are optional arguments.
2533 Leaves point at end of replacement text.
2534
2535 The optional fourth argument STRING can be a string to modify.
2536 In that case, this function creates and returns a new string
2537 which is made by replacing the part of STRING that was matched.
2538 When fourth argument is a string, fifth argument STRBUFFER specifies
2539 the buffer to be used for syntax-table and case-table lookup and
2540 defaults to the current buffer.  When fourth argument is not a string,
2541 the buffer that the match occurred in has automatically been remembered
2542 and you do not need to specify it.
2543
2544 When fourth argument is nil, STRBUFFER specifies a subexpression of
2545 the match.  It says to replace just that subexpression instead of the
2546 whole match.  This is useful only after a regular expression search or
2547 match since only regular expressions have distinguished subexpressions.
2548 */
2549       (replacement, fixedcase, literal, string, strbuffer))
2550 {
2551         /* This function has been Mule-ized. */
2552         /* This function can GC */
2553         enum { nochange, all_caps, cap_initial } case_action;
2554         Bufpos pos, last;
2555         int some_multiletter_word;
2556         int some_lowercase;
2557         int some_uppercase;
2558         int some_nonuppercase_initial;
2559         Emchar c, prevc;
2560         Charcount inslen;
2561         struct buffer *buf;
2562         Lisp_Char_Table *syntax_table;
2563         int mc_count;
2564         Lisp_Object buffer;
2565         int_dynarr *ul_action_dynarr = 0;
2566         int_dynarr *ul_pos_dynarr = 0;
2567         int sub = 0;
2568         int speccount;
2569
2570         CHECK_STRING(replacement);
2571
2572         if (!NILP(string)) {
2573                 CHECK_STRING(string);
2574                 if (!EQ(last_thing_searched, Qt))
2575                         error("last thing matched was not a string");
2576                 /* If the match data
2577                    were abstracted into a special "match data" type instead
2578                    of the typical half-assed "let the implementation be
2579                    visible" form it's in, we could extend it to include
2580                    the last string matched and the buffer used for that
2581                    matching.  But of course we can't change it as it is. */
2582                 buf = decode_buffer(strbuffer, 0);
2583                 XSETBUFFER(buffer, buf);
2584         } else {
2585                 if (!NILP(strbuffer)) {
2586                         CHECK_INT(strbuffer);
2587                         sub = XINT(strbuffer);
2588                         if (sub < 0 || sub >= (int)search_regs.num_regs)
2589                                 args_out_of_range(strbuffer,
2590                                                   make_int(search_regs.
2591                                                            num_regs));
2592                 }
2593                 if (!BUFFERP(last_thing_searched))
2594                         error("last thing matched was not a buffer");
2595                 buffer = last_thing_searched;
2596                 buf = XBUFFER(buffer);
2597         }
2598
2599         syntax_table = XCHAR_TABLE(buf->mirror_syntax_table);
2600
2601         case_action = nochange; /* We tried an initialization */
2602         /* but some C compilers blew it */
2603
2604         if (search_regs.num_regs == 0)
2605                 error("replace-match called before any match found");
2606
2607         if (NILP(string)) {
2608                 if (search_regs.start[sub] < BUF_BEGV(buf)
2609                     || search_regs.start[sub] > search_regs.end[sub]
2610                     || search_regs.end[sub] > BUF_ZV(buf))
2611                         args_out_of_range(make_int(search_regs.start[sub]),
2612                                           make_int(search_regs.end[sub]));
2613         } else {
2614                 if (search_regs.start[0] < 0
2615                     || search_regs.start[0] > search_regs.end[0]
2616                     || search_regs.end[0] > XSTRING_CHAR_LENGTH(string))
2617                         args_out_of_range(make_int(search_regs.start[0]),
2618                                           make_int(search_regs.end[0]));
2619         }
2620
2621         if (NILP(fixedcase)) {
2622                 /* Decide how to casify by examining the matched text. */
2623
2624                 last = search_regs.end[sub];
2625                 prevc = '\n';
2626                 case_action = all_caps;
2627
2628                 /* some_multiletter_word is set nonzero if any original word
2629                    is more than one letter long. */
2630                 some_multiletter_word = 0;
2631                 some_lowercase = 0;
2632                 some_nonuppercase_initial = 0;
2633                 some_uppercase = 0;
2634
2635                 for (pos = search_regs.start[sub]; pos < last; pos++) {
2636                         if (NILP(string))
2637                                 c = BUF_FETCH_CHAR(buf, pos);
2638                         else
2639                                 c = string_char(XSTRING(string), pos);
2640
2641                         if (LOWERCASEP(buf, c)) {
2642                                 /* Cannot be all caps if any original char is lower case */
2643
2644                                 some_lowercase = 1;
2645                                 if (!WORD_SYNTAX_P(syntax_table, prevc))
2646                                         some_nonuppercase_initial = 1;
2647                                 else
2648                                         some_multiletter_word = 1;
2649                         } else if (!NOCASEP(buf, c)) {
2650                                 some_uppercase = 1;
2651                                 if (!WORD_SYNTAX_P(syntax_table, prevc)) ;
2652                                 else
2653                                         some_multiletter_word = 1;
2654                         } else {
2655                                 /* If the initial is a caseless word constituent,
2656                                    treat that like a lowercase initial.  */
2657                                 if (!WORD_SYNTAX_P(syntax_table, prevc))
2658                                         some_nonuppercase_initial = 1;
2659                         }
2660
2661                         prevc = c;
2662                 }
2663
2664                 /* Convert to all caps if the old text is all caps
2665                    and has at least one multiletter word.  */
2666                 if (!some_lowercase && some_multiletter_word)
2667                         case_action = all_caps;
2668                 /* Capitalize each word, if the old text has all capitalized words.  */
2669                 else if (!some_nonuppercase_initial && some_multiletter_word)
2670                         case_action = cap_initial;
2671                 else if (!some_nonuppercase_initial && some_uppercase)
2672                         /* Should x -> yz, operating on X, give Yz or YZ?
2673                            We'll assume the latter.  */
2674                         case_action = all_caps;
2675                 else
2676                         case_action = nochange;
2677         }
2678
2679         /* Do replacement in a string.  */
2680         if (!NILP(string)) {
2681                 Lisp_Object before, after;
2682
2683                 speccount = specpdl_depth();
2684                 before =
2685                     Fsubstring(string, Qzero, make_int(search_regs.start[0]));
2686                 after = Fsubstring(string, make_int(search_regs.end[0]), Qnil);
2687
2688                 /* Do case substitution into REPLACEMENT if desired.  */
2689                 if (NILP(literal)) {
2690                         Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2691                         Charcount strpos;
2692                         /* XEmacs change: rewrote this loop somewhat to make it
2693                            cleaner.  Also added \U, \E, etc. */
2694                         Charcount literal_start = 0;
2695                         /* We build up the substituted string in ACCUM.  */
2696                         Lisp_Object accum;
2697
2698                         accum = Qnil;
2699
2700                         /* OK, the basic idea here is that we scan through the
2701                            replacement string until we find a backslash, which
2702                            represents a substring of the original string to be
2703                            substituted.  We then append onto ACCUM the literal
2704                            text before the backslash (LASTPOS marks the
2705                            beginning of this) followed by the substring of the
2706                            original string that needs to be inserted. */
2707                         for (strpos = 0; strpos < stlen; strpos++) {
2708                                 /* If LITERAL_END is set, we've encountered a backslash
2709                                    (the end of literal text to be inserted). */
2710                                 Charcount literal_end = -1;
2711                                 /* If SUBSTART is set, we need to also insert the
2712                                    text from SUBSTART to SUBEND in the original string. */
2713                                 Charcount substart = -1;
2714                                 Charcount subend = -1;
2715
2716                                 c = string_char(XSTRING(replacement), strpos);
2717                                 if (c == '\\' && strpos < stlen - 1) {
2718                                         c = string_char(XSTRING(replacement),
2719                                                         ++strpos);
2720                                         if (c == '&') {
2721                                                 literal_end = strpos - 1;
2722                                                 substart = search_regs.start[0];
2723                                                 subend = search_regs.end[0];
2724                                         } else if (c >= '1' && c <= '9' &&
2725                                                    c <=
2726                                                    search_regs.num_regs + '0') {
2727                                                 if (search_regs.
2728                                                     start[c - '0'] >= 0) {
2729                                                         literal_end =
2730                                                             strpos - 1;
2731                                                         substart =
2732                                                             search_regs.
2733                                                             start[c - '0'];
2734                                                         subend =
2735                                                             search_regs.end[c -
2736                                                                             '0'];
2737                                                 }
2738                                         } else if (c == 'U' || c == 'u'
2739                                                    || c == 'L' || c == 'l'
2740                                                    || c == 'E') {
2741                                                 /* Keep track of all case changes requested, but don't
2742                                                    make them now.  Do them later so we override
2743                                                    everything else. */
2744                                                 if (!ul_pos_dynarr) {
2745                                                         ul_pos_dynarr =
2746                                                             Dynarr_new(int);
2747                                                         ul_action_dynarr =
2748                                                             Dynarr_new(int);
2749                                                         record_unwind_protect
2750                                                             (free_created_dynarrs,
2751                                                              noseeum_cons
2752                                                              (make_opaque_ptr
2753                                                               (ul_pos_dynarr),
2754                                                               make_opaque_ptr
2755                                                               (ul_action_dynarr)));
2756                                                 }
2757                                                 literal_end = strpos - 1;
2758                                                 Dynarr_add(ul_pos_dynarr,
2759                                                            (!NILP(accum)
2760                                                             ?
2761                                                             XSTRING_CHAR_LENGTH
2762                                                             (accum)
2763                                                             : 0) +
2764                                                            (literal_end -
2765                                                             literal_start));
2766                                                 Dynarr_add(ul_action_dynarr, c);
2767                                         } else if (c == '\\')
2768                                                 /* So we get just one backslash. */
2769                                                 literal_end = strpos;
2770                                 }
2771                                 if (literal_end >= 0) {
2772                                         Lisp_Object literal_text = Qnil;
2773                                         Lisp_Object substring = Qnil;
2774                                         if (literal_end != literal_start)
2775                                                 literal_text =
2776                                                     Fsubstring(replacement,
2777                                                                make_int
2778                                                                (literal_start),
2779                                                                make_int
2780                                                                (literal_end));
2781                                         if (substart >= 0 && subend != substart)
2782                                                 substring = Fsubstring(string,
2783                                                                        make_int
2784                                                                        (substart),
2785                                                                        make_int
2786                                                                        (subend));
2787                                         if (!NILP(literal_text)
2788                                             || !NILP(substring))
2789                                                 accum =
2790                                                     concat3(accum, literal_text,
2791                                                             substring);
2792                                         literal_start = strpos + 1;
2793                                 }
2794                         }
2795
2796                         if (strpos != literal_start)
2797                                 /* some literal text at end to be inserted */
2798                                 replacement =
2799                                     concat2(accum,
2800                                             Fsubstring(replacement,
2801                                                        make_int(literal_start),
2802                                                        make_int(strpos)));
2803                         else
2804                                 replacement = accum;
2805                 }
2806
2807                 /* replacement can be nil. */
2808                 if (NILP(replacement))
2809                         replacement = build_string("");
2810
2811                 if (case_action == all_caps)
2812                         replacement = Fupcase(replacement, buffer);
2813                 else if (case_action == cap_initial)
2814                         replacement = Fupcase_initials(replacement, buffer);
2815
2816                 /* Now finally, we need to process the \U's, \E's, etc. */
2817                 if (ul_pos_dynarr) {
2818                         int i = 0;
2819                         int cur_action = 'E';
2820                         Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2821                         Charcount strpos;
2822
2823                         for (strpos = 0; strpos < stlen; strpos++) {
2824                                 Emchar curchar =
2825                                     string_char(XSTRING(replacement), strpos);
2826                                 Emchar newchar = -1;
2827                                 if (i < Dynarr_length(ul_pos_dynarr) &&
2828                                     strpos == Dynarr_at(ul_pos_dynarr, i)) {
2829                                         int new_action =
2830                                             Dynarr_at(ul_action_dynarr, i);
2831                                         i++;
2832                                         if (new_action == 'u')
2833                                                 newchar = UPCASE(buf, curchar);
2834                                         else if (new_action == 'l')
2835                                                 newchar =
2836                                                     DOWNCASE(buf, curchar);
2837                                         else
2838                                                 cur_action = new_action;
2839                                 }
2840                                 if (newchar == -1) {
2841                                         if (cur_action == 'U')
2842                                                 newchar = UPCASE(buf, curchar);
2843                                         else if (cur_action == 'L')
2844                                                 newchar =
2845                                                     DOWNCASE(buf, curchar);
2846                                         else
2847                                                 newchar = curchar;
2848                                 }
2849                                 if (newchar != curchar)
2850                                         set_string_char(XSTRING(replacement),
2851                                                         strpos, newchar);
2852                         }
2853                 }
2854
2855                 /* frees the Dynarrs if necessary. */
2856                 unbind_to(speccount, Qnil);
2857                 return concat3(before, replacement, after);
2858         }
2859
2860         mc_count = begin_multiple_change(buf, search_regs.start[sub],
2861                                          search_regs.end[sub]);
2862
2863         /* begin_multiple_change() records an unwind-protect, so we need to
2864            record this value now. */
2865         speccount = specpdl_depth();
2866
2867         /* We insert the replacement text before the old text, and then
2868            delete the original text.  This means that markers at the
2869            beginning or end of the original will float to the corresponding
2870            position in the replacement.  */
2871         BUF_SET_PT(buf, search_regs.start[sub]);
2872         if (!NILP(literal))
2873                 Finsert(1, &replacement);
2874         else {
2875                 Charcount stlen = XSTRING_CHAR_LENGTH(replacement);
2876                 Charcount strpos;
2877                 struct gcpro gcpro1;
2878                 GCPRO1(replacement);
2879                 for (strpos = 0; strpos < stlen; strpos++) {
2880                         /* on the first iteration assert(offset==0),
2881                            exactly complementing BUF_SET_PT() above.
2882                            During the loop, it keeps track of the amount inserted.
2883                          */
2884                         Charcount offset = BUF_PT(buf) - search_regs.start[sub];
2885
2886                         c = string_char(XSTRING(replacement), strpos);
2887                         if (c == '\\' && strpos < stlen - 1) {
2888                                 /* XXX FIXME: replacing just a substring non-literally
2889                                    using backslash refs to the match looks dangerous.  But
2890                                    <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe
2891                                    <duwe@caldera.de> claims Finsert_buffer_substring already
2892                                    handles this correctly.
2893                                  */
2894                                 c = string_char(XSTRING(replacement), ++strpos);
2895                                 if (c == '&')
2896                                         Finsert_buffer_substring
2897                                             (buffer,
2898                                              make_int(search_regs.start[0] +
2899                                                       offset),
2900                                              make_int(search_regs.end[0] +
2901                                                       offset));
2902                                 else if (c >= '1' && c <= '9'
2903                                          && c <= search_regs.num_regs + '0') {
2904                                         if (search_regs.start[c - '0'] >= 1)
2905                                                 Finsert_buffer_substring
2906                                                     (buffer,
2907                                                      make_int(search_regs.
2908                                                               start[c - '0'] +
2909                                                               offset),
2910                                                      make_int(search_regs.
2911                                                               end[c - '0'] +
2912                                                               offset));
2913                                 } else if (c == 'U' || c == 'u' || c == 'L'
2914                                            || c == 'l' || c == 'E') {
2915                                         /* Keep track of all case changes requested, but don't
2916                                            make them now.  Do them later so we override
2917                                            everything else. */
2918                                         if (!ul_pos_dynarr) {
2919                                                 ul_pos_dynarr = Dynarr_new(int);
2920                                                 ul_action_dynarr =
2921                                                     Dynarr_new(int);
2922                                                 record_unwind_protect
2923                                                     (free_created_dynarrs,
2924                                                      Fcons(make_opaque_ptr
2925                                                            (ul_pos_dynarr),
2926                                                            make_opaque_ptr
2927                                                            (ul_action_dynarr)));
2928                                         }
2929                                         Dynarr_add(ul_pos_dynarr, BUF_PT(buf));
2930                                         Dynarr_add(ul_action_dynarr, c);
2931                                 } else
2932                                         buffer_insert_emacs_char(buf, c);
2933                         } else
2934                                 buffer_insert_emacs_char(buf, c);
2935                 }
2936                 UNGCPRO;
2937         }
2938
2939         inslen = BUF_PT(buf) - (search_regs.start[sub]);
2940         buffer_delete_range(buf, search_regs.start[sub] + inslen,
2941                             search_regs.end[sub] + inslen, 0);
2942
2943         if (case_action == all_caps)
2944                 Fupcase_region(make_int(BUF_PT(buf) - inslen),
2945                                make_int(BUF_PT(buf)), buffer);
2946         else if (case_action == cap_initial)
2947                 Fupcase_initials_region(make_int(BUF_PT(buf) - inslen),
2948                                         make_int(BUF_PT(buf)), buffer);
2949
2950         /* Now go through and make all the case changes that were requested
2951            in the replacement string. */
2952         if (ul_pos_dynarr) {
2953                 Bufpos eend = BUF_PT(buf);
2954                 int i = 0;
2955                 int cur_action = 'E';
2956
2957                 for (pos = BUF_PT(buf) - inslen; pos < eend; pos++) {
2958                         Emchar curchar = BUF_FETCH_CHAR(buf, pos);
2959                         Emchar newchar = -1;
2960                         if (i < Dynarr_length(ul_pos_dynarr) &&
2961                             pos == Dynarr_at(ul_pos_dynarr, i)) {
2962                                 int new_action = Dynarr_at(ul_action_dynarr, i);
2963                                 i++;
2964                                 if (new_action == 'u')
2965                                         newchar = UPCASE(buf, curchar);
2966                                 else if (new_action == 'l')
2967                                         newchar = DOWNCASE(buf, curchar);
2968                                 else
2969                                         cur_action = new_action;
2970                         }
2971                         if (newchar == -1) {
2972                                 if (cur_action == 'U')
2973                                         newchar = UPCASE(buf, curchar);
2974                                 else if (cur_action == 'L')
2975                                         newchar = DOWNCASE(buf, curchar);
2976                                 else
2977                                         newchar = curchar;
2978                         }
2979                         if (newchar != curchar)
2980                                 buffer_replace_char(buf, pos, newchar, 0, 0);
2981                 }
2982         }
2983
2984         /* frees the Dynarrs if necessary. */
2985         unbind_to(speccount, Qnil);
2986         end_multiple_change(buf, mc_count);
2987
2988         return Qnil;
2989 }
2990 \f
2991 static Lisp_Object match_limit(Lisp_Object num, int beginningp)
2992 {
2993         /* This function has been Mule-ized. */
2994         int n;
2995
2996         CHECK_INT(num);
2997         n = XINT(num);
2998         if (n < 0 || search_regs.num_regs <= 0)
2999                 args_out_of_range(num, make_int(search_regs.num_regs));
3000         if (n >= search_regs.num_regs || search_regs.start[n] < 0)
3001                 return Qnil;
3002         return make_int(beginningp ? search_regs.start[n] : search_regs.end[n]);
3003 }
3004
3005 DEFUN("match-beginning", Fmatch_beginning, 1, 1, 0,     /*
3006 Return position of start of text matched by last regexp search.
3007 NUM, specifies which parenthesized expression in the last regexp.
3008 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3009 Zero means the entire text matched by the whole regexp or whole string.
3010 */
3011       (num))
3012 {
3013         return match_limit(num, 1);
3014 }
3015
3016 DEFUN("match-end", Fmatch_end, 1, 1, 0, /*
3017 Return position of end of text matched by last regexp search.
3018 NUM specifies which parenthesized expression in the last regexp.
3019 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
3020 Zero means the entire text matched by the whole regexp or whole string.
3021 */
3022       (num))
3023 {
3024         return match_limit(num, 0);
3025 }
3026
3027 DEFUN("match-data", Fmatch_data, 0, 2, 0,       /*
3028 Return a list containing all info on what the last regexp search matched.
3029 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
3030 All the elements are markers or nil (nil if the Nth pair didn't match)
3031 if the last match was on a buffer; integers or nil if a string was matched.
3032 Use `store-match-data' to reinstate the data in this list.
3033
3034 If INTEGERS (the optional first argument) is non-nil, always use integers
3035 \(rather than markers) to represent buffer positions.
3036 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
3037 to hold all the values, and if INTEGERS is non-nil, no consing is done.
3038 */
3039       (integers, reuse))
3040 {
3041         /* This function has been Mule-ized. */
3042         Lisp_Object tail, prev;
3043         Lisp_Object *data;
3044         int i;
3045         Charcount len;
3046
3047         if (NILP(last_thing_searched))
3048                 /*error ("match-data called before any match found"); */
3049                 return Qnil;
3050
3051         data = alloca_array(Lisp_Object, 2 * search_regs.num_regs);
3052
3053         len = -1;
3054         for (i = 0; i < search_regs.num_regs; i++) {
3055                 Bufpos start = search_regs.start[i];
3056                 if (start >= 0) {
3057                         if (EQ(last_thing_searched, Qt)
3058                             || !NILP(integers)) {
3059                                 data[2 * i] = make_int(start);
3060                                 data[2 * i + 1] = make_int(search_regs.end[i]);
3061                         } else if (BUFFERP(last_thing_searched)) {
3062                                 data[2 * i] = Fmake_marker();
3063                                 Fset_marker(data[2 * i],
3064                                             make_int(start),
3065                                             last_thing_searched);
3066                                 data[2 * i + 1] = Fmake_marker();
3067                                 Fset_marker(data[2 * i + 1],
3068                                             make_int(search_regs.end[i]),
3069                                             last_thing_searched);
3070                         } else
3071                                 /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
3072                                 abort();
3073
3074                         len = i;
3075                 } else
3076                         data[2 * i] = data[2 * i + 1] = Qnil;
3077         }
3078         if (!CONSP(reuse))
3079                 return Flist(2 * len + 2, data);
3080
3081         /* If REUSE is a list, store as many value elements as will fit
3082            into the elements of REUSE.  */
3083         for (prev = Qnil, i = 0, tail = reuse; CONSP(tail);
3084              i++, tail = XCDR(tail)) {
3085                 if (i < 2 * len + 2)
3086                         XCAR(tail) = data[i];
3087                 else
3088                         XCAR(tail) = Qnil;
3089                 prev = tail;
3090         }
3091
3092         /* If we couldn't fit all value elements into REUSE,
3093            cons up the rest of them and add them to the end of REUSE.  */
3094         if (i < 2 * len + 2)
3095                 XCDR(prev) = Flist(2 * len + 2 - i, data + i);
3096
3097         return reuse;
3098 }
3099
3100 DEFUN("store-match-data", Fstore_match_data, 1, 1, 0,   /*
3101 Set internal data on last search match from elements of LIST.
3102 LIST should have been created by calling `match-data' previously.
3103 */
3104       (list))
3105 {
3106         /* This function has been Mule-ized. */
3107         REGISTER int i;
3108         REGISTER Lisp_Object marker;
3109         int num_regs;
3110         int length;
3111
3112         if (running_asynch_code)
3113                 save_search_regs();
3114
3115         CONCHECK_LIST(list);
3116
3117         /* Unless we find a marker with a buffer in LIST, assume that this
3118            match data came from a string.  */
3119         last_thing_searched = Qt;
3120
3121         /* Allocate registers if they don't already exist.  */
3122         length = XINT(Flength(list)) / 2;
3123         num_regs = search_regs.num_regs;
3124
3125         if (length > num_regs) {
3126                 if (search_regs.num_regs == 0) {
3127                         search_regs.start = xnew_atomic_array(regoff_t, length);
3128                         search_regs.end = xnew_atomic_array(regoff_t, length);
3129                 } else {
3130                         XREALLOC_ARRAY(search_regs.start, regoff_t, length);
3131                         XREALLOC_ARRAY(search_regs.end, regoff_t, length);
3132                 }
3133
3134                 search_regs.num_regs = length;
3135         }
3136
3137         for (i = 0; i < num_regs; i++) {
3138                 marker = Fcar(list);
3139                 if (NILP(marker)) {
3140                         search_regs.start[i] = -1;
3141                         list = Fcdr(list);
3142                 } else {
3143                         if (MARKERP(marker)) {
3144                                 if (XMARKER(marker)->buffer == 0)
3145                                         marker = Qzero;
3146                                 else
3147                                         XSETBUFFER(last_thing_searched,
3148                                                    XMARKER(marker)->buffer);
3149                         }
3150
3151                         CHECK_INT_COERCE_MARKER(marker);
3152                         search_regs.start[i] = XINT(marker);
3153                         list = Fcdr(list);
3154
3155                         marker = Fcar(list);
3156                         if (MARKERP(marker) && XMARKER(marker)->buffer == 0)
3157                                 marker = Qzero;
3158
3159                         CHECK_INT_COERCE_MARKER(marker);
3160                         search_regs.end[i] = XINT(marker);
3161                 }
3162                 list = Fcdr(list);
3163         }
3164
3165         return Qnil;
3166 }
3167
3168 /* If non-zero the match data have been saved in saved_search_regs
3169    during the execution of a sentinel or filter. */
3170 static int search_regs_saved;
3171 static struct re_registers saved_search_regs;
3172
3173 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3174    if asynchronous code (filter or sentinel) is running. */
3175 static void save_search_regs(void)
3176 {
3177         if (!search_regs_saved) {
3178                 saved_search_regs.num_regs = search_regs.num_regs;
3179                 saved_search_regs.start = search_regs.start;
3180                 saved_search_regs.end = search_regs.end;
3181                 search_regs.num_regs = 0;
3182                 search_regs.start = 0;
3183                 search_regs.end = 0;
3184
3185                 search_regs_saved = 1;
3186         }
3187 }
3188
3189 /* Called upon exit from filters and sentinels. */
3190 void restore_match_data(void)
3191 {
3192         if (search_regs_saved) {
3193                 if (search_regs.num_regs > 0) {
3194                         xfree(search_regs.start);
3195                         xfree(search_regs.end);
3196                 }
3197                 search_regs.num_regs = saved_search_regs.num_regs;
3198                 search_regs.start = saved_search_regs.start;
3199                 search_regs.end = saved_search_regs.end;
3200
3201                 search_regs_saved = 0;
3202         }
3203 }
3204
3205 /* Quote a string to inactivate reg-expr chars */
3206
3207 DEFUN("regexp-quote", Fregexp_quote, 1, 1, 0,   /*
3208 Return a regexp string which matches exactly STRING and nothing else.
3209 */
3210       (string))
3211 {
3212         REGISTER Bufbyte *in, *out, *end;
3213         REGISTER Bufbyte *temp;
3214
3215         CHECK_STRING(string);
3216
3217         temp = (Bufbyte *) alloca(XSTRING_LENGTH(string) * 2);
3218
3219         /* Now copy the data into the new string, inserting escapes. */
3220
3221         in = XSTRING_DATA(string);
3222         end = in + XSTRING_LENGTH(string);
3223         out = temp;
3224
3225         while (in < end) {
3226                 Emchar c = charptr_emchar(in);
3227
3228                 if (c == '[' || c == ']'
3229                     || c == '*' || c == '.' || c == '\\'
3230                     || c == '?' || c == '+' || c == '^' || c == '$')
3231                         *out++ = '\\';
3232                 out += set_charptr_emchar(out, c);
3233                 INC_CHARPTR(in);
3234         }
3235
3236         return make_string(temp, out - temp);
3237 }
3238
3239 DEFUN("set-word-regexp", Fset_word_regexp, 1, 1, 0,     /*
3240 Set the regexp to be used to match a word in regular-expression searching.
3241 #### Not yet implemented.  Currently does nothing.
3242 #### Do not use this yet.  Its calling interface is likely to change.
3243 */
3244       (regexp))
3245 {
3246         return Qnil;
3247 }
3248 \f
3249 /************************************************************************/
3250 /*                            initialization                            */
3251 /************************************************************************/
3252
3253 void syms_of_search(void)
3254 {
3255
3256         DEFERROR_STANDARD(Qsearch_failed, Qinvalid_operation);
3257         DEFERROR_STANDARD(Qinvalid_regexp, Qsyntax_error);
3258
3259 #ifdef EF_USE_COMPRE
3260         defsymbol(&Qcompre, "compre");
3261         defsymbol(&Qcomprep, "comprep");
3262
3263         DEFSUBR(Fcompile_regexp);
3264         DEFSUBR(Fdefregexp);
3265 #endif
3266
3267         DEFSUBR(Flooking_at);
3268         DEFSUBR(Fposix_looking_at);
3269         DEFSUBR(Fstring_match);
3270         DEFSUBR(Fposix_string_match);
3271         DEFSUBR(Fskip_chars_forward);
3272         DEFSUBR(Fskip_chars_backward);
3273         DEFSUBR(Fskip_syntax_forward);
3274         DEFSUBR(Fskip_syntax_backward);
3275         DEFSUBR(Fsearch_forward);
3276         DEFSUBR(Fsearch_backward);
3277         DEFSUBR(Fword_search_forward);
3278         DEFSUBR(Fword_search_backward);
3279         DEFSUBR(Fre_search_forward);
3280         DEFSUBR(Fre_search_backward);
3281         DEFSUBR(Fposix_search_forward);
3282         DEFSUBR(Fposix_search_backward);
3283         DEFSUBR(Freplace_match);
3284         DEFSUBR(Fmatch_beginning);
3285         DEFSUBR(Fmatch_end);
3286         DEFSUBR(Fmatch_data);
3287         DEFSUBR(Fstore_match_data);
3288         DEFSUBR(Fregexp_quote);
3289         DEFSUBR(Fset_word_regexp);
3290 }
3291
3292 void reinit_vars_of_search(void)
3293 {
3294         int i;
3295
3296         last_thing_searched = Qnil;
3297         staticpro_nodump(&last_thing_searched);
3298
3299         for (i = 0; i < REGEXP_CACHE_SIZE; ++i) {
3300                 searchbufs[i].buf.allocated = 100;
3301                 searchbufs[i].buf.buffer = (unsigned char *)xmalloc_atomic(100);
3302                 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3303                 searchbufs[i].regexp = Qnil;
3304                 staticpro_nodump(&searchbufs[i].regexp);
3305                 searchbufs[i].next =
3306                         (i == REGEXP_CACHE_SIZE - 1 ? 0 : &searchbufs[i + 1]);
3307         }
3308         searchbuf_head = &searchbufs[0];
3309 }
3310
3311 void vars_of_search(void)
3312 {
3313         reinit_vars_of_search();
3314
3315         DEFVAR_LISP("forward-word-regexp", &Vforward_word_regexp        /*
3316 *Regular expression to be used in `forward-word'.
3317 #### Not yet implemented.
3318                                                                          */ );
3319         Vforward_word_regexp = Qnil;
3320
3321         DEFVAR_LISP("backward-word-regexp", &Vbackward_word_regexp      /*
3322 *Regular expression to be used in `backward-word'.
3323 #### Not yet implemented.
3324                                                                          */ );
3325         Vbackward_word_regexp = Qnil;
3326 }
3327
3328 void complex_vars_of_search(void)
3329 {
3330         Vskip_chars_range_table = Fmake_range_table();
3331         staticpro(&Vskip_chars_range_table);
3332 }