Fix if/else scope in yow.c from Rudi
[sxemacs] / src / regex.c
1 /* Extended regular expression matching and search library,
2    version 0.12, extended for XEmacs.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5
6    Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
7    Copyright (C) 1995 Sun Microsystems, Inc.
8    Copyright (C) 1995 Ben Wing.
9
10 This file is part of SXEmacs
11
12 SXEmacs is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
16
17 SXEmacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
24
25
26 /* Synched up with: FSF 19.29. */
27
28 /* Changes made for XEmacs:
29
30    (1) the REGEX_BEGLINE_CHECK code from the XEmacs v18 regex routines
31        was added.  This causes a huge speedup in font-locking.
32    (2) Rel-alloc is disabled when the MMAP version of rel-alloc is
33        being used, because it's too slow -- all those calls to mmap()
34        add humongous overhead.
35    (3) Lots and lots of changes for Mule.  They are bracketed by
36        `#ifdef MULE' or with comments that have `XEmacs' in them.
37  */
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #ifndef REGISTER                /* Rigidly enforced as of 20.3 */
44 #define REGISTER
45 #endif
46
47 #ifndef _GNU_SOURCE
48 #define _GNU_SOURCE 1
49 #endif
50
51 #ifdef emacs
52 /* Converts the pointer to the char to BEG-based offset from the start.  */
53 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING                      \
54                           ? (d) - string1 : (d) - (string2 - size1))
55 #else
56 #define PTR_TO_OFFSET(d) 0
57 #endif
58
59 /* We assume non-Mule if emacs isn't defined. */
60 #ifndef emacs
61 #undef MULE
62 #endif
63
64 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
65 #include <sys/types.h>
66
67 /* This is for other GNU distributions with internationalized messages.  */
68 #if defined (I18N3) && (defined (HAVE_LIBINTL_H) || defined (_LIBC))
69 # include <libintl.h>
70 #else
71 # define gettext(msgid) (msgid)
72 #endif
73
74 /* XEmacs: define this to add in a speedup for patterns anchored at
75    the beginning of a line.  Keep the ifdefs so that it's easier to
76    tell where/why this code has diverged from v19. */
77 #define REGEX_BEGLINE_CHECK
78
79 /* XEmacs: the current mmap-based ralloc handles small blocks very
80    poorly, so we disable it here. */
81
82 #if (defined (REL_ALLOC) && defined (HAVE_MMAP)) || defined(DOUG_LEA_MALLOC)
83 # undef REL_ALLOC
84 #endif
85
86 /* The `emacs' switch turns on certain matching commands
87    that make sense only in Emacs. */
88 #ifdef emacs
89
90 #include "lisp.h"
91 #include "buffer.h"
92 #include "syntax.h"
93
94 #ifdef MULE
95
96 Lisp_Object Vthe_lisp_rangetab;
97
98 void complex_vars_of_regex(void)
99 {
100         Vthe_lisp_rangetab = Fmake_range_table();
101         staticpro(&Vthe_lisp_rangetab);
102 }
103
104 #else  /* not MULE */
105
106 void complex_vars_of_regex(void)
107 {
108 }
109
110 #endif  /* MULE */
111
112 #define RE_TRANSLATE(ch) TRT_TABLE_OF (translate, (Emchar) ch)
113 #define TRANSLATE_P(tr) (!NILP (tr) && CHAR_TABLEP(tr))
114
115 /* just massage that xfree thing */
116 #undef xfree
117 #if defined HAVE_BDWGC && defined EF_USE_BDWGC
118 # define xfree          zfree
119 #else  /* !BDWGC */
120 # define xfree          yfree
121 #endif  /* BDWGC */
122
123 #else  /* !emacs */
124
125 /* If we are not linking with Emacs proper,
126    we can't use the relocating allocator
127    even if config.h says that we can.  */
128 #undef REL_ALLOC
129
130 #if defined (STDC_HEADERS) || defined (_LIBC)
131 # include <stdlib.h>
132 # include <stdbool.h>
133 #else
134 char *malloc();
135 char *realloc();
136 #endif
137
138 /* Types normally included via lisp.h */
139 #include <stddef.h>             /* for ptrdiff_t */
140
141 #ifdef REGEX_MALLOC
142 #ifndef DECLARE_NOTHING
143 #define DECLARE_NOTHING struct nosuchstruct
144 #endif
145 #endif
146
147 typedef int Emchar;
148
149 #define charptr_emchar(str)             ((Emchar) (str)[0])
150
151 #define INC_CHARPTR(p) ((p)++)
152 #define DEC_CHARPTR(p) ((p)--)
153
154 #include <string.h>
155
156 /* Define the syntax stuff for \<, \>, etc.  */
157
158 /* This must be nonzero for the wordchar and notwordchar pattern
159    commands in re_match_2.  */
160 #ifndef Sword
161 #define Sword 1
162 #endif
163
164 #ifdef SYNTAX_TABLE
165
166 extern char *re_syntax_table;
167
168 #else                           /* not SYNTAX_TABLE */
169
170 /* How many characters in the character set.  */
171 #define CHAR_SET_SIZE 256
172
173 static char re_syntax_table[CHAR_SET_SIZE];
174
175 static void init_syntax_once(void)
176 {
177         static int done = 0;
178
179         if (!done) {
180                 const char *word_syntax_chars =
181                     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
182
183                 memset(re_syntax_table, 0, sizeof(re_syntax_table));
184
185                 while (*word_syntax_chars)
186                         re_syntax_table[(unsigned int)(*word_syntax_chars++)] =
187                             Sword;
188
189                 done = 1;
190         }
191 }
192
193 #endif                          /* SYNTAX_TABLE */
194
195 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
196 #undef SYNTAX_FROM_CACHE
197 #define SYNTAX_FROM_CACHE SYNTAX_UNSAFE
198
199 #define RE_TRANSLATE(c) translate[(unsigned char) (c)]
200 #define TRANSLATE_P(tr) tr
201
202 #if defined HAVE_BDWGC && defined EF_USE_BDWGC
203 # if defined HAVE_GC_GC_H
204 #  include "gc/gc.h"
205 # elif defined HAVE_GC_H
206 #  include "gc.h"
207 # endif
208 # define xmalloc        GC_MALLOC
209 # define xmalloc_atomic GC_MALLOC_ATOMIC
210 # define xrealloc       GC_REALLOC
211 # define xfree          GC_FREE
212 # define xstrdup        GC_STRDUP
213 #else  /* !HAVE_BDWGC */
214 # define xmalloc        malloc
215 # define xmalloc_atomic malloc
216 # define xrealloc       realloc
217 # define xfree          free
218 # define xstrdup        strdup
219 #endif  /* HAVE_BDWGC */
220
221 #endif  /* emacs */
222
223
224 /* Under XEmacs, this is needed because we don't define it elsewhere. */
225 #ifdef SWITCH_ENUM_BUG
226 #define SWITCH_ENUM_CAST(x) ((int)(x))
227 #else
228 #define SWITCH_ENUM_CAST(x) (x)
229 #endif
230 \f
231 /* Get the interface, including the syntax bits.  */
232 #include "regex.h"
233
234 /* isalpha etc. are used for the character classes.  */
235 #include <ctype.h>
236
237 /* Jim Meyering writes:
238
239    "... Some ctype macros are valid only for character codes that
240    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
241    using /bin/cc or gcc but without giving an ansi option).  So, all
242    ctype uses should be through macros like ISPRINT...  If
243    STDC_HEADERS is defined, then autoconf has verified that the ctype
244    macros don't need to be guarded with references to isascii. ...
245    Defining isascii to 1 should let any compiler worth its salt
246    eliminate the && through constant folding."  */
247
248 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
249 #define ISASCII_1(c) 1
250 #else
251 #define ISASCII_1(c) isascii(c)
252 #endif
253
254 #ifdef MULE
255 /* The IS*() macros can be passed any character, including an extended
256    one.  We need to make sure there are no crashes, which would occur
257    otherwise due to out-of-bounds array references. */
258 #define ISASCII(c) (((EMACS_UINT) (c)) < 0x100 && ISASCII_1 (c))
259 #else
260 #define ISASCII(c) ISASCII_1 (c)
261 #endif                          /* MULE */
262
263 #ifdef isblank
264 #define ISBLANK(c) (ISASCII (c) && isblank (c))
265 #else
266 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
267 #endif
268 #ifdef isgraph
269 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
270 #else
271 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
272 #endif
273
274 /* stolen from FSF/src/charset.h */
275 #define SINGLE_BYTE_CHAR_P(c)   (((unsigned int)(c) & 0xFF) == (c))
276 /* 1 if C is an ASCII character.  */
277 #define IS_REAL_ASCII(c) ((c) < 128)
278 /* 1 if C is a unibyte character.  */
279 #define ISUNIBYTE(c)    (SINGLE_BYTE_CHAR_P(c))
280
281 #define ISPRINT(c) (ISASCII (c) && isprint (c))
282 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
283 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
284 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
285 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
286 #define ISLOWER(c) (ISASCII (c) && islower (c))
287 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
288 #define ISSPACE(c) (ISASCII (c) && isspace (c))
289 #define ISUPPER(c) (ISASCII (c) && isupper (c))
290 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
291
292 #if defined emacs
293 static inline bool
294 ISWORD(int c)
295         __attribute__((always_inline));
296 static inline bool
297 ISWORD(int c)
298 {
299         Lisp_Object ct = regex_emacs_buffer->mirror_syntax_table;
300
301         return (SYNTAX(XCHAR_TABLE(ct), c) == Sword);
302 }
303 #else
304 # define ISWORD(c)      (ISALPHA(c))
305 #endif  /* emacs */
306
307 #ifndef NULL
308 #define NULL (void *)0
309 #endif
310
311 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
312    since ours (we hope) works properly with all combinations of
313    machines, compilers, `char' and `unsigned char' argument types.
314    (Per Bothner suggested the basic approach.)  */
315 #undef SIGN_EXTEND_CHAR
316 #if __STDC__
317 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
318 #else                           /* not __STDC__ */
319 /* As in Harbison and Steele.  */
320 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
321 #endif
322 \f
323 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
324    use `alloca' instead of `malloc'.  This is because using malloc in
325    re_search* or re_match* could cause memory leaks when C-g is used in
326    Emacs; also, malloc is slower and causes storage fragmentation.  On
327    the other hand, malloc is more portable, and easier to debug.
328
329    Because we sometimes use alloca, some routines have to be macros,
330    not functions -- `alloca'-allocated space disappears at the end of the
331    function it is called in.  */
332
333 #ifdef REGEX_MALLOC
334
335 #define REGEX_ALLOCATE  xmalloc
336 #define REGEX_REALLOCATE(source, osize, nsize) xrealloc(source, nsize)
337 #define REGEX_FREE      xfree
338
339 #else  /* !REGEX_MALLOC */
340
341 /* Emacs already defines alloca, sometimes.  */
342 #ifndef alloca
343
344 /* Make alloca work the best possible way.  */
345 #ifdef __GNUC__
346 #define alloca __builtin_alloca
347 #else                           /* not __GNUC__ */
348 #if HAVE_ALLOCA_H
349 #include <alloca.h>
350 #else                           /* not __GNUC__ or HAVE_ALLOCA_H */
351 #ifndef _AIX                    /* Already did AIX, up at the top.  */
352 void *alloca();
353 #endif                          /* not _AIX */
354 #endif                          /* HAVE_ALLOCA_H */
355 #endif                          /* __GNUC__ */
356
357 #endif                          /* not alloca */
358
359 #define REGEX_ALLOCATE alloca
360
361 /* Assumes a `char *destination' variable.  */
362 #define REGEX_REALLOCATE(source, osize, nsize)                          \
363   (destination = (char *) alloca (nsize),                               \
364    memmove (destination, source, osize),                                \
365    destination)
366
367 /* No need to do anything to free, after alloca.  */
368 #define REGEX_FREE(arg) ((void)0)       /* Do nothing!  But inhibit gcc warning.  */
369
370 #endif  /* REGEX_MALLOC */
371
372 /* Define how to allocate the failure stack.  */
373
374 #ifdef REL_ALLOC
375 #error not again
376 #define REGEX_ALLOCATE_STACK(size)                              \
377         r_alloc ((char **) &failure_stack_ptr, (size))
378 #define REGEX_REALLOCATE_STACK(source, osize, nsize)            \
379         r_re_alloc ((char **) &failure_stack_ptr, (nsize))
380 #define REGEX_FREE_STACK(ptr)                                   \
381         r_alloc_free ((void **) &failure_stack_ptr)
382
383 #else  /* !REL_ALLOC */
384
385 #ifdef REGEX_MALLOC
386
387 #define REGEX_ALLOCATE_STACK    xmalloc
388 #define REGEX_REALLOCATE_STACK(source, osize, nsize) xrealloc(source, nsize)
389 #define REGEX_FREE_STACK        xfree
390
391 #else  /* !REGEX_MALLOC */
392
393 #define REGEX_ALLOCATE_STACK alloca
394
395 #define REGEX_REALLOCATE_STACK(source, osize, nsize)                    \
396    REGEX_REALLOCATE (source, osize, nsize)
397 /* No need to explicitly free anything.  */
398 #define REGEX_FREE_STACK(arg)
399
400 #endif  /* REGEX_MALLOC */
401 #endif  /* REL_ALLOC */
402
403 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
404    `string1' or just past its end.  This works if PTR is NULL, which is
405    a good thing.  */
406 #define FIRST_STRING_P(ptr)                                     \
407   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
408
409 /* (Re)Allocate N items of type T using malloc, or fail.  */
410 #define TALLOC(n, t)                            \
411         ((t *)xmalloc((n) * sizeof (t)))
412 #define TALLOC_ATOMIC(n, t)                     \
413         ((t *)xmalloc_atomic((n) * sizeof (t)))
414 #define RETALLOC(addr, n, t)                                    \
415         ((addr) = (t *)xrealloc(addr, (n) * sizeof (t)))
416 #define RETALLOC_IF(addr, n, t)                                 \
417         if (addr) {                                             \
418                 RETALLOC((addr), (n), t);                       \
419         } else                                                  \
420                 (addr) = TALLOC ((n), t)
421 #define REGEX_TALLOC(n, t)                              \
422         ((t *)REGEX_ALLOCATE((n) * sizeof (t)))
423
424 #define BYTEWIDTH 8             /* In bits.  */
425
426 #define STREQ(s1, s2) (strcmp (s1, s2) == 0)
427
428 #undef MAX
429 #undef MIN
430 #define MAX(a, b) ((a) > (b) ? (a) : (b))
431 #define MIN(a, b) ((a) < (b) ? (a) : (b))
432
433 /* Type of source-pattern and string chars.  */
434 typedef const unsigned char re_char;
435
436 typedef char re_bool;
437 #define false 0
438 #define true 1
439 \f
440 /* These are the command codes that appear in compiled regular
441    expressions.  Some opcodes are followed by argument bytes.  A
442    command code can specify any interpretation whatsoever for its
443    arguments.  Zero bytes may appear in the compiled regular expression.  */
444
445 typedef enum {
446         no_op = 0,
447
448         /* Succeed right away--no more backtracking.  */
449         succeed,
450
451         /* Followed by one byte giving n, then by n literal bytes.  */
452         exactn,
453
454         /* Matches any (more or less) character.  */
455         anychar,
456
457         /* Matches any one char belonging to specified set.  First
458            following byte is number of bitmap bytes.  Then come bytes
459            for a bitmap saying which chars are in.  Bits in each byte
460            are ordered low-bit-first.  A character is in the set if its
461            bit is 1.  A character too large to have a bit in the map is
462            automatically not in the set.  */
463         charset,
464
465         /* Same parameters as charset, but match any character that is
466            not one of those specified.  */
467         charset_not,
468
469         /* Start remembering the text that is matched, for storing in a
470            register.  Followed by one byte with the register number, in
471            the range 1 to the pattern buffer's re_ngroups
472            field.  Then followed by one byte with the number of groups
473            inner to this one.  (This last has to be part of the
474            start_memory only because we need it in the on_failure_jump
475            of re_match_2.)  */
476         start_memory,
477
478         /* Stop remembering the text that is matched and store it in a
479            memory register.  Followed by one byte with the register
480            number, in the range 1 to `re_ngroups' in the
481            pattern buffer, and one byte with the number of inner groups,
482            just like `start_memory'.  (We need the number of inner
483            groups here because we don't have any easy way of finding the
484            corresponding start_memory when we're at a stop_memory.)  */
485         stop_memory,
486
487         /* Match a duplicate of something remembered. Followed by one
488            byte containing the register number.  */
489         duplicate,
490
491         /* Fail unless at beginning of line.  */
492         begline,
493
494         /* Fail unless at end of line.  */
495         endline,
496
497         /* Succeeds if at beginning of buffer (if emacs) or at beginning
498            of string to be matched (if not).  */
499         begbuf,
500
501         /* Analogously, for end of buffer/string.  */
502         endbuf,
503
504         /* Followed by two byte relative address to which to jump.  */
505         jump,
506
507         /* Same as jump, but marks the end of an alternative.  */
508         jump_past_alt,
509
510         /* Followed by two-byte relative address of place to resume at
511            in case of failure.  */
512         on_failure_jump,
513
514         /* Like on_failure_jump, but pushes a placeholder instead of the
515            current string position when executed.  */
516         on_failure_keep_string_jump,
517
518         /* Throw away latest failure point and then jump to following
519            two-byte relative address.  */
520         pop_failure_jump,
521
522         /* Change to pop_failure_jump if know won't have to backtrack to
523            match; otherwise change to jump.  This is used to jump
524            back to the beginning of a repeat.  If what follows this jump
525            clearly won't match what the repeat does, such that we can be
526            sure that there is no use backtracking out of repetitions
527            already matched, then we change it to a pop_failure_jump.
528            Followed by two-byte address.  */
529         maybe_pop_jump,
530
531         /* Jump to following two-byte address, and push a dummy failure
532            point. This failure point will be thrown away if an attempt
533            is made to use it for a failure.  A `+' construct makes this
534            before the first repeat.  Also used as an intermediary kind
535            of jump when compiling an alternative.  */
536         dummy_failure_jump,
537
538         /* Push a dummy failure point and continue.  Used at the end of
539            alternatives.  */
540         push_dummy_failure,
541
542         /* Followed by two-byte relative address and two-byte number n.
543            After matching N times, jump to the address upon failure.  */
544         succeed_n,
545
546         /* Followed by two-byte relative address, and two-byte number n.
547            Jump to the address N times, then fail.  */
548         jump_n,
549
550         /* Set the following two-byte relative address to the
551            subsequent two-byte number.  The address *includes* the two
552            bytes of number.  */
553         set_number_at,
554
555         wordchar,               /* Matches any word-constituent character.  */
556         notwordchar,            /* Matches any char that is not a word-constituent.  */
557
558         wordbeg,                /* Succeeds if at word beginning.  */
559         wordend,                /* Succeeds if at word end.  */
560
561         wordbound,              /* Succeeds if at a word boundary.  */
562         notwordbound            /* Succeeds if not at a word boundary.  */
563 #ifdef emacs
564             , before_dot,       /* Succeeds if before point.  */
565         at_dot,                 /* Succeeds if at point.  */
566         after_dot,              /* Succeeds if after point.  */
567
568         /* Matches any character whose syntax is specified.  Followed by
569            a byte which contains a syntax code, e.g., Sword.  */
570         syntaxspec,
571
572         /* Matches any character whose syntax is not that specified.  */
573         notsyntaxspec
574 #endif                          /* emacs */
575 #ifdef MULE
576             /* need extra stuff to be able to properly work with XEmacs/Mule
577                characters (which may take up more than one byte) */
578             , charset_mule,     /* Matches any character belonging to specified set.
579                                    The set is stored in "unified range-table
580                                    format"; see rangetab.c.  Unlike the `charset'
581                                    opcode, this can handle arbitrary characters. */
582
583         charset_mule_not        /* Same parameters as charset_mule, but match any
584                                    character that is not one of those specified.  */
585             /* 97/2/17 jhod: The following two were merged back in from the Mule
586                2.3 code to enable some language specific processing */
587             , categoryspec,     /* Matches entries in the character category tables */
588         notcategoryspec         /* The opposite of the above */
589 #endif                          /* MULE */
590 } re_opcode_t;
591 \f
592 /* Common operations on the compiled pattern.  */
593
594 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
595
596 #define STORE_NUMBER(destination, number)                               \
597   do {                                                                  \
598     (destination)[0] = (number) & 0377;                                 \
599     (destination)[1] = (number) >> 8;                                   \
600   } while (0)
601
602 /* Same as STORE_NUMBER, except increment DESTINATION to
603    the byte after where the number is stored.  Therefore, DESTINATION
604    must be an lvalue.  */
605
606 #define STORE_NUMBER_AND_INCR(destination, number)                      \
607   do {                                                                  \
608     STORE_NUMBER (destination, number);                                 \
609     (destination) += 2;                                                 \
610   } while (0)
611
612 /* Put into DESTINATION a number stored in two contiguous bytes starting
613    at SOURCE.  */
614
615 #define EXTRACT_NUMBER(destination, source)                             \
616   do {                                                                  \
617     (destination) = *(source) & 0377;                                   \
618     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
619   } while (0)
620
621 #ifdef REGEX_DEBUG_FLAG
622 static void extract_number(int *dest, re_char * source)
623 {
624         int temp = SIGN_EXTEND_CHAR(*(source + 1));
625         *dest = *source & 0377;
626         *dest += temp << 8;
627 }
628
629 #ifndef EXTRACT_MACROS          /* To debug the macros.  */
630 #undef EXTRACT_NUMBER
631 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
632 #endif                          /* not EXTRACT_MACROS */
633
634 #endif                          /* REGEX_DEBUG_FLAG */
635
636 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
637    SOURCE must be an lvalue.  */
638
639 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
640         do {                                                            \
641                 EXTRACT_NUMBER (destination, source);                   \
642                 (source) += 2;                                          \
643         } while (0)
644
645 #ifdef REGEX_DEBUG_FLAG
646 static void
647 extract_number_and_incr(int *destination, unsigned char **source)
648 {
649         extract_number(destination, *source);
650         *source += 2;
651 }
652
653 #ifndef EXTRACT_MACROS
654 #undef EXTRACT_NUMBER_AND_INCR
655 #define EXTRACT_NUMBER_AND_INCR(dest, src)      \
656         extract_number_and_incr (&dest, &src)
657 #endif                          /* not EXTRACT_MACROS */
658
659 #endif                          /* REGEX_DEBUG_FLAG */
660 \f
661 /* If REGEX_DEBUG_FLAG is defined, Regex prints many voluminous messages about
662    what it is doing (if the variable `debug' is nonzero).  If linked with the
663    main program in `iregex.c', you can enter patterns and strings interactively.
664    And if linked with the main program in `main.c' and the other test files, you
665    can run the already-written tests.  */
666
667 #if defined (REGEX_DEBUG_FLAG)
668
669 /* We use standard I/O for debugging.  */
670 #include <stdio.h>
671
672 #ifndef emacs
673 /* XEmacs provides its own version of assert() */
674 /* It is useful to test things that ``must'' be true when debugging.  */
675 #include <assert.h>
676 #endif
677
678 #if 0
679 /* no point in having this one, since we do not offer any accessor to this */
680 static int debug = 0;
681 #endif
682
683 #define DEBUG_STATEMENT(e)              e
684 #define DEBUG_PRINT1(x)                 printf(x)
685 #define DEBUG_PRINT2(x1, x2)            printf(x1, x2)
686 #define DEBUG_PRINT3(x1, x2, x3)        printf(x1, x2, x3)
687 #define DEBUG_PRINT4(x1, x2, x3, x4)    printf(x1, x2, x3, x4)
688 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
689         print_partial_compiled_pattern(s, e)
690 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
691         print_double_string (w, s1, sz1, s2, sz2)
692
693 /* Print the fastmap in human-readable form.  */
694
695 static void print_fastmap(char *fastmap)
696 {
697         unsigned was_a_range = 0;
698         unsigned i = 0;
699
700         while (i < (1 << BYTEWIDTH)) {
701                 if (fastmap[i++]) {
702                         was_a_range = 0;
703                         putchar(i - 1);
704                         while (i < (1 << BYTEWIDTH) && fastmap[i]) {
705                                 was_a_range = 1;
706                                 i++;
707                         }
708                         if (was_a_range) {
709                                 putchar('-');
710                                 putchar(i - 1);
711                         }
712                 }
713         }
714         putchar('\n');
715 }
716
717 /* Print a compiled pattern string in human-readable form, starting at
718    the START pointer into it and ending just before the pointer END.  */
719
720 /* can't help it ... the ugly mule code changes `start' upon alignment,
721  * so this can't have the const qualifier ... bugger */
722 static void
723 print_partial_compiled_pattern(unsigned char *start, re_char *end)
724 {
725         int mcnt, mcnt2;
726         unsigned char *p = start;
727         re_char *pend = end;
728
729         if (start == NULL) {
730                 puts("(null)");
731                 return;
732         }
733
734         /* Loop over pattern commands.  */
735         while (p < pend) {
736                 printf("%ld:\t", (long)(p - start));
737
738                 switch ((const re_opcode_t)*p++) {
739                 case no_op:
740                         printf("/no_op");
741                         break;
742
743                 case exactn:
744                         mcnt = *p++;
745                         printf("/exactn/%d", mcnt);
746                         do {
747                                 putchar('/');
748                                 putchar(*p++);
749                         }
750                         while (--mcnt);
751                         break;
752
753                 case start_memory:
754                         mcnt = *p++;
755                         printf("/start_memory/%d/%d", mcnt, *p++);
756                         break;
757
758                 case stop_memory:
759                         mcnt = *p++;
760                         printf("/stop_memory/%d/%d", mcnt, *p++);
761                         break;
762
763                 case duplicate:
764                         printf("/duplicate/%d", *p++);
765                         break;
766
767                 case anychar:
768                         printf("/anychar");
769                         break;
770
771                 case charset:
772                 case charset_not: {
773                         REGISTER int c, last = -100;
774                         REGISTER int in_range = 0;
775
776                         printf("/charset [%s",
777                                (re_opcode_t) * (p - 1) == charset_not
778                                ? "^" : "");
779
780                         assert(p + *p < pend);
781
782                         for (c = 0; c < 256; c++)
783                                 if (((unsigned char)(c / 8) < *p)
784                                     && (p[1 + (c / 8)] &
785                                         (1 << (c % 8)))) {
786                                         /* Are we starting a range?  */
787                                         if (last + 1 == c && !in_range) {
788                                                 putchar('-');
789                                                 in_range = 1;
790                                         }
791                                         /* Have we broken a range?  */
792                                         else if (last + 1 != c
793                                                  && in_range) {
794                                                 putchar(last);
795                                                 in_range = 0;
796                                         }
797
798                                         if (!in_range)
799                                                 putchar(c);
800
801                                         last = c;
802                                 }
803
804                         if (in_range)
805                                 putchar(last);
806
807                         putchar(']');
808
809                         p += 1 + *p;
810                         break;
811                 }
812
813 #ifdef MULE
814                 case charset_mule:
815                 case charset_mule_not: {
816                         int nentries, i;
817
818                         printf("/charset_mule [%s",
819                                (re_opcode_t) * (p - 1) == charset_mule_not
820                                ? "^" : "");
821                         /* u_r_t_nentries() CHANGES its arg, why on earth
822                          * is this marked const here then? :O -hrop */
823                         nentries = unified_range_table_nentries(p);
824                         for (i = 0; i < nentries; i++) {
825                                 EMACS_INT first, last;
826                                 Lisp_Object dummy_val;
827
828                                 /* u_r_t_get_range CHANGES its arg ...
829                                  * p can't be const thence */
830                                 unified_range_table_get_range(p, i,
831                                                               &first,
832                                                               &last,
833                                                               &dummy_val);
834                                 if (first < 0x100)
835                                         putchar(first);
836                                 else
837                                         printf("(0x%lx)", (long)first);
838                                 if (first != last) {
839                                         putchar('-');
840                                         if (last < 0x100)
841                                                 putchar(last);
842                                         else
843                                                 printf("(0x%lx)",
844                                                        (long)last);
845                                 }
846                         }
847                         putchar(']');
848                         p += unified_range_table_bytes_used(p);
849                 }
850                         break;
851 #endif
852
853                 case begline:
854                         printf("/begline");
855                         break;
856
857                 case endline:
858                         printf("/endline");
859                         break;
860
861                 case on_failure_jump:
862                         extract_number_and_incr(&mcnt, &p);
863                         printf("/on_failure_jump to %ld",
864                                (long)(p + mcnt - start));
865                         break;
866
867                 case on_failure_keep_string_jump:
868                         extract_number_and_incr(&mcnt, &p);
869                         printf("/on_failure_keep_string_jump to %ld",
870                                (long)(p + mcnt - start));
871                         break;
872
873                 case dummy_failure_jump:
874                         extract_number_and_incr(&mcnt, &p);
875                         printf("/dummy_failure_jump to %ld",
876                                (long)(p + mcnt - start));
877                         break;
878
879                 case push_dummy_failure:
880                         printf("/push_dummy_failure");
881                         break;
882
883                 case maybe_pop_jump:
884                         extract_number_and_incr(&mcnt, &p);
885                         printf("/maybe_pop_jump to %ld",
886                                (long)(p + mcnt - start));
887                         break;
888
889                 case pop_failure_jump:
890                         extract_number_and_incr(&mcnt, &p);
891                         printf("/pop_failure_jump to %ld",
892                                (long)(p + mcnt - start));
893                         break;
894
895                 case jump_past_alt:
896                         extract_number_and_incr(&mcnt, &p);
897                         printf("/jump_past_alt to %ld",
898                                (long)(p + mcnt - start));
899                         break;
900
901                 case jump:
902                         extract_number_and_incr(&mcnt, &p);
903                         printf("/jump to %ld", (long)(p + mcnt - start));
904                         break;
905
906                 case succeed_n:
907                         extract_number_and_incr(&mcnt, &p);
908                         extract_number_and_incr(&mcnt2, &p);
909                         printf("/succeed_n to %ld, %d times",
910                                (long)(p + mcnt - start), mcnt2);
911                         break;
912
913                 case jump_n:
914                         extract_number_and_incr(&mcnt, &p);
915                         extract_number_and_incr(&mcnt2, &p);
916                         printf("/jump_n to %ld, %d times",
917                                (long)(p + mcnt - start), mcnt2);
918                         break;
919
920                 case set_number_at:
921                         extract_number_and_incr(&mcnt, &p);
922                         extract_number_and_incr(&mcnt2, &p);
923                         printf("/set_number_at location %ld to %d",
924                                (long)(p + mcnt - start), mcnt2);
925                         break;
926
927                 case wordbound:
928                         printf("/wordbound");
929                         break;
930
931                 case notwordbound:
932                         printf("/notwordbound");
933                         break;
934
935                 case wordbeg:
936                         printf("/wordbeg");
937                         break;
938
939                 case wordend:
940                         printf("/wordend");
941
942 #ifdef emacs
943                 case before_dot:
944                         printf("/before_dot");
945                         break;
946
947                 case at_dot:
948                         printf("/at_dot");
949                         break;
950
951                 case after_dot:
952                         printf("/after_dot");
953                         break;
954
955                 case syntaxspec:
956                         printf("/syntaxspec");
957                         mcnt = *p++;
958                         printf("/%d", mcnt);
959                         break;
960
961                 case notsyntaxspec:
962                         printf("/notsyntaxspec");
963                         mcnt = *p++;
964                         printf("/%d", mcnt);
965                         break;
966
967 #ifdef MULE
968 /* 97/2/17 jhod Mule category patch */
969                 case categoryspec:
970                         printf("/categoryspec");
971                         mcnt = *p++;
972                         printf("/%d", mcnt);
973                         break;
974
975                 case notcategoryspec:
976                         printf("/notcategoryspec");
977                         mcnt = *p++;
978                         printf("/%d", mcnt);
979                         break;
980 /* end of category patch */
981 #endif                          /* MULE */
982 #endif                          /* emacs */
983
984                 case wordchar:
985                         printf("/wordchar");
986                         break;
987
988                 case notwordchar:
989                         printf("/notwordchar");
990                         break;
991
992                 case begbuf:
993                         printf("/begbuf");
994                         break;
995
996                 case endbuf:
997                         printf("/endbuf");
998                         break;
999
1000                 case succeed:
1001                 default:
1002                         printf("?%d", *(p - 1));
1003                 }
1004
1005                 putchar('\n');
1006         }
1007
1008         printf("%ld:\tend of pattern.\n", (long)(p - start));
1009 }
1010
1011 static void
1012 print_compiled_pattern(struct re_pattern_buffer *bufp)
1013 {
1014 #if 0
1015         /* cant be const */
1016         re_char *buffer = bufp->buffer;
1017 #else
1018         unsigned char *buffer = bufp->buffer;
1019 #endif
1020
1021         print_partial_compiled_pattern(buffer, buffer + bufp->used);
1022         printf("%ld bytes used/%ld bytes allocated.\n", bufp->used,
1023                bufp->allocated);
1024
1025         if (bufp->fastmap_accurate && bufp->fastmap) {
1026                 printf("fastmap: ");
1027                 print_fastmap(bufp->fastmap);
1028         }
1029
1030         printf("re_nsub: %ld\t", (long)bufp->re_nsub);
1031         printf("re_ngroups: %ld\t", (long)bufp->re_ngroups);
1032         printf("regs_alloc: %d\t", bufp->regs_allocated);
1033         printf("can_be_null: %d\t", bufp->can_be_null);
1034         printf("newline_anchor: %d\n", bufp->newline_anchor);
1035         printf("no_sub: %d\t", bufp->no_sub);
1036         printf("not_bol: %d\t", bufp->not_bol);
1037         printf("not_eol: %d\t", bufp->not_eol);
1038         printf("syntax: %d\n", bufp->syntax);
1039         /* Perhaps we should print the translate table?  */
1040         /* and maybe the category table? */
1041
1042         if (bufp->external_to_internal_register) {
1043                 int i;
1044
1045                 printf("external_to_internal_register:\n");
1046                 for (i = 0; i <= bufp->re_nsub; i++) {
1047                         if (i > 0)
1048                                 printf(", ");
1049                         printf("%d -> %d", i,
1050                                bufp->external_to_internal_register[i]);
1051                 }
1052                 printf("\n");
1053         }
1054 }
1055
1056 static void
1057 print_double_string(re_char * where, re_char * string1, int size1,
1058                     re_char * string2, int size2)
1059 {
1060         if (where == NULL)
1061                 printf("(null)");
1062         else {
1063                 Element_count this_char;
1064
1065                 if (FIRST_STRING_P(where)) {
1066                         for (this_char = where - string1; this_char < size1;
1067                              this_char++)
1068                                 putchar(string1[this_char]);
1069
1070                         where = string2;
1071                 }
1072
1073                 for (this_char = where - string2; this_char < size2;
1074                      this_char++)
1075                         putchar(string2[this_char]);
1076         }
1077 }
1078
1079 #else                           /* not REGEX_DEBUG_FLAG */
1080
1081 #undef assert
1082 #define assert(e)
1083
1084 #define DEBUG_STATEMENT(e)
1085 #define DEBUG_PRINT1(x)
1086 #define DEBUG_PRINT2(x1, x2)
1087 #define DEBUG_PRINT3(x1, x2, x3)
1088 #define DEBUG_PRINT4(x1, x2, x3, x4)
1089 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1090 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1091
1092 #endif                          /* REGEX_DEBUG_FLAG */
1093 \f
1094 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1095    also be assigned to arbitrarily: each pattern buffer stores its own
1096    syntax, so it can be changed between regex compilations.  */
1097 /* This has no initializer because initialized variables in Emacs
1098    become read-only after dumping.  */
1099 reg_syntax_t re_syntax_options;
1100
1101 /* Specify the precise syntax of regexps for compilation.  This provides
1102    for compatibility for various utilities which historically have
1103    different, incompatible syntaxes.
1104
1105    The argument SYNTAX is a bit mask comprised of the various bits
1106    defined in regex.h.  We return the old syntax.  */
1107
1108 reg_syntax_t re_set_syntax(reg_syntax_t syntax)
1109 {
1110         reg_syntax_t ret = re_syntax_options;
1111
1112         re_syntax_options = syntax;
1113         return ret;
1114 }
1115 \f
1116 /* This table gives an error message for each of the error codes listed
1117    in regex.h.  Obviously the order here has to be same as there.
1118    POSIX doesn't require that we do anything for REG_NOERROR,
1119    but why not be nice?  */
1120
1121 static const char *re_error_msgid[] = {
1122         "Success",              /* REG_NOERROR */
1123         "No match",             /* REG_NOMATCH */
1124         "Invalid regular expression",   /* REG_BADPAT */
1125         "Invalid collation character",  /* REG_ECOLLATE */
1126         "Invalid character class name", /* REG_ECTYPE */
1127         "Trailing backslash",   /* REG_EESCAPE */
1128         "Invalid back reference",       /* REG_ESUBREG */
1129         "Unmatched [ or [^",    /* REG_EBRACK */
1130         "Unmatched ( or \\(",   /* REG_EPAREN */
1131         "Unmatched \\{",        /* REG_EBRACE */
1132         "Invalid content of \\{\\}",    /* REG_BADBR */
1133         "Invalid range end",    /* REG_ERANGE */
1134         "Memory exhausted",     /* REG_ESPACE */
1135         "Invalid preceding regular expression", /* REG_BADRPT */
1136         "Premature end of regular expression",  /* REG_EEND */
1137         "Regular expression too big",   /* REG_ESIZE */
1138         "Unmatched ) or \\)",   /* REG_ERPAREN */
1139 #ifdef emacs
1140         "Invalid syntax designator",    /* REG_ESYNTAX */
1141 #endif
1142 #ifdef MULE
1143         "Ranges may not span charsets", /* REG_ERANGESPAN */
1144         "Invalid category designator",  /* REG_ECATEGORY */
1145 #endif
1146 };
1147 \f
1148 /* Avoiding alloca during matching, to placate r_alloc.  */
1149
1150 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1151    searching and matching functions should not call alloca.  On some
1152    systems, alloca is implemented in terms of malloc, and if we're
1153    using the relocating allocator routines, then malloc could cause a
1154    relocation, which might (if the strings being searched are in the
1155    ralloc heap) shift the data out from underneath the regexp
1156    routines.
1157
1158    Here's another reason to avoid allocation: Emacs
1159    processes input from X in a signal handler; processing X input may
1160    call malloc; if input arrives while a matching routine is calling
1161    malloc, then we're scrod.  But Emacs can't just block input while
1162    calling matching routines; then we don't notice interrupts when
1163    they come in.  So, Emacs blocks input around all regexp calls
1164    except the matching calls, which it leaves unprotected, in the
1165    faith that they will not malloc.  */
1166
1167 /* Normally, this is fine.  */
1168 #define MATCH_MAY_ALLOCATE
1169
1170 /* When using GNU C, we are not REALLY using the C alloca, no matter
1171    what config.h may say.  So don't take precautions for it.  */
1172 #ifdef __GNUC__
1173 #undef C_ALLOCA
1174 #endif
1175
1176 /* The match routines may not allocate if (1) they would do it with malloc
1177    and (2) it's not safe for them to use malloc.
1178    Note that if REL_ALLOC is defined, matching would not use malloc for the
1179    failure stack, but we would still use it for the register vectors;
1180    so REL_ALLOC should not affect this.  */
1181 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1182 #undef MATCH_MAY_ALLOCATE
1183 #endif
1184 \f
1185 /* Failure stack declarations and macros; both re_compile_fastmap and
1186    re_match_2 use a failure stack.  These have to be macros because of
1187    REGEX_ALLOCATE_STACK.  */
1188
1189 /* Number of failure points for which to initially allocate space
1190    when matching.  If this number is exceeded, we allocate more
1191    space, so it is not a hard limit.  */
1192 #ifndef INIT_FAILURE_ALLOC
1193 #define INIT_FAILURE_ALLOC 20
1194 #endif
1195
1196 /* Roughly the maximum number of failure points on the stack.  Would be
1197    exactly that if always used MAX_FAILURE_SPACE each time we failed.
1198    This is a variable only so users of regex can assign to it; we never
1199    change it ourselves.  */
1200 #if defined (MATCH_MAY_ALLOCATE) || defined (REGEX_MALLOC)
1201 /* 4400 was enough to cause a crash on Alpha OSF/1,
1202    whose default stack limit is 2mb.  */
1203 int re_max_failures = 50000;
1204 #else
1205 int re_max_failures = 2000;
1206 #endif
1207
1208 union fail_stack_elt {
1209         const unsigned char *pointer;
1210         int integer;
1211 };
1212
1213 typedef union fail_stack_elt fail_stack_elt_t;
1214
1215 typedef struct {
1216         fail_stack_elt_t *stack;
1217         Element_count size;
1218         Element_count avail;    /* Offset of next open position.  */
1219 } fail_stack_type;
1220
1221 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1222 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1223 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1224
1225 /* Define macros to initialize and free the failure stack.
1226    Do `return -2' if the alloc fails.  */
1227
1228 #ifdef MATCH_MAY_ALLOCATE
1229 #define INIT_FAIL_STACK()                                               \
1230         do {                                                            \
1231                 fail_stack.stack = (fail_stack_elt_t *)                 \
1232                         REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC *      \
1233                                               sizeof (fail_stack_elt_t)); \
1234                                                                         \
1235                 if (fail_stack.stack == NULL)                           \
1236                         return -2;                                      \
1237                                                                         \
1238                 fail_stack.size = INIT_FAILURE_ALLOC;                   \
1239                 fail_stack.avail = 0;                                   \
1240         } while (0)
1241
1242 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1243 #else
1244 #define INIT_FAIL_STACK()                       \
1245         do {                                    \
1246                 fail_stack.avail = 0;           \
1247         } while (0)
1248
1249 #define RESET_FAIL_STACK()
1250 #endif
1251
1252 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1253
1254    Return 1 if succeeds, and 0 if either ran out of memory
1255    allocating space for it or it was already too large.
1256
1257    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1258
1259 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
1260   ((int) (fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS        \
1261    ? 0                                                                  \
1262    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
1263         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
1264           (fail_stack).size * sizeof (fail_stack_elt_t),                \
1265           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
1266                                                                         \
1267       (fail_stack).stack == NULL                                        \
1268       ? 0                                                               \
1269       : ((fail_stack).size <<= 1,                                       \
1270          1)))
1271
1272 /* Push pointer POINTER on FAIL_STACK.
1273    Return 1 if was able to do so and 0 if ran out of memory allocating
1274    space to do so.  */
1275 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
1276   ((FAIL_STACK_FULL ()                                                  \
1277     && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
1278    ? 0                                                                  \
1279    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
1280       1))
1281
1282 /* Push a pointer value onto the failure stack.
1283    Assumes the variable `fail_stack'.  Probably should only
1284    be called from within `PUSH_FAILURE_POINT'.  */
1285 #define PUSH_FAILURE_POINTER(item)                                      \
1286         fail_stack.stack[fail_stack.avail++].pointer =                  \
1287                 (const unsigned char*)(item)
1288
1289 /* This pushes an integer-valued item onto the failure stack.
1290    Assumes the variable `fail_stack'.  Probably should only
1291    be called from within `PUSH_FAILURE_POINT'.  */
1292 #define PUSH_FAILURE_INT(item)                                  \
1293         fail_stack.stack[fail_stack.avail++].integer = (item)
1294
1295 /* Push a fail_stack_elt_t value onto the failure stack.
1296    Assumes the variable `fail_stack'.  Probably should only
1297    be called from within `PUSH_FAILURE_POINT'.  */
1298 #define PUSH_FAILURE_ELT(item)                                  \
1299         fail_stack.stack[fail_stack.avail++] =  (item)
1300
1301 /* These three POP... operations complement the three PUSH... operations.
1302    All assume that `fail_stack' is nonempty.  */
1303 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1304 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1305 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1306
1307 /* Used to omit pushing failure point id's when we're not debugging.  */
1308 #ifdef REGEX_DEBUG_FLAG
1309 #define DEBUG_PUSH PUSH_FAILURE_INT
1310 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1311 #else
1312 #define DEBUG_PUSH(item)
1313 #define DEBUG_POP(item_addr)
1314 #endif
1315
1316 /* Push the information about the state we will need
1317    if we ever fail back to it.
1318
1319    Requires variables fail_stack, regstart, regend, reg_info, and
1320    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1321    declared.
1322
1323    Does `return FAILURE_CODE' if runs out of memory.  */
1324
1325 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
1326 #define DECLARE_DESTINATION char *destination
1327 #else
1328 #define DECLARE_DESTINATION DECLARE_NOTHING
1329 #endif
1330
1331 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1332         do {                                                            \
1333                 DECLARE_DESTINATION;                                    \
1334                 /* Must be int, so when we don't save any registers, the\
1335                    arithmetic \ of 0 + -1 isn't done as unsigned.  */   \
1336                 int this_reg;                                           \
1337                                                                         \
1338                 DEBUG_STATEMENT (failure_id++);                         \
1339                 DEBUG_STATEMENT (nfailure_points_pushed++);             \
1340                 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1341                 DEBUG_PRINT2 ("  Before push, next avail: %lu\n",       \
1342                               (long unsigned int)(fail_stack).avail);   \
1343                 DEBUG_PRINT2 ("                     size: %lu\n",       \
1344                               (long unsigned int)(fail_stack).size);    \
1345                                                                         \
1346                 DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS); \
1347                 DEBUG_PRINT2 ("     available: %ld\n",                  \
1348                               (long) REMAINING_AVAIL_SLOTS);            \
1349                                                                         \
1350                 /* Ensure we have enough space allocated for what       \
1351                  * we will push.  */                                    \
1352                 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) {     \
1353                         if (!DOUBLE_FAIL_STACK (fail_stack))            \
1354                                 return failure_code;                    \
1355                                                                         \
1356                         DEBUG_PRINT2 ("\n  Doubled stack; size now: %lu\n", \
1357                                       (unsigned long) (fail_stack).size); \
1358                         DEBUG_PRINT2 ("  slots available: %ld\n",       \
1359                                       (long) REMAINING_AVAIL_SLOTS);    \
1360                 }                                                       \
1361                                                                         \
1362                 /* Push the info, starting with the registers.  */      \
1363                 DEBUG_PRINT1 ("\n");                                    \
1364                                                                         \
1365                 for (this_reg = lowest_active_reg;                      \
1366                      this_reg <= highest_active_reg;                    \
1367                      this_reg++) {                                      \
1368                         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg); \
1369                         DEBUG_STATEMENT (num_regs_pushed++);            \
1370                                                                         \
1371                         DEBUG_PRINT2("    start: %p\n",                 \
1372                                      regstart[this_reg]);               \
1373                         PUSH_FAILURE_POINTER(regstart[this_reg]);       \
1374                                                                         \
1375                         DEBUG_PRINT2 ("    end: %p\n",                  \
1376                                       regend[this_reg]);                \
1377                         PUSH_FAILURE_POINTER(regend[this_reg]); \
1378                                                                         \
1379                         DEBUG_PRINT2 ("    info: 0x%p\n      ",         \
1380                                       &reg_info[this_reg]);             \
1381                         DEBUG_PRINT2 (" match_null=%d",                 \
1382                                       REG_MATCH_NULL_STRING_P           \
1383                                       (reg_info[this_reg]));            \
1384                         DEBUG_PRINT2 (" active=%d",                     \
1385                                       IS_ACTIVE (reg_info[this_reg]));  \
1386                         DEBUG_PRINT2 (" matched_something=%d",          \
1387                                       MATCHED_SOMETHING                 \
1388                                       (reg_info[this_reg]));            \
1389                         DEBUG_PRINT2 (" ever_matched_something=%d",     \
1390                                       EVER_MATCHED_SOMETHING            \
1391                                       (reg_info[this_reg]));            \
1392                         DEBUG_PRINT1 ("\n");                            \
1393                         PUSH_FAILURE_ELT(reg_info[this_reg].word);      \
1394                 }                                                       \
1395                                                                         \
1396                 DEBUG_PRINT2 ("  Pushing  low active reg: %d\n",        \
1397                               lowest_active_reg);                       \
1398                 PUSH_FAILURE_INT (lowest_active_reg);                   \
1399                                                                         \
1400                 DEBUG_PRINT2 ("  Pushing high active reg: %d\n",        \
1401                               highest_active_reg);                      \
1402                 PUSH_FAILURE_INT (highest_active_reg);                  \
1403                                                                         \
1404                 DEBUG_PRINT2 ("  Pushing pattern 0x%lx: \n",            \
1405                               (long) pattern_place);                    \
1406                 DEBUG_PRINT_COMPILED_PATTERN(bufp, pattern_place, pend); \
1407                 PUSH_FAILURE_POINTER (pattern_place);                   \
1408                                                                         \
1409                 DEBUG_PRINT2("  Pushing string %p: `",                  \
1410                              string_place);                             \
1411                 DEBUG_PRINT_DOUBLE_STRING(string_place,                 \
1412                                           string1, size1,               \
1413                                           string2, size2);              \
1414                 DEBUG_PRINT1("'\n");                                    \
1415                 PUSH_FAILURE_POINTER (string_place);                    \
1416                                                                         \
1417                 DEBUG_PRINT2("  Pushing failure id: %u\n", failure_id); \
1418                 DEBUG_PUSH(failure_id);                                 \
1419         } while (0)
1420
1421 /* This is the number of items that are pushed and popped on the stack
1422    for each register.  */
1423 #define NUM_REG_ITEMS  3
1424
1425 /* Individual items aside from the registers.  */
1426 #ifdef DEBUG
1427 #define NUM_NONREG_ITEMS 5      /* Includes failure point id.  */
1428 #else
1429 #define NUM_NONREG_ITEMS 4
1430 #endif
1431
1432 /* We push at most this many items on the stack.  */
1433 /* We used to use (num_regs - 1), which is the number of registers
1434    this regexp will save; but that was changed to 5
1435    to avoid stack overflow for a regexp with lots of parens.  */
1436 #define MAX_FAILURE_ITEMS (5 * (NUM_REG_ITEMS + NUM_NONREG_ITEMS))
1437
1438 /* We actually push this many items.  */
1439 #define NUM_FAILURE_ITEMS                                               \
1440   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS         \
1441     + NUM_NONREG_ITEMS)
1442
1443 /* How many items can still be added to the stack without overflowing it.  */
1444 #define REMAINING_AVAIL_SLOTS ((int) ((fail_stack).size - (fail_stack).avail))
1445
1446 /* Pops what PUSH_FAIL_STACK pushes.
1447
1448    We restore into the parameters, all of which should be lvalues:
1449      STR -- the saved data position.
1450      PAT -- the saved pattern position.
1451      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1452      REGSTART, REGEND -- arrays of string positions.
1453      REG_INFO -- array of information about each subexpression.
1454
1455    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1456    `pend', `string1', `size1', `string2', and `size2'.  */
1457
1458 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg,                  \
1459                           regstart, regend, reg_info)                   \
1460         do {                                                            \
1461                 DEBUG_STATEMENT (fail_stack_elt_t ffailure_id;)         \
1462                         int this_reg;                                   \
1463                 const unsigned char *string_temp;                       \
1464                                                                         \
1465                 assert (!FAIL_STACK_EMPTY ());                          \
1466                                                                         \
1467                 /* Remove failure points and point to                   \
1468                  * how many regs pushed.  */                            \
1469                 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                  \
1470                 DEBUG_PRINT2 ("  Before pop, next avail: %lu\n",        \
1471                               (unsigned long) fail_stack.avail);        \
1472                 DEBUG_PRINT2 ("                    size: %lu\n",        \
1473                               (unsigned long) fail_stack.size);         \
1474                                                                         \
1475                 assert (fail_stack.avail >= NUM_NONREG_ITEMS);          \
1476                                                                         \
1477                 DEBUG_POP (&ffailure_id.integer);                       \
1478                 DEBUG_PRINT2 ("  Popping failure id: %u\n",             \
1479                               * (unsigned int *) &ffailure_id);         \
1480                                                                         \
1481                 /* If the saved string location is NULL, it             \
1482                    came from an on_failure_keep_string_jump opcode,     \
1483                    and we want to throw away the saved NULL, thus       \
1484                    retaining our current position in the string.  */    \
1485                 string_temp = POP_FAILURE_POINTER ();                   \
1486                 if (string_temp != NULL)                                \
1487                         str = string_temp;                              \
1488                                                                         \
1489                 DEBUG_PRINT2("  Popping string %p: `", str);            \
1490                 DEBUG_PRINT_DOUBLE_STRING(str,                          \
1491                                           string1, size1,               \
1492                                           string2, size2);              \
1493                 DEBUG_PRINT1("'\n");                                    \
1494                                                                         \
1495                 pat = (unsigned char *) POP_FAILURE_POINTER();          \
1496                 DEBUG_PRINT2 ("  Popping pattern %p: ", pat);           \
1497                 DEBUG_PRINT_COMPILED_PATTERN(bufp, pat, pend);          \
1498                                                                         \
1499                 /* Restore register info.  */                           \
1500                 high_reg = (unsigned) POP_FAILURE_INT ();               \
1501                 DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg); \
1502                                                                         \
1503                 low_reg = (unsigned) POP_FAILURE_INT ();                \
1504                 DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg); \
1505                                                                         \
1506                 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) { \
1507                         DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg); \
1508                                                                         \
1509                         reg_info[this_reg].word = POP_FAILURE_ELT ();   \
1510                         DEBUG_PRINT2 ("      info: %p\n",               \
1511                                       &reg_info[this_reg]);             \
1512                                                                         \
1513                         regend[this_reg] = POP_FAILURE_POINTER ();      \
1514                         DEBUG_PRINT2 ("      end: %p\n",                \
1515                                       regend[this_reg]);                \
1516                                                                         \
1517                         regstart[this_reg] = POP_FAILURE_POINTER ();    \
1518                         DEBUG_PRINT2 ("      start: %p\n",              \
1519                                       regstart[this_reg]);              \
1520                 }                                                       \
1521                                                                         \
1522                 set_regs_matched_done = 0;                              \
1523                 DEBUG_STATEMENT (nfailure_points_popped++);             \
1524         } while (0)                     /* POP_FAILURE_POINT */
1525 \f
1526 /* Structure for per-register (a.k.a. per-group) information.
1527    Other register information, such as the
1528    starting and ending positions (which are addresses), and the list of
1529    inner groups (which is a bits list) are maintained in separate
1530    variables.
1531
1532    We are making a (strictly speaking) nonportable assumption here: that
1533    the compiler will pack our bit fields into something that fits into
1534    the type of `word', i.e., is something that fits into one item on the
1535    failure stack.  */
1536
1537 typedef union {
1538         fail_stack_elt_t word;
1539         struct {
1540                 /* This field is one if this group can match the empty string,
1541                    zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1542 #define MATCH_NULL_UNSET_VALUE 3
1543                 unsigned match_null_string_p:2;
1544                 unsigned is_active:1;
1545                 unsigned matched_something:1;
1546                 unsigned ever_matched_something:1;
1547         } bits;
1548 } register_info_type;
1549
1550 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1551 #define IS_ACTIVE(R)  ((R).bits.is_active)
1552 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1553 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1554
1555 /* Call this when have matched a real character; it sets `matched' flags
1556    for the subexpressions which we are currently inside.  Also records
1557    that those subexprs have matched.  */
1558 #define SET_REGS_MATCHED()                                              \
1559   do                                                                    \
1560     {                                                                   \
1561       if (!set_regs_matched_done)                                       \
1562         {                                                               \
1563           Element_count r;                                              \
1564           set_regs_matched_done = 1;                                    \
1565           for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
1566             {                                                           \
1567               MATCHED_SOMETHING (reg_info[r])                           \
1568                 = EVER_MATCHED_SOMETHING (reg_info[r])                  \
1569                 = 1;                                                    \
1570             }                                                           \
1571         }                                                               \
1572     }                                                                   \
1573   while (0)
1574
1575 /* Registers are set to a sentinel when they haven't yet matched.  */
1576 static unsigned char reg_unset_dummy;
1577 #define REG_UNSET_VALUE (&reg_unset_dummy)
1578 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1579 \f
1580 /* Subroutine declarations and macros for regex_compile.  */
1581
1582 /* Fetch the next character in the uncompiled pattern---translating it
1583    if necessary.  Also cast from a signed character in the constant
1584    string passed to us by the user to an unsigned char that we can use
1585    as an array index (in, e.g., `translate').  */
1586 #define PATFETCH(c)                                                     \
1587   do {                                                                  \
1588     PATFETCH_RAW (c);                                                   \
1589     c = TRANSLATE (c);                                                  \
1590   } while (0)
1591
1592 /* Fetch the next character in the uncompiled pattern, with no
1593    translation.  */
1594 #define PATFETCH_RAW(c)                                                 \
1595   do {if (p == pend) return REG_EEND;                                   \
1596     assert (p < pend);                                                  \
1597     c = charptr_emchar (p);                                             \
1598     INC_CHARPTR (p);                                                    \
1599   } while (0)
1600
1601 /* Go backwards one character in the pattern.  */
1602 #define PATUNFETCH DEC_CHARPTR (p)
1603
1604 #ifdef MULE
1605
1606 #define PATFETCH_EXTENDED(emch)                                         \
1607   do {if (p == pend) return REG_EEND;                                   \
1608     assert (p < pend);                                                  \
1609     emch = charptr_emchar ((const Bufbyte *) p);                        \
1610     INC_CHARPTR (p);                                                    \
1611     if (TRANSLATE_P (translate) && emch < 0x80)                         \
1612       emch = (Emchar) (unsigned char) RE_TRANSLATE (emch);              \
1613   } while (0)
1614
1615 #define PATFETCH_RAW_EXTENDED(emch)                                     \
1616   do {if (p == pend) return REG_EEND;                                   \
1617     assert (p < pend);                                                  \
1618     emch = charptr_emchar ((const Bufbyte *) p);                        \
1619     INC_CHARPTR (p);                                                    \
1620   } while (0)
1621
1622 #define PATUNFETCH_EXTENDED DEC_CHARPTR (p)
1623
1624 #define PATFETCH_EITHER(emch)                   \
1625   do {                                          \
1626     if (has_extended_chars)                     \
1627       PATFETCH_EXTENDED (emch);                 \
1628     else                                        \
1629       PATFETCH (emch);                          \
1630   } while (0)
1631
1632 #define PATFETCH_RAW_EITHER(emch)               \
1633   do {                                          \
1634     if (has_extended_chars)                     \
1635       PATFETCH_RAW_EXTENDED (emch);             \
1636     else                                        \
1637       PATFETCH_RAW (emch);                      \
1638   } while (0)
1639
1640 #define PATUNFETCH_EITHER                       \
1641   do {                                          \
1642     if (has_extended_chars)                     \
1643       PATUNFETCH_EXTENDED (emch);               \
1644     else                                        \
1645       PATUNFETCH (emch);                        \
1646   } while (0)
1647
1648 #else                           /* not MULE */
1649
1650 #define PATFETCH_EITHER(emch) PATFETCH (emch)
1651 #define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch)
1652 #define PATUNFETCH_EITHER PATUNFETCH
1653
1654 #endif                          /* MULE */
1655
1656 /* If `translate' is non-null, return translate[D], else just D.  We
1657    cast the subscript to translate because some data is declared as
1658    `char *', to avoid warnings when a string constant is passed.  But
1659    when we use a character as a subscript we must make it unsigned.  */
1660 #define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d))
1661
1662 /* Macros for outputting the compiled pattern into `buffer'.  */
1663
1664 /* If the buffer isn't allocated when it comes in, use this.  */
1665 #define INIT_BUF_SIZE  32
1666
1667 /* Make sure we have at least N more bytes of space in buffer.  */
1668 #define GET_BUFFER_SPACE(n)                                             \
1669     while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated)  \
1670       EXTEND_BUFFER ()
1671
1672 /* Make sure we have one more byte of buffer space and then add C to it.  */
1673 #define BUF_PUSH(c)                                                     \
1674   do {                                                                  \
1675     GET_BUFFER_SPACE (1);                                               \
1676     *buf_end++ = (unsigned char) (c);                                   \
1677   } while (0)
1678
1679 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1680 #define BUF_PUSH_2(c1, c2)                                              \
1681   do {                                                                  \
1682     GET_BUFFER_SPACE (2);                                               \
1683     *buf_end++ = (unsigned char) (c1);                                  \
1684     *buf_end++ = (unsigned char) (c2);                                  \
1685   } while (0)
1686
1687 /* As with BUF_PUSH_2, except for three bytes.  */
1688 #define BUF_PUSH_3(c1, c2, c3)                                          \
1689   do {                                                                  \
1690     GET_BUFFER_SPACE (3);                                               \
1691     *buf_end++ = (unsigned char) (c1);                                  \
1692     *buf_end++ = (unsigned char) (c2);                                  \
1693     *buf_end++ = (unsigned char) (c3);                                  \
1694   } while (0)
1695
1696 /* Store a jump with opcode OP at LOC to location TO.  We store a
1697    relative address offset by the three bytes the jump itself occupies.  */
1698 #define STORE_JUMP(op, loc, to) \
1699   store_op1 (op, loc, (to) - (loc) - 3)
1700
1701 /* Likewise, for a two-argument jump.  */
1702 #define STORE_JUMP2(op, loc, to, arg) \
1703   store_op2 (op, loc, (to) - (loc) - 3, arg)
1704
1705 /* Like `STORE_JUMP', but for inserting.  Assume `buf_end' is the
1706    buffer end.  */
1707 #define INSERT_JUMP(op, loc, to) \
1708   insert_op1 (op, loc, (to) - (loc) - 3, buf_end)
1709
1710 /* Like `STORE_JUMP2', but for inserting.  Assume `buf_end' is the
1711    buffer end.  */
1712 #define INSERT_JUMP2(op, loc, to, arg) \
1713   insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end)
1714
1715 /* This is not an arbitrary limit: the arguments which represent offsets
1716    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1717    be too small, many things would have to change.  */
1718 #define MAX_BUF_SIZE (1L << 16)
1719
1720 /* Extend the buffer by twice its current size via realloc and
1721    reset the pointers that pointed into the old block to point to the
1722    correct places in the new one.  If extending the buffer results in it
1723    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1724 #define EXTEND_BUFFER()                                                 \
1725         do {                                                            \
1726                 re_char *old_buffer = bufp->buffer;                     \
1727                 if (bufp->allocated == MAX_BUF_SIZE)                    \
1728                         return REG_ESIZE;                               \
1729                 bufp->allocated <<= 1;                                  \
1730                 if (bufp->allocated > MAX_BUF_SIZE)                     \
1731                         bufp->allocated = MAX_BUF_SIZE;                 \
1732                 bufp->buffer = (unsigned char *)xrealloc(               \
1733                         bufp->buffer, bufp->allocated);                 \
1734                 if (bufp->buffer == NULL) {                             \
1735                         return REG_ESPACE;                              \
1736                 }                                                       \
1737                 /* If the buffer moved, move all the pointers into it.  */ \
1738                 if (old_buffer != bufp->buffer) {                       \
1739                         buf_end = (buf_end - old_buffer) + bufp->buffer; \
1740                         begalt = (begalt - old_buffer) + bufp->buffer;  \
1741                         if (fixup_alt_jump) {                           \
1742                                 fixup_alt_jump =                        \
1743                                         (fixup_alt_jump - old_buffer) + \
1744                                         bufp->buffer;                   \
1745                         }                                               \
1746                         if (laststart) {                                \
1747                                 laststart = (laststart - old_buffer) +  \
1748                                         bufp->buffer;                   \
1749                         }                                               \
1750                         if (pending_exact) {                            \
1751                                 pending_exact =                         \
1752                                         (pending_exact - old_buffer) +  \
1753                                         bufp->buffer;                   \
1754                         }                                               \
1755                 }                                                       \
1756         } while (0)
1757
1758 /* Since we have one byte reserved for the register number argument to
1759    {start,stop}_memory, the maximum number of groups we can report
1760    things about is what fits in that byte.  */
1761 #define MAX_REGNUM 255
1762
1763 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1764    ignore the excess.  */
1765 typedef unsigned regnum_t;
1766
1767 #define INIT_REG_TRANSLATE_SIZE 5
1768
1769 /* Macros for the compile stack.  */
1770
1771 /* Since offsets can go either forwards or backwards, this type needs to
1772    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1773 typedef int pattern_offset_t;
1774
1775 typedef struct {
1776         pattern_offset_t begalt_offset;
1777         pattern_offset_t fixup_alt_jump;
1778         pattern_offset_t inner_group_offset;
1779         pattern_offset_t laststart_offset;
1780         regnum_t regnum;
1781 } compile_stack_elt_t;
1782
1783 typedef struct {
1784         compile_stack_elt_t *stack;
1785         unsigned size;
1786         unsigned avail;         /* Offset of next open position.  */
1787 } compile_stack_type;
1788
1789 #define INIT_COMPILE_STACK_SIZE 32
1790
1791 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1792 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1793
1794 /* The next available element.  */
1795 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1796
1797 /* Set the bit for character C in a bit vector.  */
1798 #define SET_LIST_BIT(c)                         \
1799   (buf_end[((unsigned char) (c)) / BYTEWIDTH]   \
1800    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1801
1802 #ifdef MULE
1803
1804 /* Set the "bit" for character C in a range table. */
1805 #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt)
1806
1807 /* Set the "bit" for character c in the appropriate table. */
1808 #define SET_EITHER_BIT(c)                       \
1809         do {                                    \
1810                 if (has_extended_chars)         \
1811                         SET_RANGETAB_BIT (c);   \
1812                 else                            \
1813                         SET_LIST_BIT (c);       \
1814         } while (0)
1815
1816 #else                           /* not MULE */
1817
1818 #define SET_EITHER_BIT(c) SET_LIST_BIT (c)
1819
1820 #endif
1821
1822 /* Get the next unsigned number in the uncompiled pattern.  */
1823 #define GET_UNSIGNED_NUMBER(num)                                        \
1824   { if (p != pend)                                                      \
1825      {                                                                  \
1826        PATFETCH (c);                                                    \
1827        while (ISDIGIT (c))                                              \
1828          {                                                              \
1829            if (num < 0)                                                 \
1830               num = 0;                                                  \
1831            num = num * 10 + c - '0';                                    \
1832            if (p == pend)                                               \
1833               break;                                                    \
1834            PATFETCH (c);                                                \
1835          }                                                              \
1836        }                                                                \
1837     }
1838
1839 #define CHAR_CLASS_MAX_LENGTH  9        /* Namely, `multibyte'.  */
1840
1841 \f
1842 static void store_op1(re_opcode_t op, unsigned char *loc, int arg);
1843 static void store_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2);
1844 static void insert_op1(re_opcode_t op, unsigned char *loc, int arg,
1845                        unsigned char *end);
1846 static void insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2,
1847                        unsigned char *end);
1848 static re_bool at_begline_loc_p(re_char*, re_char*, reg_syntax_t);
1849 static re_bool at_endline_loc_p(re_char*, re_char*, int syntax);
1850 static re_bool group_in_compile_stack(compile_stack_type compile_stack,
1851                                       regnum_t regnum);
1852 static reg_errcode_t compile_range(re_char ** p_ptr, re_char * pend,
1853                                    RE_TRANSLATE_TYPE translate,
1854                                    reg_syntax_t syntax, unsigned char *b);
1855 #ifdef MULE
1856 static reg_errcode_t compile_extended_range(re_char ** p_ptr,
1857                                             re_char * pend,
1858                                             RE_TRANSLATE_TYPE translate,
1859                                             reg_syntax_t syntax,
1860                                             Lisp_Object rtab);
1861 #endif                          /* MULE */
1862 static re_bool group_match_null_string_p(unsigned char **p,
1863                                          unsigned char *end,
1864                                          register_info_type * reg_info);
1865 static re_bool alt_match_null_string_p(unsigned char *p, unsigned char *end,
1866                                        register_info_type * reg_info);
1867 static re_bool common_op_match_null_string_p(unsigned char **p,
1868                                              unsigned char *end,
1869                                              register_info_type * reg_info);
1870 static int bcmp_translate(const unsigned char *s1, const unsigned char *s2,
1871                           REGISTER int len, RE_TRANSLATE_TYPE translate);
1872 static int re_match_2_internal(struct re_pattern_buffer *bufp,
1873                                re_char * string1, int size1,
1874                                re_char * string2, int size2, int pos,
1875                                struct re_registers *regs, int stop);
1876 \f
1877 #ifndef MATCH_MAY_ALLOCATE
1878
1879 /* If we cannot allocate large objects within re_match_2_internal,
1880    we make the fail stack and register vectors global.
1881    The fail stack, we grow to the maximum size when a regexp
1882    is compiled.
1883    The register vectors, we adjust in size each time we
1884    compile a regexp, according to the number of registers it needs.  */
1885
1886 static fail_stack_type fail_stack;
1887
1888 /* Size with which the following vectors are currently allocated.
1889    That is so we can make them bigger as needed,
1890    but never make them smaller.  */
1891 static int regs_allocated_size;
1892
1893 static re_char **regstart, **regend;
1894 static re_char **old_regstart, **old_regend;
1895 static re_char **best_regstart, **best_regend;
1896 static register_info_type *reg_info;
1897 static re_char **reg_dummy;
1898 static register_info_type *reg_info_dummy;
1899
1900 /* Make the register vectors big enough for NUM_REGS registers,
1901    but don't make them smaller.  */
1902
1903 static void regex_grow_registers(int num_regs)
1904 {
1905         if (num_regs > regs_allocated_size) {
1906                 RETALLOC_IF(regstart, num_regs, re_char *);
1907                 RETALLOC_IF(regend, num_regs, re_char *);
1908                 RETALLOC_IF(old_regstart, num_regs, re_char *);
1909                 RETALLOC_IF(old_regend, num_regs, re_char *);
1910                 RETALLOC_IF(best_regstart, num_regs, re_char *);
1911                 RETALLOC_IF(best_regend, num_regs, re_char *);
1912                 RETALLOC_IF(reg_info, num_regs, register_info_type);
1913                 RETALLOC_IF(reg_dummy, num_regs, re_char *);
1914                 RETALLOC_IF(reg_info_dummy, num_regs, register_info_type);
1915
1916                 regs_allocated_size = num_regs;
1917         }
1918 }
1919
1920 #endif                          /* not MATCH_MAY_ALLOCATE */
1921
1922 /* auxiliary stuff, FSF theft */
1923 /* Character classes.  */
1924 typedef enum {
1925         RECC_ERROR = 0,
1926         RECC_ALNUM, RECC_ALPHA, RECC_WORD,
1927         RECC_GRAPH, RECC_PRINT,
1928         RECC_LOWER, RECC_UPPER,
1929         RECC_PUNCT, RECC_CNTRL,
1930         RECC_DIGIT, RECC_XDIGIT,
1931         RECC_BLANK, RECC_SPACE,
1932         RECC_MULTIBYTE, RECC_NONASCII,
1933         RECC_ASCII, RECC_UNIBYTE
1934 } re_wctype_t;
1935
1936 /* Map a string to the char class it names (if any).  */
1937 static inline re_wctype_t
1938 re_wctype(re_char *str)
1939 {
1940         const char *string = (const char*)str;
1941
1942         if (STREQ (string, "alnum")) {
1943                 return RECC_ALNUM;
1944         } else if (STREQ (string, "alpha")) {
1945                 return RECC_ALPHA;
1946         } else if (STREQ (string, "digit")) {
1947                 return RECC_DIGIT;
1948         } else if (STREQ (string, "graph")) {
1949                 return RECC_GRAPH;
1950         } else if (STREQ (string, "space")) {
1951                 return RECC_SPACE;
1952         } else if (STREQ (string, "upper")) {
1953                 return RECC_UPPER;
1954         } else if (STREQ (string, "lower")) {
1955                 return RECC_LOWER;
1956         } else if (STREQ (string, "word")) {
1957                 return RECC_WORD;
1958         } else if (STREQ (string, "print")) {
1959                 return RECC_PRINT;
1960         } else if (STREQ (string, "punct")) {
1961                 return RECC_PUNCT;
1962         } else if (STREQ (string, "xdigit")) {
1963                 return RECC_XDIGIT;
1964         } else if (STREQ (string, "cntrl")) {
1965                 return RECC_CNTRL;
1966         } else if (STREQ (string, "blank")) {
1967                 return RECC_BLANK;
1968         } else if (STREQ (string, "ascii")) {
1969                 return RECC_ASCII;
1970         } else if (STREQ (string, "nonascii")) {
1971                 return RECC_NONASCII;
1972         } else if (STREQ (string, "unibyte")) {
1973                 return RECC_UNIBYTE;
1974         } else if (STREQ (string, "multibyte")) {
1975                 return RECC_MULTIBYTE;
1976         } else {
1977                 return 0;
1978         }
1979 }
1980
1981 /* True if CH is in the char class CC.  */
1982 static inline bool
1983 re_iswctype(int ch, re_wctype_t cc)
1984 {
1985         switch (cc) {
1986         case RECC_ALNUM:
1987                 return ISALNUM (ch);
1988         case RECC_ALPHA:
1989                 return ISALPHA (ch);
1990         case RECC_BLANK:
1991                 return ISBLANK (ch);
1992         case RECC_CNTRL:
1993                 return ISCNTRL (ch);
1994         case RECC_DIGIT:
1995                 return ISDIGIT (ch);
1996         case RECC_GRAPH:
1997                 return ISGRAPH (ch);
1998         case RECC_LOWER:
1999                 return ISLOWER (ch);
2000         case RECC_PRINT:
2001                 return ISPRINT (ch);
2002         case RECC_PUNCT:
2003                 return ISPUNCT (ch);
2004         case RECC_SPACE:
2005                 return ISSPACE (ch);
2006         case RECC_UPPER:
2007                 return ISUPPER (ch);
2008         case RECC_XDIGIT:
2009                 return ISXDIGIT (ch);
2010         case RECC_ASCII:
2011                 return IS_REAL_ASCII (ch);
2012         case RECC_NONASCII:
2013                 return !IS_REAL_ASCII (ch);
2014         case RECC_WORD:
2015                 return ISWORD(ch);
2016         case RECC_UNIBYTE:
2017                 return ISUNIBYTE((unsigned int)ch);
2018         case RECC_MULTIBYTE:
2019                 return !ISUNIBYTE((unsigned int)ch);
2020         case RECC_ERROR:
2021                 return false;
2022         default:
2023                 abort();
2024         }
2025         return false;
2026 }
2027
2028 \f
2029 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2030    Returns one of error codes defined in `regex.h', or zero for success.
2031
2032    Assumes the `allocated' (and perhaps `buffer') and `translate'
2033    fields are set in BUFP on entry.
2034
2035    If it succeeds, results are put in BUFP (if it returns an error, the
2036    contents of BUFP are undefined):
2037      `buffer' is the compiled pattern;
2038      `syntax' is set to SYNTAX;
2039      `used' is set to the length of the compiled pattern;
2040      `fastmap_accurate' is zero;
2041      `re_ngroups' is the number of groups/subexpressions (including shy
2042         groups) in PATTERN;
2043      `re_nsub' is the number of non-shy groups in PATTERN;
2044      `not_bol' and `not_eol' are zero;
2045
2046    The `fastmap' and `newline_anchor' fields are neither
2047    examined nor set.  */
2048
2049 static reg_errcode_t
2050 regex_compile(re_char * pattern, int size, reg_syntax_t syntax,
2051               struct re_pattern_buffer *bufp)
2052 {
2053         /* We fetch characters from PATTERN here.  We declare these as int
2054            (or possibly long) so that chars above 127 can be used as
2055            array indices.  The macros that fetch a character from the pattern
2056            make sure to coerce to unsigned char before assigning, so we won't
2057            get bitten by negative numbers here. */
2058         /* XEmacs change: used to be unsigned char. */
2059         REGISTER EMACS_INT c, c1;
2060
2061         /* A random temporary spot in PATTERN.  */
2062         re_char *p1;
2063
2064         /* Points to the end of the buffer, where we should append.  */
2065         REGISTER unsigned char *buf_end;
2066
2067         /* Keeps track of unclosed groups.  */
2068         compile_stack_type compile_stack;
2069
2070         /* Points to the current (ending) position in the pattern.  */
2071         re_char *p = pattern;
2072         re_char *pend = pattern + size;
2073
2074         /* How to translate the characters in the pattern.  */
2075         RE_TRANSLATE_TYPE translate = bufp->translate;
2076
2077         /* Address of the count-byte of the most recently inserted `exactn'
2078            command.  This makes it possible to tell if a new exact-match
2079            character can be added to that command or if the character requires
2080            a new `exactn' command.  */
2081         unsigned char *pending_exact = 0;
2082
2083         /* Address of start of the most recently finished expression.
2084            This tells, e.g., postfix * where to find the start of its
2085            operand.  Reset at the beginning of groups and alternatives.  */
2086         unsigned char *laststart = 0;
2087
2088         /* Address of beginning of regexp, or inside of last group.  */
2089         unsigned char *begalt;
2090
2091         /* Place in the uncompiled pattern (i.e., the {) to
2092            which to go back if the interval is invalid.  */
2093         re_char *beg_interval;
2094
2095         /* Address of the place where a forward jump should go to the end of
2096            the containing expression.  Each alternative of an `or' -- except the
2097            last -- ends with a forward jump of this sort.  */
2098         unsigned char *fixup_alt_jump = 0;
2099
2100         /* Counts open-groups as they are encountered.  Remembered for the
2101            matching close-group on the compile stack, so the same register
2102            number is put in the stop_memory as the start_memory.  */
2103         regnum_t regnum = 0;
2104
2105         /* we need to close over compile_stack */
2106 #       define free_stack(args...)      xfree(compile_stack.stack)
2107
2108 #ifdef REGEX_DEBUG_FLAG
2109         DEBUG_PRINT1("\nCompiling pattern: ");
2110         if (debug) {
2111                 int debug_count;
2112
2113                 for (debug_count = 0; debug_count < size; debug_count++)
2114                         putchar(pattern[debug_count]);
2115                 putchar('\n');
2116         }
2117 #endif                          /* DEBUG */
2118
2119         /* Initialize the compile stack.  */
2120         compile_stack.stack =
2121                 TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2122         if (compile_stack.stack == NULL) {
2123                 return REG_ESPACE;
2124         }
2125         compile_stack.size = INIT_COMPILE_STACK_SIZE;
2126         compile_stack.avail = 0;
2127
2128         /* Initialize the pattern buffer.  */
2129         bufp->syntax = syntax;
2130         bufp->fastmap_accurate = 0;
2131         bufp->not_bol = bufp->not_eol = 0;
2132
2133         /* Set `used' to zero, so that if we return an error, the pattern
2134            printer (for debugging) will think there's no pattern.  We reset it
2135            at the end.  */
2136         bufp->used = 0;
2137
2138         /* Always count groups, whether or not bufp->no_sub is set.  */
2139         bufp->re_nsub = 0;
2140         bufp->re_ngroups = 0;
2141
2142         /* Allocate index translation array if needed. */
2143         if (bufp->external_to_internal_register == 0) {
2144                 bufp->external_to_internal_register_size =
2145                     INIT_REG_TRANSLATE_SIZE;
2146                 RETALLOC(bufp->external_to_internal_register,
2147                          bufp->external_to_internal_register_size, int);
2148         }
2149         /* Initialize translations to impossible value to aid debugging. */
2150         {
2151                 int i;
2152
2153                 bufp->external_to_internal_register[0] = 0;
2154                 for (i = 1; i < bufp->external_to_internal_register_size; i++)
2155                         bufp->external_to_internal_register[i] =
2156                             (int)0xDEADBEEF;
2157         }
2158
2159 #if !defined (emacs) && !defined (SYNTAX_TABLE)
2160         /* Initialize the syntax table.  */
2161         init_syntax_once();
2162 #endif
2163
2164         if (bufp->allocated == 0) {
2165                 if (bufp->buffer) {
2166                         /* If zero allocated, but buffer is non-null, try to
2167                            realloc enough space.  This loses if buffer's address
2168                            is bogus, but that is the user's responsibility.  */
2169                         RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
2170                 } else {
2171                         /* Caller did not allocate a buffer.  Do it for them. */
2172                         bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
2173                 }
2174                 if (!bufp->buffer) {
2175                         free_stack();
2176                         return REG_ESPACE;
2177                 }
2178
2179                 bufp->allocated = INIT_BUF_SIZE;
2180         }
2181
2182         begalt = buf_end = bufp->buffer;
2183
2184         /* Loop through the uncompiled pattern until we're at the end.  */
2185         while (p != pend) {
2186                 PATFETCH(c);
2187
2188                 switch (c) {
2189                 case '^': {
2190                         if (    /* If at start of pattern, it's an operator.  */
2191                                 p == pattern + 1
2192                                 /* If context independent, it's an operator.  */
2193                                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2194                                 /* Otherwise, depends on what's come before.  */
2195                                 || at_begline_loc_p(pattern, p, syntax))
2196                                 BUF_PUSH(begline);
2197                         else
2198                                 goto normal_char;
2199                 }
2200                         break;
2201
2202                 case '$': {
2203                         if (    /* If at end of pattern, it's an
2204                                    operator.  */
2205                                 p == pend
2206                                 /* If context independent, it's an
2207                                    operator.  */
2208                                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2209                                 /* Otherwise, depends on what's
2210                                    next.  */
2211                                 || at_endline_loc_p(p, pend, syntax))
2212                                 BUF_PUSH(endline);
2213                         else
2214                                 goto normal_char;
2215                 }
2216                         break;
2217
2218                 case '+':
2219                 case '?':
2220                         if ((syntax & RE_BK_PLUS_QM) ||
2221                             (syntax & RE_LIMITED_OPS)) {
2222                                 goto normal_char;
2223                         }
2224                         /* pardon? */
2225                 handle_plus:
2226                 case '*': {
2227                         re_bool zero_times_ok;
2228                         re_bool many_times_ok;
2229                         re_bool minimal;
2230
2231                         /* If there is no previous pattern... */
2232                         if (!laststart) {
2233                                 if (syntax & RE_CONTEXT_INVALID_OPS) {
2234                                         free_stack();
2235                                         return REG_BADRPT;
2236                                 } else if (!(syntax & RE_CONTEXT_INDEP_OPS)) {
2237                                         goto normal_char;
2238                                 }
2239                         }
2240
2241                         /* true means zero/many matches are allowed. */
2242                         zero_times_ok = c != '+';
2243                         many_times_ok = c != '?';
2244
2245                         /* true means match shortest string possible. */
2246                         minimal = false;
2247
2248                         /* If there is a sequence of repetition chars, collapse it
2249                            down to just one (the right one).  We can't combine
2250                            interval operators with these because of, e.g., `a{2}*',
2251                            which should only match an even number of `a's.  */
2252                         while (p != pend) {
2253                                 PATFETCH(c);
2254
2255                                 if (c == '*'
2256                                     || (!(syntax & RE_BK_PLUS_QM)
2257                                         && (c == '+' || c == '?'))) ;
2258
2259                                 else if (syntax & RE_BK_PLUS_QM
2260                                          && c == '\\') {
2261                                         if (p == pend) {
2262                                                 free_stack();
2263                                                 return REG_EESCAPE;
2264                                         }
2265
2266                                         PATFETCH(c1);
2267                                         if (!(c1 == '+' || c1 == '?')) {
2268                                                 PATUNFETCH;
2269                                                 PATUNFETCH;
2270                                                 break;
2271                                         }
2272
2273                                         c = c1;
2274                                 } else {
2275                                         PATUNFETCH;
2276                                         break;
2277                                 }
2278
2279                                 /* If we get here, we found another repeat
2280                                    character.  */
2281                                 if (!(syntax & RE_NO_MINIMAL_MATCHING)) {
2282                                         /* "*?" and "+?" and "??" are okay (and
2283                                            mean match minimally), but other
2284                                            sequences (such as "*??" and "+++")
2285                                            are rejected (reserved for future
2286                                            use). */
2287                                         if (minimal || c != '?') {
2288                                                 free_stack();
2289                                                 return REG_BADRPT;
2290                                         }
2291                                         minimal = true;
2292                                 } else {
2293                                         zero_times_ok |= c != '+';
2294                                         many_times_ok |= c != '?';
2295                                 }
2296                         }
2297
2298                         /* Star, etc. applied to an empty pattern is equivalent
2299                            to an empty pattern.  */
2300                         if (!laststart) {
2301                                 break;
2302                         }
2303
2304                         /* Now we know whether zero matches is allowed
2305                            and whether two or more matches is allowed
2306                            and whether we want minimal or maximal matching. */
2307                         if (minimal) {
2308                                 if (!many_times_ok) {
2309                                         /* "a??" becomes:
2310                                            0: /on_failure_jump to 6
2311                                            3: /jump to 9
2312                                            6: /exactn/1/A
2313                                            9: end of pattern.
2314                                         */
2315                                         GET_BUFFER_SPACE(6);
2316                                         INSERT_JUMP(jump, laststart,
2317                                                     buf_end + 3);
2318                                         buf_end += 3;
2319                                         INSERT_JUMP(on_failure_jump,
2320                                                     laststart,
2321                                                     laststart + 6);
2322                                         buf_end += 3;
2323                                 } else if (zero_times_ok) {
2324                                         /* "a*?" becomes:
2325                                            0: /jump to 6
2326                                            3: /exactn/1/A
2327                                            6: /on_failure_jump to 3
2328                                            9: end of pattern.
2329                                         */
2330                                         GET_BUFFER_SPACE(6);
2331                                         INSERT_JUMP(jump, laststart,
2332                                                     buf_end + 3);
2333                                         buf_end += 3;
2334                                         STORE_JUMP(on_failure_jump,
2335                                                    buf_end,
2336                                                    laststart + 3);
2337                                         buf_end += 3;
2338                                 } else {
2339                                         /* "a+?" becomes:
2340                                            0: /exactn/1/A
2341                                            3: /on_failure_jump to 0
2342                                            6: end of pattern.
2343                                         */
2344                                         GET_BUFFER_SPACE(3);
2345                                         STORE_JUMP(on_failure_jump,
2346                                                    buf_end, laststart);
2347                                         buf_end += 3;
2348                                 }
2349                         } else {
2350                                 /* Are we optimizing this jump?  */
2351                                 re_bool keep_string_p = false;
2352
2353                                 if (many_times_ok) {
2354                                         /* More than one repetition is allowed,
2355                                            so put in at the end a backward
2356                                            relative jump from `buf_end' to
2357                                            before the next jump we're going to
2358                                            put in below (which jumps from
2359                                            laststart to after this jump).
2360
2361                                            But if we are at the `*' in the exact
2362                                            sequence `.*\n', insert an
2363                                            unconditional jump backwards to the
2364                                            ., instead of the beginning of the
2365                                            loop.  This way we only push a
2366                                            failure point once, instead of every
2367                                            time through the loop.  */
2368                                         assert(p - 1 > pattern);
2369
2370                                         /* Allocate the space for the jump.  */
2371                                         GET_BUFFER_SPACE(3);
2372
2373                                         /* We know we are not at the first
2374                                            character of the pattern, because
2375                                            laststart was nonzero.  And we've
2376                                            already incremented `p', by the way,
2377                                            to be the character after the `*'.
2378                                            Do we have to do something analogous
2379                                            here for null bytes, because of
2380                                            RE_DOT_NOT_NULL? */
2381                                         if (*(p - 2) == '.' &&
2382                                             zero_times_ok &&
2383                                             p < pend &&
2384                                             *p == '\n' &&
2385                                             !(syntax & RE_DOT_NEWLINE)) {
2386                                                 /* We have .*\n.  */
2387                                                 STORE_JUMP(jump,
2388                                                            buf_end,
2389                                                            laststart);
2390                                                 keep_string_p = true;
2391                                         } else {
2392                                                 /* Anything else.  */
2393                                                 STORE_JUMP
2394                                                         (maybe_pop_jump,
2395                                                          buf_end,
2396                                                          laststart - 3);
2397                                         }
2398                                         /* We've added more stuff to the
2399                                            buffer.  */
2400                                         buf_end += 3;
2401                                 }
2402
2403                                 /* On failure, jump from laststart to buf_end +
2404                                    3, which will be the end of the buffer after
2405                                    this jump is inserted.  */
2406                                 GET_BUFFER_SPACE(3);
2407                                 INSERT_JUMP(keep_string_p ?
2408                                             on_failure_keep_string_jump
2409                                             : on_failure_jump,
2410                                             laststart, buf_end + 3);
2411                                 buf_end += 3;
2412
2413                                 if (!zero_times_ok) {
2414                                         /* At least one repetition is required,
2415                                            so insert a `dummy_failure_jump'
2416                                            before the initial `on_failure_jump'
2417                                            instruction of the loop. This effects
2418                                            a skip over that instruction the
2419                                            first time we hit that loop.  */
2420                                         GET_BUFFER_SPACE(3);
2421                                         INSERT_JUMP(dummy_failure_jump,
2422                                                     laststart,
2423                                                     laststart + 6);
2424                                         buf_end += 3;
2425                                 }
2426                         }
2427                         pending_exact = 0;
2428                 }
2429                         break;
2430
2431                 case '.':
2432                         laststart = buf_end;
2433                         BUF_PUSH(anychar);
2434                         break;
2435
2436                 case '[': {
2437                         /* XEmacs change: this whole section */
2438                         re_bool had_char_class = false;
2439 #ifdef MULE
2440                         re_bool has_extended_chars = false;
2441                         REGISTER Lisp_Object rtab = Qnil;
2442 #endif
2443
2444                         if (p == pend) {
2445                                 free_stack();
2446                                 return REG_EBRACK;
2447                         }
2448
2449                         /* Ensure that we have enough space to push a charset:
2450                            the opcode, the length count, and the bitset; 34
2451                            bytes in all.  */
2452                         GET_BUFFER_SPACE(34);
2453
2454                         laststart = buf_end;
2455
2456                         /* We test `*p == '^' twice, instead of using an if
2457                            statement, so we only need one BUF_PUSH.  */
2458                         BUF_PUSH(*p == '^' ? charset_not : charset);
2459                         if (*p == '^')
2460                                 p++;
2461
2462                         /* Remember the first position in the bracket
2463                            expression.  */
2464                         p1 = p;
2465
2466                         /* Push the number of bytes in the bitmap.  */
2467                         BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
2468
2469                         /* Clear the whole map.  */
2470                         memset(buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
2471
2472                         /* charset_not matches newline according to a syntax
2473                            bit.  */
2474                         if ((re_opcode_t) buf_end[-2] == charset_not
2475                             && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2476                                 SET_LIST_BIT('\n');
2477
2478 #ifdef MULE
2479                         start_over_with_extended:
2480                         if (has_extended_chars) {
2481                                 /* There are extended chars here, which means we
2482                                    need to start over and shift to unified
2483                                    range-table format. */
2484                                 if (buf_end[-2] == charset)
2485                                         buf_end[-2] = charset_mule;
2486                                 else
2487                                         buf_end[-2] = charset_mule_not;
2488                                 buf_end--;
2489                                 p = p1;
2490                                 /* go back to the beginning of the charset,
2491                                    after a possible ^. */
2492                                 rtab = Vthe_lisp_rangetab;
2493                                 Fclear_range_table(rtab);
2494
2495                                 /* charset_not matches newline according to a
2496                                    syntax bit.  */
2497                                 if ((re_opcode_t) buf_end[-1] ==
2498                                     charset_mule_not
2499                                     && (syntax &
2500                                         RE_HAT_LISTS_NOT_NEWLINE))
2501                                         SET_EITHER_BIT('\n');
2502                         }
2503 #endif                          /* MULE */
2504
2505                                 /* Read in characters and ranges, setting map bits.  */
2506                         for (;;) {
2507                                 if (p == pend) {
2508                                         free_stack();
2509                                         return REG_EBRACK;
2510                                 }
2511                                 PATFETCH(c);
2512
2513 #ifdef MULE
2514                                 if (c >= 0x80 && !has_extended_chars) {
2515                                         has_extended_chars = 1;
2516                                         /* Frumble-bumble, we've found some
2517                                            extended chars.  Need to start over,
2518                                            process everything using the general
2519                                            extended-char mechanism, and need to
2520                                            use charset_mule and charset_mule_not
2521                                            instead of charset and
2522                                            charset_not. */
2523                                         goto start_over_with_extended;
2524                                 }
2525 #endif                          /* MULE */
2526                                 /* \ might escape characters inside [...] and
2527                                    [^...].  */
2528                                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) &&
2529                                     c == '\\') {
2530                                         if (p == pend) {
2531                                                 free_stack();
2532                                                 return REG_EESCAPE;
2533                                         }
2534
2535                                         PATFETCH(c1);
2536 #ifdef MULE
2537                                         if (c1 >= 0x80
2538                                             && !has_extended_chars) {
2539                                                 has_extended_chars = 1;
2540                                                 goto start_over_with_extended;
2541                                         }
2542 #endif                          /* MULE */
2543                                         SET_EITHER_BIT(c1);
2544                                         continue;
2545                                 }
2546
2547                                 /* Could be the end of the bracket
2548                                    expression.  If it's not (i.e., when
2549                                    the bracket expression is `[]' so
2550                                    far), the ']' character bit gets set
2551                                    way below.  */
2552                                 if (c == ']' && p != p1 + 1)
2553                                         break;
2554
2555                                 /* Look ahead to see if it's a range
2556                                    when the last thing was a character
2557                                    class.  */
2558                                 if (had_char_class && c == '-'
2559                                     && *p != ']') {
2560                                         free_stack();
2561                                         return REG_ERANGE;
2562                                 }
2563
2564                                 /* Look ahead to see if it's a range
2565                                    when the last thing was a character:
2566                                    if this is a hyphen not at the
2567                                    beginning or the end of a list, then
2568                                    it's the range operator.  */
2569                                 if (c == '-'
2570                                     && !(p - 2 >= pattern
2571                                          && p[-2] == '[')
2572                                     && !(p - 3 >= pattern
2573                                          && p[-3] == '['
2574                                          && p[-2] == '^')
2575                                     && *p != ']') {
2576                                         reg_errcode_t ret;
2577
2578 #ifdef MULE
2579                                         if (*(const unsigned char *)p >= 0x80
2580                                             && !has_extended_chars) {
2581                                                 has_extended_chars = 1;
2582                                                 goto start_over_with_extended;
2583                                         }
2584                                         if (has_extended_chars)
2585                                                 ret = compile_extended_range
2586                                                         (&p, pend, translate,
2587                                                          syntax, rtab);
2588                                         else
2589 #endif                          /* MULE */
2590                                                 ret = compile_range
2591                                                         (&p, pend, translate,
2592                                                          syntax, buf_end);
2593                                         if (ret != REG_NOERROR) {
2594                                                 free_stack();
2595                                                 return ret;
2596                                         }
2597                                 }
2598
2599                                 else if (p[0] == '-' && p[1] != ']') {
2600                                         /* This handles ranges made up of
2601                                            characters only.  */
2602                                         reg_errcode_t ret;
2603
2604                                         /* Move past the `-'.  */
2605                                         PATFETCH(c1);
2606
2607 #ifdef MULE
2608                                         if (*(const unsigned char*)p >= 0x80
2609                                             && !has_extended_chars) {
2610                                                 has_extended_chars = 1;
2611                                                 goto start_over_with_extended;
2612                                         }
2613                                         if (has_extended_chars)
2614                                                 ret = compile_extended_range(
2615                                                         &p, pend, translate,
2616                                                         syntax, rtab);
2617                                         else
2618 #endif                          /* MULE */
2619                                                 ret = compile_range(
2620                                                         &p, pend, translate,
2621                                                         syntax, buf_end);
2622                                         if (ret != REG_NOERROR) {
2623                                                 free_stack();
2624                                                 return ret;
2625                                         }
2626                                 }
2627
2628                                 /* See if we're at the beginning of a possible
2629                                    character class.  */
2630
2631                                 else if (syntax & RE_CHAR_CLASSES &&
2632                                          c == '[' && *p == ':') {       
2633                                         /* Leave room for the null.  */
2634                                         char str[CHAR_CLASS_MAX_LENGTH + 1];
2635
2636                                         PATFETCH(c);
2637                                         c1 = 0;
2638
2639                                         /* If pattern is `[[:'.  */
2640                                         if (p == pend) {
2641                                                 free_stack();
2642                                                 return REG_EBRACK;
2643                                         }
2644                                         for (;;) {
2645                                                 /* #### This code is unused.
2646                                                    Correctness is not checked
2647                                                    after TRT table change.  */
2648                                                 PATFETCH(c);
2649                                                 if (c == ':' || c == ']'
2650                                                     || p == pend
2651                                                     || c1 ==
2652                                                     CHAR_CLASS_MAX_LENGTH)
2653                                                         break;
2654                                                 str[c1++] = (char)c;
2655                                         }
2656                                         str[c1] = '\0';
2657
2658                                         /* If isn't a word bracketed by
2659                                            `[:' and `:]': undo the
2660                                            ending character, the
2661                                            letters, and leave the
2662                                            leading `:' and `[' (but set
2663                                            bits for them).  */
2664                                         if (c == ':' && *p == ']') {
2665                                                 re_wctype_t wct =
2666                                                         re_wctype(
2667                                                                 (unsigned char*)
2668                                                                 str);
2669
2670                                                 if (wct == RECC_ERROR) {
2671                                                         free_stack();
2672                                                         return REG_ECTYPE;
2673                                                 }
2674
2675                                                 /* Throw away the ] at
2676                                                    the end of the
2677                                                    character class.  */
2678                                                 PATFETCH(c);
2679
2680                                                 if (p == pend) {
2681                                                         free_stack();
2682                                                         return REG_EBRACK;
2683                                                 }
2684
2685                                                 for (int ch = 0;
2686                                                      ch < 1<<BYTEWIDTH;
2687                                                      ch++) {
2688                                                         /* rmsmacs has
2689                                                            this */
2690 #if 0
2691                                                         int translated =
2692                                                                 TRANSLATE(ch);
2693                                                         if (translated <
2694                                                             (1 << BYTEWIDTH) &&
2695                                                             re_iswctype(ch, wct)) {
2696                                                                 SET_EITHER_BIT(ch);
2697                                                         }
2698 #else
2699                                                         if (re_iswctype(ch, wct)) {
2700                                                                 SET_EITHER_BIT(ch);
2701                                                         }
2702 #endif
2703                                                 }
2704                                                 had_char_class = true;
2705                                         } else {
2706                                                 c1++;
2707                                                 while (c1--) {
2708                                                         PATUNFETCH;
2709                                                 }
2710                                                 SET_EITHER_BIT('[');
2711                                                 SET_EITHER_BIT(':');
2712                                                 had_char_class = false;
2713                                         }
2714                                 } else {
2715                                         had_char_class = false;
2716                                         SET_EITHER_BIT(c);
2717                                 }
2718                         }
2719
2720 #ifdef MULE
2721                         if (has_extended_chars) {
2722                                 /* We have a range table, not a bit vector. */
2723                                 int bytes_needed =
2724                                         unified_range_table_bytes_needed
2725                                         (rtab);
2726                                 GET_BUFFER_SPACE(bytes_needed);
2727                                 unified_range_table_copy_data(rtab,
2728                                                               buf_end);
2729                                 buf_end +=
2730                                         unified_range_table_bytes_used
2731                                         (buf_end);
2732                                 break;
2733                         }
2734 #endif                          /* MULE */
2735                                 /* Discard any (non)matching list bytes that are
2736                                    all 0 at the end of the map.  Decrease the
2737                                    map-length byte too.  */
2738                         while ((int)buf_end[-1] > 0
2739                                && buf_end[buf_end[-1] - 1] == 0)
2740                                 buf_end[-1]--;
2741                         buf_end += buf_end[-1];
2742                 }
2743                         break;
2744
2745                 case '(':
2746                         if (syntax & RE_NO_BK_PARENS)
2747                                 goto handle_open;
2748                         else
2749                                 goto normal_char;
2750
2751                 case ')':
2752                         if (syntax & RE_NO_BK_PARENS)
2753                                 goto handle_close;
2754                         else
2755                                 goto normal_char;
2756
2757                 case '\n':
2758                         if (syntax & RE_NEWLINE_ALT)
2759                                 goto handle_alt;
2760                         else
2761                                 goto normal_char;
2762
2763                 case '|':
2764                         if (syntax & RE_NO_BK_VBAR)
2765                                 goto handle_alt;
2766                         else
2767                                 goto normal_char;
2768
2769                 case '{':
2770                         if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2771                                 goto handle_interval;
2772                         else
2773                                 goto normal_char;
2774
2775                 case '\\':
2776                         if (p == pend) {
2777                                 free_stack();
2778                                 return REG_EESCAPE;
2779                         }
2780
2781                         /* Do not translate the character after the \, so that we can
2782                            distinguish, e.g., \B from \b, even if we normally would
2783                            translate, e.g., B to b.  */
2784                         PATFETCH_RAW(c);
2785
2786                         switch (c) {
2787                         case '(':
2788                                 if (syntax & RE_NO_BK_PARENS)
2789                                         goto normal_backslash;
2790
2791                               handle_open:
2792                                 {
2793                                         regnum_t r;
2794                                         int shy = 0;
2795
2796                                         if (!(syntax & RE_NO_SHY_GROUPS)
2797                                             && p != pend && *p == '?') {
2798                                                 p++;
2799                                                 PATFETCH(c);
2800                                                 switch (c) {
2801                                                 case ':':
2802                                                         /* shy groups */
2803                                                         shy = 1;
2804                                                         break;
2805
2806                                                         /* All others are
2807                                                            reserved for future
2808                                                            constructs. */
2809                                                 default:
2810                                                         free_stack();
2811                                                         return REG_BADPAT;
2812                                                 }
2813                                         }
2814
2815                                         r = ++regnum;
2816                                         bufp->re_ngroups++;
2817                                         if (!shy) {
2818                                                 /* Record the translation from capturing group index to
2819                                                    register number, reallocating table as needed. */
2820                                                 bufp->re_nsub++;
2821                                                 while (bufp->
2822                                                        external_to_internal_register_size
2823                                                        <= bufp->re_nsub) {
2824                                                         int i;
2825                                                         int old_size =
2826                                                             bufp->
2827                                                             external_to_internal_register_size;
2828                                                         bufp->
2829                                                             external_to_internal_register_size
2830                                                             += 5;
2831                                                         RETALLOC(bufp->
2832                                                                  external_to_internal_register,
2833                                                                  bufp->
2834                                                                  external_to_internal_register_size,
2835                                                                  int);
2836                                                         /* debugging */
2837                                                         for (i = old_size;
2838                                                              i <
2839                                                              bufp->
2840                                                              external_to_internal_register_size;
2841                                                              i++)
2842                                                                 bufp->
2843                                                                     external_to_internal_register
2844                                                                     [i] =
2845                                                                     (int)
2846                                                                     0xDEADBEEF;
2847                                                 }
2848
2849                                                 bufp->
2850                                                     external_to_internal_register
2851                                                     [bufp->re_nsub] =
2852                                                     bufp->re_ngroups;
2853                                         }
2854
2855                                         if (COMPILE_STACK_FULL) {
2856                                                 RETALLOC(compile_stack.stack,
2857                                                          compile_stack.
2858                                                          size << 1,
2859                                                          compile_stack_elt_t);
2860                                                 if (compile_stack.stack == NULL)
2861                                                         return REG_ESPACE;
2862
2863                                                 compile_stack.size <<= 1;
2864                                         }
2865
2866                                         /* These are the values to restore when
2867                                            we hit end of this group.  They are
2868                                            all relative offsets, so that if the
2869                                            whole pattern moves because of
2870                                            realloc, they will still be
2871                                            valid.  */
2872                                         COMPILE_STACK_TOP.begalt_offset =
2873                                                 begalt - bufp->buffer;
2874                                         COMPILE_STACK_TOP.fixup_alt_jump =
2875                                                 fixup_alt_jump
2876                                                 ? fixup_alt_jump -
2877                                                 bufp->buffer + 1
2878                                                 : 0;
2879                                         COMPILE_STACK_TOP.laststart_offset =
2880                                                 buf_end - bufp->buffer;
2881                                         COMPILE_STACK_TOP.regnum = r;
2882
2883                                         /* We will eventually replace the 0 with
2884                                            the number of groups inner to this
2885                                            one.  But do not push a start_memory
2886                                            for groups beyond the last one we can
2887                                            represent in the compiled pattern.
2888                                            #### bad bad bad.  this will fail in
2889                                            lots of ways, if we ever have to
2890                                            backtrack for these groups.
2891                                          */
2892                                         if (r <= MAX_REGNUM) {
2893                                                 COMPILE_STACK_TOP.
2894                                                         inner_group_offset =
2895                                                         buf_end - bufp->buffer +
2896                                                         2;
2897                                                 BUF_PUSH_3(start_memory, r, 0);
2898                                         }
2899
2900                                         compile_stack.avail++;
2901
2902                                         fixup_alt_jump = 0;
2903                                         laststart = 0;
2904                                         begalt = buf_end;
2905                                         /* If we've reached MAX_REGNUM groups,
2906                                            then this open won't actually
2907                                            generate any code, so we'll have to
2908                                            clear pending_exact explicitly.  */
2909                                         pending_exact = 0;
2910                                 }
2911                                 break;
2912
2913                         case ')':
2914                                 if (syntax & RE_NO_BK_PARENS)
2915                                         goto normal_backslash;
2916
2917                                 if (COMPILE_STACK_EMPTY) {
2918                                         if (syntax &
2919                                             RE_UNMATCHED_RIGHT_PAREN_ORD) {
2920                                                 goto normal_backslash;
2921                                         } else {
2922                                                 free_stack();
2923                                                 return REG_ERPAREN;
2924                                         }
2925                                 }
2926
2927                               handle_close:
2928                                 if (fixup_alt_jump) {
2929                                         /* Push a dummy failure point at the end
2930                                            of the alternative for a possible
2931                                            future `pop_failure_jump' to pop.
2932                                            See comments at `push_dummy_failure'
2933                                            in `re_match_2'. */
2934                                         BUF_PUSH(push_dummy_failure);
2935
2936                                         /* We allocated space for this jump when
2937                                            we assigned to `fixup_alt_jump', in
2938                                            the `handle_alt' case below.  */
2939                                         STORE_JUMP(jump_past_alt,
2940                                                    fixup_alt_jump, buf_end - 1);
2941                                 }
2942
2943                                 /* See similar code for backslashed left paren
2944                                    above.  */
2945                                 if (COMPILE_STACK_EMPTY) {
2946                                         if (syntax &
2947                                             RE_UNMATCHED_RIGHT_PAREN_ORD) {
2948                                                 goto normal_char;
2949                                         } else {
2950                                                 free_stack();
2951                                                 return REG_EPAREN;
2952                                         }
2953                                 }
2954
2955                                 /* Since we just checked for an empty stack
2956                                    above, this ``can't happen''.  */
2957                                 assert(compile_stack.avail != 0);
2958                                 {
2959                                         /* We don't just want to restore into
2960                                            `regnum', because later groups should
2961                                            continue to be numbered higher, as in
2962                                            `(ab)c(de)' -- the second group is
2963                                            #2.  */
2964                                         regnum_t this_group_regnum;
2965
2966                                         compile_stack.avail--;
2967                                         begalt =
2968                                                 bufp->buffer +
2969                                                 COMPILE_STACK_TOP.begalt_offset;
2970                                         fixup_alt_jump =
2971                                                 COMPILE_STACK_TOP.
2972                                                 fixup_alt_jump
2973                                                 ? bufp->buffer +
2974                                                 COMPILE_STACK_TOP.
2975                                                 fixup_alt_jump - 1
2976                                                 : 0;
2977                                         laststart =
2978                                                 bufp->buffer +
2979                                                 COMPILE_STACK_TOP.
2980                                                 laststart_offset;
2981                                         this_group_regnum =
2982                                                 COMPILE_STACK_TOP.regnum;
2983                                         /* If we've reached MAX_REGNUM groups,
2984                                            then this open won't actually
2985                                            generate any code, so we'll have to
2986                                            clear pending_exact explicitly.  */
2987                                         pending_exact = 0;
2988
2989                                         /* We're at the end of the group, so now
2990                                            we know how many groups were inside
2991                                            this one.  */
2992                                         if (this_group_regnum <= MAX_REGNUM) {
2993                                                 unsigned char *inner_group_loc
2994                                                         =
2995                                                         bufp->buffer +
2996                                                         COMPILE_STACK_TOP.
2997                                                         inner_group_offset;
2998
2999                                                 *inner_group_loc =
3000                                                         regnum -
3001                                                         this_group_regnum;
3002                                                 BUF_PUSH_3(stop_memory,
3003                                                            this_group_regnum,
3004                                                            regnum -
3005                                                            this_group_regnum);
3006                                         }
3007                                 }
3008                                 break;
3009
3010                         case '|':       /* `\|'.  */
3011                                 if (syntax & RE_LIMITED_OPS
3012                                     || syntax & RE_NO_BK_VBAR) {
3013                                         goto normal_backslash;
3014                                 }
3015                         handle_alt:
3016                                 if (syntax & RE_LIMITED_OPS) {
3017                                         goto normal_char;
3018                                 }
3019
3020                                 /* Insert before the previous alternative a jump
3021                                    which jumps to this alternative if the former
3022                                    fails.  */
3023                                 GET_BUFFER_SPACE(3);
3024                                 INSERT_JUMP(on_failure_jump, begalt,
3025                                             buf_end + 6);
3026                                 pending_exact = 0;
3027                                 buf_end += 3;
3028
3029                                 /* The alternative before this one has a jump after it
3030                                    which gets executed if it gets matched.  Adjust that
3031                                    jump so it will jump to this alternative's analogous
3032                                    jump (put in below, which in turn will jump to the next
3033                                    (if any) alternative's such jump, etc.).  The last such
3034                                    jump jumps to the correct final destination.  A picture:
3035                                    _____ _____
3036                                    |   | |   |
3037                                    |   v |   v
3038                                    a | b   | c
3039
3040                                    If we are at `b', then fixup_alt_jump right now points to a
3041                                    three-byte space after `a'.  We'll put in the jump, set
3042                                    fixup_alt_jump to right after `b', and leave behind three
3043                                    bytes which we'll fill in when we get to after `c'.  */
3044
3045                                 if (fixup_alt_jump)
3046                                         STORE_JUMP(jump_past_alt,
3047                                                    fixup_alt_jump, buf_end);
3048
3049                                 /* Mark and leave space for a jump after this alternative,
3050                                    to be filled in later either by next alternative or
3051                                    when know we're at the end of a series of alternatives.  */
3052                                 fixup_alt_jump = buf_end;
3053                                 GET_BUFFER_SPACE(3);
3054                                 buf_end += 3;
3055
3056                                 laststart = 0;
3057                                 begalt = buf_end;
3058                                 break;
3059
3060                         case '{':
3061                                 /* If \{ is a literal.  */
3062                                 if (!(syntax & RE_INTERVALS)
3063                                     /* If we're at `\{' and it's not the open-interval
3064                                        operator.  */
3065                                     || ((syntax & RE_INTERVALS)
3066                                         && (syntax & RE_NO_BK_BRACES))
3067                                     || (p - 2 == pattern && p == pend))
3068                                         goto normal_backslash;
3069
3070                               handle_interval:
3071                                 {
3072                                         /* If got here, then the syntax allows
3073                                            intervals.  */
3074
3075                                         /* At least (most) this many matches
3076                                            must be made.  */
3077                                         int lower_bound = -1, upper_bound = -1;
3078
3079                                         beg_interval = p - 1;
3080
3081                                         if (p == pend) {
3082                                                 if (syntax & RE_NO_BK_BRACES) {
3083                                                         goto unfetch_interval;
3084                                                 } else {
3085                                                         free_stack();
3086                                                         return REG_EBRACE;
3087                                                 }
3088                                         }
3089
3090                                         GET_UNSIGNED_NUMBER(lower_bound);
3091
3092                                         if (c == ',') {
3093                                                 GET_UNSIGNED_NUMBER(
3094                                                         upper_bound);
3095                                                 if (upper_bound < 0) {
3096                                                         upper_bound =
3097                                                                 RE_DUP_MAX;
3098                                                 }
3099                                         } else {
3100                                                 /* Interval such as `{1}' =>
3101                                                    match exactly once. */
3102                                                 upper_bound = lower_bound;
3103                                         }
3104
3105                                         if (lower_bound < 0
3106                                             || upper_bound > RE_DUP_MAX
3107                                             || lower_bound > upper_bound) {
3108                                                 if (syntax & RE_NO_BK_BRACES) {
3109                                                         goto unfetch_interval;
3110                                                 } else {
3111                                                         free_stack();
3112                                                         return REG_BADBR;
3113                                                 }
3114                                         }
3115
3116                                         if (!(syntax & RE_NO_BK_BRACES)) {
3117                                                 if (c != '\\') {
3118                                                         free_stack();
3119                                                         return REG_EBRACE;
3120                                                 }
3121                                                 PATFETCH(c);
3122                                         }
3123
3124                                         if (c != '}') {
3125                                                 if (syntax & RE_NO_BK_BRACES) {
3126                                                         goto unfetch_interval;
3127                                                 } else {
3128                                                         free_stack();
3129                                                         return REG_BADBR;
3130                                                 }
3131                                         }
3132
3133                                         /* We just parsed a valid interval.  */
3134
3135                                         /* If it's invalid to have no preceding
3136                                            re.  */
3137                                         if (!laststart) {
3138                                                 if (syntax &
3139                                                     RE_CONTEXT_INVALID_OPS) {
3140                                                         free_stack();
3141                                                         return REG_BADRPT;
3142                                                 } else if (
3143                                                         syntax &
3144                                                         RE_CONTEXT_INDEP_OPS) {
3145                                                         laststart = buf_end;
3146                                                 } else {
3147                                                         goto unfetch_interval;
3148                                                 }
3149                                         }
3150
3151                                         /* If the upper bound is zero, don't
3152                                            want to succeed at all; jump from
3153                                            `laststart' to `b + 3', which will be
3154                                            the end of the buffer after we insert
3155                                            the jump.  */
3156                                         if (upper_bound == 0) {
3157                                                 GET_BUFFER_SPACE(3);
3158                                                 INSERT_JUMP(jump, laststart,
3159                                                             buf_end + 3);
3160                                                 buf_end += 3;
3161                                         }
3162
3163                                         /* Otherwise, we have a nontrivial interval.  When
3164                                            we're all done, the pattern will look like:
3165                                            set_number_at <jump count> <upper bound>
3166                                            set_number_at <succeed_n count> <lower bound>
3167                                            succeed_n <after jump addr> <succeed_n count>
3168                                            <body of loop>
3169                                            jump_n <succeed_n addr> <jump count>
3170                                            (The upper bound and `jump_n' are omitted if
3171                                            `upper_bound' is 1, though.)  */
3172                                         else {  /* If the upper bound is > 1, we need to insert
3173                                                    more at the end of the loop.  */
3174                                                 Memory_count nbytes =
3175                                                     10 + (upper_bound > 1) * 10;
3176
3177                                                 GET_BUFFER_SPACE(nbytes);
3178
3179                                                 /* Initialize lower bound of the `succeed_n', even
3180                                                    though it will be set during matching by its
3181                                                    attendant `set_number_at' (inserted next),
3182                                                    because `re_compile_fastmap' needs to know.
3183                                                    Jump to the `jump_n' we might insert below.  */
3184                                                 INSERT_JUMP2(succeed_n,
3185                                                              laststart,
3186                                                              buf_end + 5 +
3187                                                              (upper_bound >
3188                                                               1) * 5,
3189                                                              lower_bound);
3190                                                 buf_end += 5;
3191
3192                                                 /* Code to initialize the lower bound.  Insert
3193                                                    before the `succeed_n'.  The `5' is the last two
3194                                                    bytes of this `set_number_at', plus 3 bytes of
3195                                                    the following `succeed_n'.  */
3196                                                 insert_op2(set_number_at,
3197                                                            laststart, 5,
3198                                                            lower_bound,
3199                                                            buf_end);
3200                                                 buf_end += 5;
3201
3202                                                 if (upper_bound > 1) {  /* More than one repetition is allowed, so
3203                                                                            append a backward jump to the `succeed_n'
3204                                                                            that starts this interval.
3205
3206                                                                            When we've reached this during matching,
3207                                                                            we'll have matched the interval once, so
3208                                                                            jump back only `upper_bound - 1' times.  */
3209                                                         STORE_JUMP2(jump_n,
3210                                                                     buf_end,
3211                                                                     laststart +
3212                                                                     5,
3213                                                                     upper_bound
3214                                                                     - 1);
3215                                                         buf_end += 5;
3216
3217                                                         /* The location we want to set is the second
3218                                                            parameter of the `jump_n'; that is `b-2' as
3219                                                            an absolute address.  `laststart' will be
3220                                                            the `set_number_at' we're about to insert;
3221                                                            `laststart+3' the number to set, the source
3222                                                            for the relative address.  But we are
3223                                                            inserting into the middle of the pattern --
3224                                                            so everything is getting moved up by 5.
3225                                                            Conclusion: (b - 2) - (laststart + 3) + 5,
3226                                                            i.e., b - laststart.
3227
3228                                                            We insert this at the beginning of the loop
3229                                                            so that if we fail during matching, we'll
3230                                                            reinitialize the bounds.  */
3231                                                         insert_op2
3232                                                             (set_number_at,
3233                                                              laststart,
3234                                                              buf_end -
3235                                                              laststart,
3236                                                              upper_bound - 1,
3237                                                              buf_end);
3238                                                         buf_end += 5;
3239                                                 }
3240                                         }
3241                                         pending_exact = 0;
3242                                         beg_interval = NULL;
3243                                 }
3244                                 break;
3245
3246                               unfetch_interval:
3247                                 /* If an invalid interval, match the characters as literals.  */
3248                                 assert(beg_interval);
3249                                 p = beg_interval;
3250                                 beg_interval = NULL;
3251
3252                                 /* normal_char and normal_backslash need `c'.  */
3253                                 PATFETCH(c);
3254
3255                                 if (!(syntax & RE_NO_BK_BRACES)) {
3256                                         if (p > pattern && p[-1] == '\\')
3257                                                 goto normal_backslash;
3258                                 }
3259                                 goto normal_char;
3260
3261 #ifdef emacs
3262                                 /* There is no way to specify the before_dot and after_dot
3263                                    operators.  rms says this is ok.  --karl  */
3264                         case '=':
3265                                 BUF_PUSH(at_dot);
3266                                 break;
3267
3268                         case 's':
3269                                 laststart = buf_end;
3270                                 PATFETCH(c);
3271                                 /* XEmacs addition */
3272                                 if (c >= 0x80 || syntax_spec_code[c] == 0377) {
3273                                         free_stack();
3274                                         return REG_ESYNTAX;
3275                                 }
3276                                 BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
3277                                 break;
3278
3279                         case 'S':
3280                                 laststart = buf_end;
3281                                 PATFETCH(c);
3282                                 /* XEmacs addition */
3283                                 if (c >= 0x80 || syntax_spec_code[c] == 0377) {
3284                                         free_stack();
3285                                         return REG_ESYNTAX;
3286                                 }
3287                                 BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
3288                                 break;
3289
3290 #ifdef MULE
3291 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */
3292                         case 'c':
3293                                 laststart = buf_end;
3294                                 PATFETCH_RAW(c);
3295                                 if (c < 32 || c > 127) {
3296                                         free_stack();
3297                                         return REG_ECATEGORY;
3298                                 }
3299                                 BUF_PUSH_2(categoryspec, c);
3300                                 break;
3301
3302                         case 'C':
3303                                 laststart = buf_end;
3304                                 PATFETCH_RAW(c);
3305                                 if (c < 32 || c > 127) {
3306                                         free_stack();
3307                                         return REG_ECATEGORY;
3308                                 }
3309                                 BUF_PUSH_2(notcategoryspec, c);
3310                                 break;
3311 /* end of category patch */
3312 #endif                          /* MULE */
3313 #endif                          /* emacs */
3314
3315                         case 'w':
3316                                 laststart = buf_end;
3317                                 BUF_PUSH(wordchar);
3318                                 break;
3319
3320                         case 'W':
3321                                 laststart = buf_end;
3322                                 BUF_PUSH(notwordchar);
3323                                 break;
3324
3325                         case '<':
3326                                 BUF_PUSH(wordbeg);
3327                                 break;
3328
3329                         case '>':
3330                                 BUF_PUSH(wordend);
3331                                 break;
3332
3333                         case 'b':
3334                                 BUF_PUSH(wordbound);
3335                                 break;
3336
3337                         case 'B':
3338                                 BUF_PUSH(notwordbound);
3339                                 break;
3340
3341                         case '`':
3342                                 BUF_PUSH(begbuf);
3343                                 break;
3344
3345                         case '\'':
3346                                 BUF_PUSH(endbuf);
3347                                 break;
3348
3349                         case '1':
3350                         case '2':
3351                         case '3':
3352                         case '4':
3353                         case '5':
3354                         case '6':
3355                         case '7':
3356                         case '8':
3357                         case '9': {
3358                                 int reg;
3359
3360                                 if (syntax & RE_NO_BK_REFS) {
3361                                         goto normal_char;
3362                                 }
3363                                 /* External register indexing. */
3364                                 reg = c - '0';
3365
3366                                 if (reg > bufp->re_nsub) {
3367                                         free_stack();
3368                                         return REG_ESUBREG;
3369                                 }
3370
3371                                 /* Convert external to internal as soon
3372                                    as possible. */
3373                                 reg = bufp->external_to_internal_register[reg];
3374  
3375                                 /* Can't back reference to a
3376                                    subexpression if inside it. */
3377                                 if (group_in_compile_stack(
3378                                             compile_stack, reg)) {
3379                                         goto normal_char;
3380                                 }
3381                                 laststart = buf_end;
3382                                 BUF_PUSH_2(duplicate, reg);
3383                         }
3384                                 break;
3385
3386                         case '+':
3387                         case '?':
3388                                 if (syntax & RE_BK_PLUS_QM) {
3389                                         goto handle_plus;
3390                                 } else {
3391                                         goto normal_backslash;
3392                                 }
3393                         default:
3394                         normal_backslash:
3395                                 /* You might think it would be useful for \ to
3396                                    mean not to translate; but if we don't
3397                                    translate it, it will never match
3398                                    anything.  */
3399                                 c = TRANSLATE(c);
3400                                 goto normal_char;
3401                         }
3402                         break;
3403
3404                         /* Expects the character in `c'.  */
3405                         /* `p' points to the location after where `c' came
3406                            from. */
3407                 normal_char:
3408                 default: {
3409                         /* XEmacs: modifications here for Mule. */
3410                         /* `q' points to the beginning of the next char. */
3411                         re_char *q = p;
3412
3413                         /* If no exactn currently being built.  */
3414                         if (!pending_exact
3415                             /* If last exactn not at current position.  */
3416                             || pending_exact + *pending_exact + 1 !=
3417                             buf_end
3418                             /* We have only one byte following the exactn for
3419                                the count. */
3420                             || ((unsigned int)(*pending_exact + (q - p)) >=
3421                                 ((unsigned int)(1 << BYTEWIDTH) - 1))
3422
3423                             /* If followed by a repetition operator.  */
3424                             || *q == '*' || *q == '^'
3425                             || ((syntax & RE_BK_PLUS_QM)
3426                                 ? *q == '\\' && (q[1] == '+'
3427                                                  || q[1] == '?')
3428                                 : (*q == '+' || *q == '?'))
3429                             || ((syntax & RE_INTERVALS)
3430                                 && ((syntax & RE_NO_BK_BRACES)
3431                                     ? *q == '{'
3432                                     : (q[0] == '\\' && q[1] == '{')))) {
3433                                 /* Start building a new exactn.  */
3434
3435                                 laststart = buf_end;
3436
3437                                 BUF_PUSH_2(exactn, 0);
3438                                 pending_exact = buf_end - 1;
3439                         }
3440 #ifndef MULE
3441                         BUF_PUSH(c);
3442                         (*pending_exact)++;
3443 #else
3444                         {
3445                                 Bytecount bt_count;
3446                                 Bufbyte tmp_buf[MAX_EMCHAR_LEN];
3447                                 int i;
3448
3449                                 bt_count =
3450                                         set_charptr_emchar(tmp_buf, c);
3451
3452                                 for (i = 0; i < bt_count; i++) {
3453                                         BUF_PUSH(tmp_buf[i]);
3454                                         (*pending_exact)++;
3455                                 }
3456                         }
3457 #endif
3458                         break;
3459                 }
3460                 }               /* switch (c) */
3461         }                       /* while p != pend */
3462
3463         /* Through the pattern now.  */
3464
3465         if (fixup_alt_jump) {
3466                 STORE_JUMP(jump_past_alt, fixup_alt_jump, buf_end);
3467         }
3468         if (!COMPILE_STACK_EMPTY) {
3469                 free_stack();
3470                 return REG_EPAREN;
3471         }
3472         /* If we don't want backtracking, force success
3473            the first time we reach the end of the compiled pattern.  */
3474         if (syntax & RE_NO_POSIX_BACKTRACKING) {
3475                 BUF_PUSH(succeed);
3476         }
3477         xfree(compile_stack.stack);
3478
3479         /* We have succeeded; set the length of the buffer.  */
3480         bufp->used = buf_end - bufp->buffer;
3481
3482 #ifdef REGEX_DEBUG_FLAG
3483         if (debug) {
3484                 DEBUG_PRINT1("\nCompiled pattern: \n");
3485                 print_compiled_pattern(bufp);
3486         }
3487 #endif                          /* DEBUG */
3488
3489 #ifndef MATCH_MAY_ALLOCATE
3490         /* Initialize the failure stack to the largest possible stack.  This
3491            isn't necessary unless we're trying to avoid calling alloca in
3492            the search and match routines.  */
3493         {
3494                 int num_regs = bufp->re_ngroups + 1;
3495
3496                 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
3497                    is strictly greater than re_max_failures, the largest possible stack
3498                    is 2 * re_max_failures failure points.  */
3499                 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) {
3500                         fail_stack.size =
3501                             (2 * re_max_failures * MAX_FAILURE_ITEMS);
3502
3503                         if (!fail_stack.stack) {
3504                                 fail_stack.stack =
3505                                         (fail_stack_elt_t *)
3506                                         xmalloc_atomic(
3507                                                 fail_stack.size *
3508                                                 sizeof(fail_stack_elt_t));
3509                         } else {
3510                                 fail_stack.stack =
3511                                         (fail_stack_elt_t *)
3512                                         xrealloc(fail_stack.stack,
3513                                                  (fail_stack.size *
3514                                                   sizeof(fail_stack_elt_t)));
3515                         }
3516                 }
3517
3518                 regex_grow_registers(num_regs);
3519         }
3520 #endif  /* not MATCH_MAY_ALLOCATE */
3521
3522         return REG_NOERROR;
3523 }
3524 /* regex_compile */
3525 \f
3526 /* Subroutines for `regex_compile'.  */
3527
3528 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
3529
3530 static void store_op1(re_opcode_t op, unsigned char *loc, int arg)
3531 {
3532         *loc = (unsigned char)op;
3533         STORE_NUMBER(loc + 1, arg);
3534 }
3535
3536 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3537
3538 static void store_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2)
3539 {
3540         *loc = (unsigned char)op;
3541         STORE_NUMBER(loc + 1, arg1);
3542         STORE_NUMBER(loc + 3, arg2);
3543 }
3544
3545 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3546    for OP followed by two-byte integer parameter ARG.  */
3547
3548 static void
3549 insert_op1(re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
3550 {
3551         REGISTER unsigned char *pfrom = end;
3552         REGISTER unsigned char *pto = end + 3;
3553
3554         while (pfrom != loc)
3555                 *--pto = *--pfrom;
3556
3557         store_op1(op, loc, arg);
3558 }
3559
3560 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3561
3562 static void
3563 insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2,
3564            unsigned char *end)
3565 {
3566         REGISTER unsigned char *pfrom = end;
3567         REGISTER unsigned char *pto = end + 5;
3568
3569         while (pfrom != loc)
3570                 *--pto = *--pfrom;
3571
3572         store_op2(op, loc, arg1, arg2);
3573 }
3574
3575 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3576    after an alternative or a begin-subexpression.  We assume there is at
3577    least one character before the ^.  */
3578
3579 static re_bool
3580 at_begline_loc_p(re_char *pattern, re_char *p, reg_syntax_t syntax)
3581 {
3582         re_char *prev = p - 2;
3583         re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3584
3585         return
3586             /* After a subexpression?  */
3587             (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3588             /* After an alternative?  */
3589             || (*prev == '|'
3590                 && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3591 }
3592
3593 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3594    at least one character after the $, i.e., `P < PEND'.  */
3595
3596 static re_bool
3597 at_endline_loc_p(re_char *p, re_char *pend, int syntax)
3598 {
3599         re_char *next = p;
3600         re_bool next_backslash = *next == '\\';
3601         re_char *next_next = p + 1 < pend ? p + 1 : 0;
3602
3603         return
3604             /* Before a subexpression?  */
3605             (syntax & RE_NO_BK_PARENS ? *next == ')'
3606              : next_backslash && next_next && *next_next == ')')
3607             /* Before an alternative?  */
3608             || (syntax & RE_NO_BK_VBAR ? *next == '|'
3609                 : next_backslash && next_next && *next_next == '|');
3610 }
3611
3612 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3613    false if it's not.  */
3614
3615 static re_bool
3616 group_in_compile_stack(compile_stack_type compile_stack, regnum_t regnum)
3617 {
3618         int this_element;
3619
3620         for (this_element = compile_stack.avail - 1;
3621              this_element >= 0; this_element--)
3622                 if (compile_stack.stack[this_element].regnum == regnum) {
3623                         return true;
3624                 }
3625         return false;
3626 }
3627
3628 /* Read the ending character of a range (in a bracket expression) from the
3629    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3630    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3631    Then we set the translation of all bits between the starting and
3632    ending characters (inclusive) in the compiled pattern B.
3633
3634    Return an error code.
3635
3636    We use these short variable names so we can use the same macros as
3637    `regex_compile' itself.  */
3638
3639 static reg_errcode_t
3640 compile_range(re_char ** p_ptr, re_char * pend, RE_TRANSLATE_TYPE translate,
3641               reg_syntax_t syntax, unsigned char *buf_end)
3642 {
3643         Element_count this_char;
3644
3645         re_char *p = *p_ptr;
3646         int range_start, range_end;
3647
3648         if (p == pend)
3649                 return REG_ERANGE;
3650
3651         /* Even though the pattern is a signed `char *', we need to fetch
3652            with unsigned char *'s; if the high bit of the pattern character
3653            is set, the range endpoints will be negative if we fetch using a
3654            signed char *.
3655
3656            We also want to fetch the endpoints without translating them; the
3657            appropriate translation is done in the bit-setting loop below.  */
3658         /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3659         range_start = ((const unsigned char *)p)[-2];
3660         range_end = ((const unsigned char *)p)[0];
3661
3662         /* Have to increment the pointer into the pattern string, so the
3663            caller isn't still at the ending character.  */
3664         (*p_ptr)++;
3665
3666         /* If the start is after the end, the range is empty.  */
3667         if (range_start > range_end)
3668                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3669
3670         /* Here we see why `this_char' has to be larger than an `unsigned
3671            char' -- the range is inclusive, so if `range_end' == 0xff
3672            (assuming 8-bit characters), we would otherwise go into an infinite
3673            loop, since all characters <= 0xff.  */
3674         for (this_char = range_start; this_char <= range_end; this_char++) {
3675                 SET_LIST_BIT(TRANSLATE(this_char));
3676         }
3677
3678         return REG_NOERROR;
3679 }
3680
3681 #ifdef MULE
3682
3683 static reg_errcode_t
3684 compile_extended_range(re_char ** p_ptr, re_char * pend,
3685                        RE_TRANSLATE_TYPE translate,
3686                        reg_syntax_t syntax, Lisp_Object rtab)
3687 {
3688         Emchar this_char, range_start, range_end;
3689         const Bufbyte *p;
3690
3691         if (*p_ptr == pend)
3692                 return REG_ERANGE;
3693
3694         p = (const Bufbyte *)*p_ptr;
3695         range_end = charptr_emchar(p);
3696         p--;                    /* back to '-' */
3697         DEC_CHARPTR(p);         /* back to start of range */
3698         /* We also want to fetch the endpoints without translating them; the
3699            appropriate translation is done in the bit-setting loop below.  */
3700         range_start = charptr_emchar(p);
3701         INC_CHARPTR(*p_ptr);
3702
3703         /* If the start is after the end, the range is empty.  */
3704         if (range_start > range_end)
3705                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3706
3707         /* Can't have ranges spanning different charsets, except maybe for
3708            ranges entirely within the first 256 chars. */
3709
3710         if ((range_start >= 0x100 || range_end >= 0x100)
3711             && CHAR_LEADING_BYTE(range_start) != CHAR_LEADING_BYTE(range_end))
3712                 return REG_ERANGESPAN;
3713
3714         /* As advertised, translations only work over the 0 - 0x7F range.
3715            Making this kind of stuff work generally is much harder.
3716            Iterating over the whole range like this would be way efficient
3717            if the range encompasses 10,000 chars or something.  You'd have
3718            to do something like this:
3719
3720            range_table a;
3721            range_table b;
3722            map over translation table in [range_start, range_end] of
3723            (put the mapped range in a;
3724            put the translation in b)
3725            invert the range in a and truncate to [range_start, range_end]
3726            compute the union of a, b
3727            union the result into rtab
3728          */
3729         for (this_char = range_start;
3730              this_char <= range_end && this_char < 0x80; this_char++) {
3731                 SET_RANGETAB_BIT(TRANSLATE(this_char));
3732         }
3733
3734         if (this_char <= range_end)
3735                 put_range_table(rtab, this_char, range_end, Qt);
3736
3737         return REG_NOERROR;
3738 }
3739
3740 #endif                          /* MULE */
3741 \f
3742 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3743    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3744    characters can start a string that matches the pattern.  This fastmap
3745    is used by re_search to skip quickly over impossible starting points.
3746
3747    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3748    area as BUFP->fastmap.
3749
3750    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3751    the pattern buffer.
3752
3753    Returns 0 if we succeed, -2 if an internal error.   */
3754
3755 int re_compile_fastmap(struct re_pattern_buffer *bufp)
3756 {
3757         int j, k;
3758 #ifdef MATCH_MAY_ALLOCATE
3759         fail_stack_type fail_stack;
3760 #endif
3761         DECLARE_DESTINATION;
3762         /* We don't push any register information onto the failure stack.  */
3763
3764         REGISTER char *fastmap = bufp->fastmap;
3765         unsigned char *pattern = bufp->buffer;
3766         unsigned long size = bufp->used;
3767         /* actually p supposed to carry the const qualifier, however, some
3768            silly mule code below CHANGES p and hence we cant go with the const
3769            qualifier here, leaving an `unfixable' warning on the way */
3770 #ifdef MULE
3771         unsigned char *p = pattern;
3772 #else
3773         re_char *p = pattern;
3774 #endif
3775         REGISTER unsigned char *pend = pattern + size;
3776
3777 #ifdef REL_ALLOC
3778         /* This holds the pointer to the failure stack, when
3779            it is allocated relocatably.  */
3780         fail_stack_elt_t *failure_stack_ptr;
3781 #endif
3782
3783         /* Assume that each path through the pattern can be null until
3784            proven otherwise.  We set this false at the bottom of switch
3785            statement, to which we get only if a particular path doesn't
3786            match the empty string.  */
3787         re_bool path_can_be_null = true;
3788
3789         /* We aren't doing a `succeed_n' to begin with.  */
3790         re_bool succeed_n_p = false;
3791
3792         assert(fastmap != NULL && p != NULL);
3793
3794         INIT_FAIL_STACK();
3795         memset(fastmap, 0, 1 << BYTEWIDTH);     /* Assume nothing's valid.  */
3796         bufp->fastmap_accurate = 1;     /* It will be when we're done.  */
3797         bufp->can_be_null = 0;
3798
3799         while (1) {
3800                 if (p == pend || *p == succeed) {
3801                         /* We have reached the (effective) end of pattern.  */
3802                         if (!FAIL_STACK_EMPTY()) {
3803                                 bufp->can_be_null |= path_can_be_null;
3804
3805                                 /* Reset for next path.  */
3806                                 path_can_be_null = true;
3807
3808                                 /* fuck, p isnt const thanks to that
3809                                  * unified range table function below */
3810 #ifdef MULE
3811                                 p = (unsigned char*)fail_stack.
3812                                     stack[--fail_stack.avail].pointer;
3813 #else
3814                                 p = fail_stack.stack[--fail_stack.avail]
3815                                         .pointer;
3816 #endif
3817
3818                                 continue;
3819                         } else {
3820                                 break;
3821                         }
3822                 }
3823
3824                 /* We should never be about to go beyond the end of the pattern.  */
3825                 assert(p < pend);
3826
3827                 switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
3828
3829                         /* I guess the idea here is to simply not bother with a
3830                            fastmap if a backreference is used, since it's too
3831                            hard to figure out the fastmap for the corresponding
3832                            group.  Setting `can_be_null' stops `re_search_2'
3833                            from using the fastmap, so that is all we do.  */
3834                 case duplicate:
3835                         bufp->can_be_null = 1;
3836                         goto done;
3837
3838                         /* Following are the cases which match a character.
3839                            These end with `break'.  */
3840
3841                 case exactn:
3842                         fastmap[p[1]] = 1;
3843                         break;
3844
3845                 case charset:
3846                         /* XEmacs: Under Mule, these bit vectors will
3847                            only contain values for characters below 0x80. */
3848                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3849                                 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3850                                         fastmap[j] = 1;
3851                         break;
3852
3853                 case charset_not:
3854                         /* Chars beyond end of map must be allowed.  */
3855 #ifdef MULE
3856                         for (j = *p * BYTEWIDTH; j < 0x80; j++)
3857                                 fastmap[j] = 1;
3858                         /* And all extended characters must be allowed, too. */
3859                         for (j = 0x80; j < 0xA0; j++)
3860                                 fastmap[j] = 1;
3861 #else                           /* not MULE */
3862                         for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3863                                 fastmap[j] = 1;
3864 #endif                          /* MULE */
3865
3866                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3867                                 if (!
3868                                     (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3869                                         fastmap[j] = 1;
3870                         break;
3871
3872 #ifdef MULE
3873                 case charset_mule: {
3874                         int nentries;
3875                         int i;
3876
3877                         nentries = unified_range_table_nentries(p);
3878                         for (i = 0; i < nentries; i++) {
3879                                 EMACS_INT first, last;
3880                                 Lisp_Object dummy_val;
3881                                 int jj;
3882                                 Bufbyte strr[MAX_EMCHAR_LEN];
3883
3884                                 unified_range_table_get_range(p, i,
3885                                                               &first,
3886                                                               &last,
3887                                                               &dummy_val);
3888                                 for (jj = first;
3889                                      jj <= last && jj < 0x80; jj++)
3890                                         fastmap[jj] = 1;
3891                                 /* Ranges below 0x100 can span charsets, but there
3892                                    are only two (Control-1 and Latin-1), and
3893                                    either first or last has to be in them. */
3894                                 set_charptr_emchar(strr, first);
3895                                 fastmap[*strr] = 1;
3896                                 if (last < 0x100) {
3897                                         set_charptr_emchar(strr, last);
3898                                         fastmap[*strr] = 1;
3899                                 }
3900                         }
3901                 }
3902                         break;
3903
3904                 case charset_mule_not: {
3905                         int nentries;
3906                         int i;
3907
3908                         nentries = unified_range_table_nentries(p);
3909                         for (i = 0; i < nentries; i++) {
3910                                 EMACS_INT first, last;
3911                                 Lisp_Object dummy_val;
3912                                 int jj;
3913                                 int smallest_prev = 0;
3914
3915                                 unified_range_table_get_range(p, i,
3916                                                               &first,
3917                                                               &last,
3918                                                               &dummy_val);
3919                                 for (jj = smallest_prev;
3920                                      jj < first && jj < 0x80; jj++)
3921                                         fastmap[jj] = 1;
3922                                 smallest_prev = last + 1;
3923                                 if (smallest_prev >= 0x80)
3924                                         break;
3925                         }
3926                         /* Calculating which leading bytes are actually allowed
3927                            here is rather difficult, so we just punt and allow
3928                            all of them. */
3929                         for (i = 0x80; i < 0xA0; i++)
3930                                 fastmap[i] = 1;
3931                 }
3932                         break;
3933 #endif                          /* MULE */
3934
3935                 case wordchar:
3936 #ifdef emacs
3937                         k = (int)Sword;
3938                         goto matchsyntax;
3939 #else
3940                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3941                                 if (SYNTAX_UNSAFE
3942                                     (XCHAR_TABLE
3943                                      (regex_emacs_buffer->mirror_syntax_table),
3944                                      j) == Sword)
3945                                         fastmap[j] = 1;
3946                         break;
3947 #endif
3948
3949                 case notwordchar:
3950 #ifdef emacs
3951                         k = (int)Sword;
3952                         goto matchnotsyntax;
3953 #else
3954                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3955                                 if (SYNTAX_UNSAFE
3956                                     (XCHAR_TABLE
3957                                      (regex_emacs_buffer->mirror_syntax_table),
3958                                      j) != Sword)
3959                                         fastmap[j] = 1;
3960                         break;
3961 #endif
3962
3963                 case anychar: {
3964                         int fastmap_newline = fastmap['\n'];
3965
3966                         /* `.' matches anything ...  */
3967 #ifdef MULE
3968                         /* "anything" only includes bytes that can be the
3969                            first byte of a character. */
3970                         for (j = 0; j < 0xA0; j++)
3971                                 fastmap[j] = 1;
3972 #else
3973                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3974                                 fastmap[j] = 1;
3975 #endif
3976
3977                         /* ... except perhaps newline.  */
3978                         if (!(bufp->syntax & RE_DOT_NEWLINE))
3979                                 fastmap['\n'] = fastmap_newline;
3980
3981                         /* Return if we have already set `can_be_null'; if we
3982                            have, then the fastmap is irrelevant.  Something's
3983                            wrong here.  */
3984                         else if (bufp->can_be_null)
3985                                 goto done;
3986
3987                         /* Otherwise, have to check alternative paths.  */
3988                         break;
3989                 }
3990
3991 #ifdef emacs
3992                 case wordbound:
3993                 case notwordbound:
3994                 case wordbeg:
3995                 case wordend:
3996                 case notsyntaxspec:
3997                 case syntaxspec:
3998                         /* This match depends on text properties.  These end with
3999                            aborting optimizations.  */
4000                         bufp->can_be_null = 1;
4001                         goto done;
4002
4003 #ifdef emacs
4004                 matchsyntax:
4005 #ifdef MULE
4006                         for (j = 0; j < 0x80; j++)
4007                                 if (SYNTAX_UNSAFE
4008                                     (XCHAR_TABLE
4009                                      (regex_emacs_buffer->mirror_syntax_table),
4010                                      j) == (enum syntaxcode)k)
4011                                         fastmap[j] = 1;
4012                         for (j = 0x80; j < 0xA0; j++) {
4013                                 if (LEADING_BYTE_PREFIX_P(j))
4014                                         /* too complicated to calculate this right */
4015                                         fastmap[j] = 1;
4016                                 else {
4017                                         int multi_p;
4018                                         Lisp_Object cset;
4019
4020                                         cset = CHARSET_BY_LEADING_BYTE(j);
4021                                         if (CHARSETP(cset)) {
4022                                                 if (charset_syntax
4023                                                     (regex_emacs_buffer, cset,
4024                                                      &multi_p)
4025                                                     == Sword || multi_p)
4026                                                         fastmap[j] = 1;
4027                                         }
4028                                 }
4029                         }
4030 #else                           /* not MULE */
4031                         for (j = 0; j < (1 << BYTEWIDTH); j++)
4032                                 if (SYNTAX_UNSAFE
4033                                     (XCHAR_TABLE
4034                                      (regex_emacs_buffer->mirror_syntax_table),
4035                                      j) == (enum syntaxcode)k)
4036                                         fastmap[j] = 1;
4037 #endif                          /* MULE */
4038                         break;
4039
4040                 matchnotsyntax:
4041 #ifdef MULE
4042                         for (j = 0; j < 0x80; j++)
4043                                 if (SYNTAX_UNSAFE
4044                                     (XCHAR_TABLE
4045                                      (regex_emacs_buffer->mirror_syntax_table),
4046                                      j) != (enum syntaxcode)k)
4047                                         fastmap[j] = 1;
4048                         for (j = 0x80; j < 0xA0; j++) {
4049                                 if (LEADING_BYTE_PREFIX_P(j))
4050                                         /* too complicated to calculate this right */
4051                                         fastmap[j] = 1;
4052                                 else {
4053                                         int multi_p;
4054                                         Lisp_Object cset;
4055
4056                                         cset = CHARSET_BY_LEADING_BYTE(j);
4057                                         if (CHARSETP(cset)) {
4058                                                 if (charset_syntax
4059                                                     (regex_emacs_buffer, cset,
4060                                                      &multi_p)
4061                                                     != Sword || multi_p)
4062                                                         fastmap[j] = 1;
4063                                         }
4064                                 }
4065                         }
4066 #else                           /* not MULE */
4067                         for (j = 0; j < (1 << BYTEWIDTH); j++)
4068                                 if (SYNTAX_UNSAFE
4069                                     (XCHAR_TABLE
4070                                      (regex_emacs_buffer->mirror_syntax_table),
4071                                      j) != (enum syntaxcode)k)
4072                                         fastmap[j] = 1;
4073 #endif                          /* MULE */
4074                         break;
4075 #endif                          /* emacs */
4076
4077 #ifdef MULE
4078 /* 97/2/17 jhod category patch */
4079                 case categoryspec:
4080                 case notcategoryspec:
4081                         bufp->can_be_null = 1;
4082                         return 0;
4083 /* end if category patch */
4084 #endif                          /* MULE */
4085
4086                         /* All cases after this match the empty string.  These
4087                            end with `continue'.  */
4088
4089                 case before_dot:
4090                 case at_dot:
4091                 case after_dot:
4092                         continue;
4093 #endif                          /* emacs */
4094
4095                 case no_op:
4096                 case begline:
4097                 case endline:
4098                 case begbuf:
4099                 case endbuf:
4100 #ifndef emacs
4101                 case wordbound:
4102                 case notwordbound:
4103                 case wordbeg:
4104                 case wordend:
4105 #endif
4106                 case push_dummy_failure:
4107                         continue;
4108
4109                 case jump_n:
4110                 case pop_failure_jump:
4111                 case maybe_pop_jump:
4112                 case jump:
4113                 case jump_past_alt:
4114                 case dummy_failure_jump:
4115                         EXTRACT_NUMBER_AND_INCR(j, p);
4116                         p += j;
4117                         if (j > 0)
4118                                 continue;
4119
4120                         /* Jump backward implies we just went through the body
4121                            of a loop and matched nothing.  Opcode jumped to
4122                            should be `on_failure_jump' or `succeed_n'.  Just
4123                            treat it like an ordinary jump.  For a * loop, it has
4124                            pushed its failure point already; if so, discard that
4125                            as redundant.  */
4126                         if ((re_opcode_t) * p != on_failure_jump
4127                             && (re_opcode_t) * p != succeed_n)
4128                                 continue;
4129
4130                         p++;
4131                         EXTRACT_NUMBER_AND_INCR(j, p);
4132                         p += j;
4133
4134                         /* If what's on the stack is where we are now, pop
4135                            it.  */
4136                         if (!FAIL_STACK_EMPTY()
4137                             && fail_stack.stack[fail_stack.avail - 1].pointer ==
4138                             p)
4139                                 fail_stack.avail--;
4140
4141                         continue;
4142
4143                 case on_failure_jump:
4144                 case on_failure_keep_string_jump:
4145                       handle_on_failure_jump:
4146                         EXTRACT_NUMBER_AND_INCR(j, p);
4147
4148                         /* For some patterns, e.g., `(a?)?', `p+j' here points
4149                            to the end of the pattern.  We don't want to push
4150                            such a point, since when we restore it above,
4151                            entering the switch will increment `p' past the end
4152                            of the pattern.  We don't need to push such a point
4153                            since we obviously won't find any more fastmap
4154                            entries beyond `pend'.  Such a pattern can match the
4155                            null string, though.  */
4156                         if (p + j < pend) {
4157                                 if (!PUSH_PATTERN_OP(p + j, fail_stack)) {
4158                                         RESET_FAIL_STACK();
4159                                         return -2;
4160                                 }
4161                         } else
4162                                 bufp->can_be_null = 1;
4163
4164                         if (succeed_n_p) {
4165                                 EXTRACT_NUMBER_AND_INCR(k, p);  /* Skip the n.  */
4166                                 succeed_n_p = false;
4167                         }
4168
4169                         continue;
4170
4171                 case succeed_n:
4172                         /* Get to the number of times to succeed.  */
4173                         p += 2;
4174
4175                         /* Increment p past the n for when k != 0.  */
4176                         EXTRACT_NUMBER_AND_INCR(k, p);
4177                         if (k == 0) {
4178                                 p -= 4;
4179                                 succeed_n_p = true;     /* Spaghetti code alert.  */
4180                                 goto handle_on_failure_jump;
4181                         }
4182                         continue;
4183
4184                 case set_number_at:
4185                         p += 4;
4186                         continue;
4187
4188                 case start_memory:
4189                 case stop_memory:
4190                         p += 2;
4191                         continue;
4192
4193                 case succeed:
4194                 default:
4195                         abort();        /* We have listed all the cases.  */
4196                 }               /* switch *p++ */
4197
4198                 /* Getting here means we have found the possible starting
4199                    characters for one path of the pattern -- and that the empty
4200                    string does not match.  We need not follow this path further.
4201                    Instead, look at the next alternative (remembered on the
4202                    stack), or quit if no more.  The test at the top of the loop
4203                    does these things.  */
4204                 path_can_be_null = false;
4205                 p = pend;
4206         }                       /* while p */
4207
4208         /* Set `can_be_null' for the last path (also the first path, if the
4209            pattern is empty).  */
4210         bufp->can_be_null |= path_can_be_null;
4211
4212 done:
4213         RESET_FAIL_STACK();
4214         return 0;
4215 }       /* re_compile_fastmap */
4216 \f
4217 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4218    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4219    this memory for recording register information.  STARTS and ENDS
4220    must be allocated using the malloc library routine, and must each
4221    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4222
4223    If NUM_REGS == 0, then subsequent matches should allocate their own
4224    register data.
4225
4226    Unless this function is called, the first search or match using
4227    PATTERN_BUFFER will allocate its own register data, without
4228    freeing the old data.  */
4229
4230 void
4231 re_set_registers(struct re_pattern_buffer *bufp, struct re_registers *regs,
4232                  unsigned num_regs, regoff_t * starts, regoff_t * ends)
4233 {
4234         if (num_regs) {
4235                 bufp->regs_allocated = REGS_REALLOCATE;
4236                 regs->num_regs = num_regs;
4237                 regs->start = starts;
4238                 regs->end = ends;
4239         } else {
4240                 bufp->regs_allocated = REGS_UNALLOCATED;
4241                 regs->num_regs = 0;
4242                 regs->start = regs->end = (regoff_t *) 0;
4243         }
4244 }
4245 \f
4246 /* Searching routines.  */
4247
4248 /* Like re_search_2, below, but only one string is specified, and
4249    doesn't let you say where to stop matching. */
4250
4251 int
4252 re_search(struct re_pattern_buffer *bufp, const char *string, int size,
4253           int startpos, int range, struct re_registers *regs)
4254 {
4255         return re_search_2(bufp, NULL, 0, string, size, startpos, range,
4256                            regs, size);
4257 }
4258
4259 #ifndef emacs
4260 /* Snarfed from src/lisp.h, needed for compiling [ce]tags. */
4261 # define bytecount_to_charcount(ptr, len) (len)
4262 # define charcount_to_bytecount(ptr, len) (len)
4263 typedef int Charcount;
4264 #endif
4265
4266 /* Using the compiled pattern in BUFP->buffer, first tries to match the
4267    virtual concatenation of STRING1 and STRING2, starting first at index
4268    STARTPOS, then at STARTPOS + 1, and so on.
4269
4270    With MULE, STARTPOS is a byte position, not a char position.  And the
4271    search will increment STARTPOS by the width of the current leading
4272    character.
4273
4274    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
4275
4276    RANGE is how far to scan while trying to match.  RANGE = 0 means try
4277    only at STARTPOS; in general, the last start tried is STARTPOS +
4278    RANGE.
4279
4280    With MULE, RANGE is a byte position, not a char position.  The last
4281    start tried is the character starting <= STARTPOS + RANGE.
4282
4283    In REGS, return the indices of the virtual concatenation of STRING1
4284    and STRING2 that matched the entire BUFP->buffer and its contained
4285    subexpressions.
4286
4287    Do not consider matching one past the index STOP in the virtual
4288    concatenation of STRING1 and STRING2.
4289
4290    We return either the position in the strings at which the match was
4291    found, -1 if no match, or -2 if error (such as failure
4292    stack overflow).  */
4293
4294 int
4295 re_search_2(struct re_pattern_buffer *bufp, const char *str1,
4296             int size1, const char *str2, int size2, int startpos,
4297             int range, struct re_registers *regs, int stop)
4298 {
4299         int val;
4300         re_char *string1 = (re_char *) str1;
4301         re_char *string2 = (re_char *) str2;
4302         REGISTER char *fastmap = bufp->fastmap;
4303         REGISTER RE_TRANSLATE_TYPE translate = bufp->translate;
4304         int total_size = size1 + size2;
4305         int endpos = startpos + range;
4306 #ifdef REGEX_BEGLINE_CHECK
4307         int anchored_at_begline = 0;
4308 #endif
4309         re_char *d;
4310         Charcount d_size;
4311
4312         /* Check for out-of-range STARTPOS.  */
4313         if (startpos < 0 || startpos > total_size)
4314                 return -1;
4315
4316         /* Fix up RANGE if it might eventually take us outside
4317            the virtual concatenation of STRING1 and STRING2.  */
4318         if (endpos < 0)
4319                 range = 0 - startpos;
4320         else if (endpos > total_size)
4321                 range = total_size - startpos;
4322
4323         /* If the search isn't to be a backwards one, don't waste time in a
4324            search for a pattern that must be anchored.  */
4325         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
4326             && range > 0) {
4327                 if (startpos > 0)
4328                         return -1;
4329                 else {
4330                         d = ((const unsigned char *)
4331                              (startpos >=
4332                               size1 ? string2 - size1 : string1) + startpos);
4333                         range = charcount_to_bytecount(d, 1);
4334                 }
4335         }
4336 #ifdef emacs
4337         /* In a forward search for something that starts with \=.
4338            don't keep searching past point.  */
4339         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot
4340             && range > 0) {
4341                 range =
4342                     BUF_PT(regex_emacs_buffer) - BUF_BEGV(regex_emacs_buffer)
4343                     - startpos;
4344                 if (range < 0)
4345                         return -1;
4346         }
4347 #endif                          /* emacs */
4348
4349         /* Update the fastmap now if not correct already.  */
4350         if (fastmap && !bufp->fastmap_accurate)
4351                 if (re_compile_fastmap(bufp) == -2)
4352                         return -2;
4353
4354 #ifdef REGEX_BEGLINE_CHECK
4355         {
4356                 unsigned long i = 0;
4357
4358                 while (i < bufp->used) {
4359                         if (bufp->buffer[i] == start_memory ||
4360                             bufp->buffer[i] == stop_memory)
4361                                 i += 2;
4362                         else
4363                                 break;
4364                 }
4365                 anchored_at_begline = i < bufp->used
4366                     && bufp->buffer[i] == begline;
4367         }
4368 #endif
4369
4370 #ifdef emacs
4371         SETUP_SYNTAX_CACHE_FOR_OBJECT(regex_match_object,
4372                                       regex_emacs_buffer,
4373                                       SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR
4374                                       (regex_match_object, regex_emacs_buffer,
4375                                        startpos), 1);
4376 #endif
4377
4378         /* Loop through the string, looking for a place to start matching.  */
4379         for (;;) {
4380 #ifdef REGEX_BEGLINE_CHECK
4381                 /* If the regex is anchored at the beginning of a line (i.e. with a ^),
4382                    then we can speed things up by skipping to the next beginning-of-
4383                    line. */
4384                 if (anchored_at_begline && startpos > 0 && startpos != size1 &&
4385                     range > 0) {
4386                         /* whose stupid idea was it anyway to make this
4387                            function take two strings to match?? */
4388                         int lim = 0;
4389                         int irange = range;
4390
4391                         if (startpos < size1 && startpos + range >= size1)
4392                                 lim = range - (size1 - startpos);
4393
4394                         d = ((const unsigned char *)
4395                              (startpos >=
4396                               size1 ? string2 - size1 : string1) + startpos);
4397                         DEC_CHARPTR(d); /* Ok, since startpos != size1. */
4398                         d_size = charcount_to_bytecount(d, 1);
4399
4400                         if (TRANSLATE_P(translate))
4401                                 while (range > lim && *d != '\n') {
4402                                         d += d_size;    /* Speedier INC_CHARPTR(d) */
4403                                         d_size = charcount_to_bytecount(d, 1);
4404                                         range -= d_size;
4405                         } else
4406                                 while (range > lim && *d != '\n') {
4407                                         d += d_size;    /* Speedier INC_CHARPTR(d) */
4408                                         d_size = charcount_to_bytecount(d, 1);
4409                                         range -= d_size;
4410                                 }
4411
4412                         startpos += irange - range;
4413                 }
4414 #endif                          /* REGEX_BEGLINE_CHECK */
4415
4416                 /* If a fastmap is supplied, skip quickly over characters that
4417                    cannot be the start of a match.  If the pattern can match the
4418                    null string, however, we don't need to skip characters; we want
4419                    the first null string.  */
4420                 if (fastmap && startpos < total_size && !bufp->can_be_null) {
4421                         if (range > 0) {        /* Searching forwards.  */
4422                                 int lim = 0;
4423                                 int irange = range;
4424
4425                                 if (startpos < size1
4426                                     && startpos + range >= size1)
4427                                         lim = range - (size1 - startpos);
4428
4429                                 d = ((const unsigned char *)
4430                                      (startpos >=
4431                                       size1 ? string2 - size1 : string1) +
4432                                      startpos);
4433
4434                                 /* Written out as an if-else to avoid testing `translate'
4435                                    inside the loop.  */
4436                                 if (TRANSLATE_P(translate))
4437                                         while (range > lim) {
4438 #ifdef MULE
4439                                                 Emchar buf_ch;
4440                                                 Bufbyte str[MAX_EMCHAR_LEN];
4441
4442                                                 buf_ch = charptr_emchar(d);
4443                                                 buf_ch = RE_TRANSLATE(buf_ch);
4444                                                 set_charptr_emchar(str, buf_ch);
4445                                                 if (buf_ch >= 0200
4446                                                     || fastmap[(unsigned char)
4447                                                                *str])
4448                                                         break;
4449 #else
4450                                                 if (fastmap
4451                                                     [(unsigned char)
4452                                                      RE_TRANSLATE(*d)])
4453                                                         break;
4454 #endif                          /* MULE */
4455                                                 d_size =
4456                                                     charcount_to_bytecount(d,
4457                                                                            1);
4458                                                 range -= d_size;
4459                                                 d += d_size;    /* Speedier INC_CHARPTR(d) */
4460                                 } else
4461                                         while (range > lim && !fastmap[*d]) {
4462                                                 d_size =
4463                                                     charcount_to_bytecount(d,
4464                                                                            1);
4465                                                 range -= d_size;
4466                                                 d += d_size;    /* Speedier INC_CHARPTR(d) */
4467                                         }
4468
4469                                 startpos += irange - range;
4470                         } else {        /* Searching backwards.  */
4471
4472                                 Emchar c = (size1 == 0 || startpos >= size1
4473                                             ? charptr_emchar(string2 +
4474                                                              startpos - size1)
4475                                             : charptr_emchar(string1 +
4476                                                              startpos));
4477                                 c = TRANSLATE(c);
4478 #ifdef MULE
4479                                 if (!(c >= 0200 || fastmap[(unsigned char)c]))
4480                                         goto advance;
4481 #else
4482                                 if (!fastmap[(unsigned char)c])
4483                                         goto advance;
4484 #endif
4485                         }
4486                 }
4487
4488                 /* If can't match the null string, and that's all we have left, fail.  */
4489                 if (range >= 0 && startpos == total_size && fastmap
4490                     && !bufp->can_be_null)
4491                         return -1;
4492
4493 #ifdef emacs                    /* XEmacs added, w/removal of immediate_quit */
4494                 if (!no_quit_in_re_search)
4495                         QUIT;
4496 #endif
4497                 val = re_match_2_internal(bufp, string1, size1, string2, size2,
4498                                           startpos, regs, stop);
4499 #ifndef REGEX_MALLOC
4500 #ifdef C_ALLOCA
4501                 alloca(0);
4502 #endif
4503 #endif
4504
4505                 if (val >= 0)
4506                         return startpos;
4507
4508                 if (val == -2)
4509                         return -2;
4510
4511               advance:
4512                 if (!range)
4513                         break;
4514                 else if (range > 0) {
4515                         d = ((const unsigned char *)
4516                              (startpos >=
4517                               size1 ? string2 - size1 : string1) + startpos);
4518                         d_size = charcount_to_bytecount(d, 1);
4519                         range -= d_size;
4520                         startpos += d_size;
4521                 } else {
4522                         /* Note startpos > size1 not >=.  If we are on the
4523                            string1/string2 boundary, we want to backup into string1. */
4524                         d = ((const unsigned char *)
4525                              (startpos >
4526                               size1 ? string2 - size1 : string1) + startpos);
4527                         DEC_CHARPTR(d);
4528                         d_size = charcount_to_bytecount(d, 1);
4529                         range += d_size;
4530                         startpos -= d_size;
4531                 }
4532         }
4533         return -1;
4534 }                               /* re_search_2 */
4535 \f
4536 /* Declarations and macros for re_match_2.  */
4537
4538 /* This converts PTR, a pointer into one of the search strings `string1'
4539    and `string2' into an offset from the beginning of that string.  */
4540 #define POINTER_TO_OFFSET(ptr)                  \
4541   (FIRST_STRING_P (ptr)                         \
4542    ? ((regoff_t) ((ptr) - string1))             \
4543    : ((regoff_t) ((ptr) - string2 + size1)))
4544
4545 /* Macros for dealing with the split strings in re_match_2.  */
4546
4547 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
4548
4549 /* Call before fetching a character with *d.  This switches over to
4550    string2 if necessary.  */
4551 #define REGEX_PREFETCH()                                                        \
4552   while (d == dend)                                                     \
4553     {                                                                   \
4554       /* End of string2 => fail.  */                                    \
4555       if (dend == end_match_2)                                          \
4556         goto fail;                                                      \
4557       /* End of string1 => advance to string2.  */                      \
4558       d = string2;                                                      \
4559       dend = end_match_2;                                               \
4560     }
4561
4562 /* Test if at very beginning or at very end of the virtual concatenation
4563    of `string1' and `string2'.  If only one string, it's `string2'.  */
4564 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4565 #define AT_STRINGS_END(d) ((d) == end2)
4566
4567 /* XEmacs change:
4568    If the given position straddles the string gap, return the equivalent
4569    position that is before or after the gap, respectively; otherwise,
4570    return the same position. */
4571 #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d))
4572 #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
4573
4574 /* Test if CH is a word-constituent character. (XEmacs change) */
4575 #define WORDCHAR_P_UNSAFE(ch)                                              \
4576   (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),   \
4577                                ch) == Sword)
4578
4579 /* Free everything we malloc.  */
4580 #ifdef MATCH_MAY_ALLOCATE
4581 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
4582 #define FREE_VARIABLES()                                                \
4583   do {                                                                  \
4584     REGEX_FREE_STACK (fail_stack.stack);                                \
4585     FREE_VAR (regstart);                                                \
4586     FREE_VAR (regend);                                                  \
4587     FREE_VAR (old_regstart);                                            \
4588     FREE_VAR (old_regend);                                              \
4589     FREE_VAR (best_regstart);                                           \
4590     FREE_VAR (best_regend);                                             \
4591     FREE_VAR (reg_info);                                                \
4592     FREE_VAR (reg_dummy);                                               \
4593     FREE_VAR (reg_info_dummy);                                          \
4594   } while (0)
4595 #else                           /* not MATCH_MAY_ALLOCATE */
4596 #define FREE_VARIABLES() ((void)0)      /* Do nothing!  But inhibit gcc warning.  */
4597 #endif                          /* MATCH_MAY_ALLOCATE */
4598
4599 /* These values must meet several constraints.  They must not be valid
4600    register values; since we have a limit of 255 registers (because
4601    we use only one byte in the pattern for the register number), we can
4602    use numbers larger than 255.  They must differ by 1, because of
4603    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
4604    be larger than the value for the highest register, so we do not try
4605    to actually save any registers when none are active.  */
4606 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4607 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4608 \f
4609 /* Matching routines.  */
4610
4611 #ifndef emacs                   /* Emacs never uses this.  */
4612 /* re_match is like re_match_2 except it takes only a single string.  */
4613
4614 int
4615 re_match(struct re_pattern_buffer *bufp, const char *string, int size,
4616          int pos, struct re_registers *regs)
4617 {
4618         int result =
4619             re_match_2_internal(bufp, NULL, 0, (re_char *) string, size,
4620                                 pos, regs, size);
4621         alloca(0);
4622         return result;
4623 }
4624 #endif                          /* not emacs */
4625
4626 /* re_match_2 matches the compiled pattern in BUFP against the
4627    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and
4628    SIZE2, respectively).  We start matching at POS, and stop matching
4629    at STOP.
4630
4631    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4632    store offsets for the substring each group matched in REGS.  See the
4633    documentation for exactly how many groups we fill.
4634
4635    We return -1 if no match, -2 if an internal error (such as the
4636    failure stack overflowing).  Otherwise, we return the length of the
4637    matched substring.  */
4638
4639 int
4640 re_match_2(struct re_pattern_buffer *bufp, const char *string1,
4641            int size1, const char *string2, int size2, int pos,
4642            struct re_registers *regs, int stop)
4643 {
4644         int result;
4645
4646 #ifdef emacs
4647         SETUP_SYNTAX_CACHE_FOR_OBJECT(regex_match_object,
4648                                       regex_emacs_buffer,
4649                                       SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR
4650                                       (regex_match_object, regex_emacs_buffer,
4651                                        pos), 1);
4652 #endif
4653
4654         result = re_match_2_internal(bufp, (re_char *) string1, size1,
4655                                      (re_char *) string2, size2,
4656                                      pos, regs, stop);
4657
4658         alloca(0);
4659         return result;
4660 }
4661
4662 /* This is a separate function so that we can force an alloca cleanup
4663    afterwards.  */
4664 static int
4665 re_match_2_internal(struct re_pattern_buffer *bufp, re_char * string1,
4666                     int size1, re_char * string2, int size2, int pos,
4667                     struct re_registers *regs, int stop)
4668 {
4669         /* General temporaries.  */
4670         int mcnt;
4671         unsigned char *p1;
4672         int should_succeed;     /* XEmacs change */
4673
4674         /* Just past the end of the corresponding string.  */
4675         re_char *end1, *end2;
4676
4677         /* Pointers into string1 and string2, just past the last characters in
4678            each to consider matching.  */
4679         re_char *end_match_1, *end_match_2;
4680
4681         /* Where we are in the data, and the end of the current string.  */
4682         re_char *d, *dend;
4683
4684         /* Where we are in the pattern, and the end of the pattern.  */
4685         unsigned char *p = bufp->buffer;
4686         REGISTER unsigned char *pend = p + bufp->used;
4687
4688         /* Mark the opcode just after a start_memory, so we can test for an
4689            empty subpattern when we get to the stop_memory.  */
4690         re_char *just_past_start_mem = 0;
4691
4692         /* We use this to map every character in the string.  */
4693         RE_TRANSLATE_TYPE translate = bufp->translate;
4694
4695         /* Failure point stack.  Each place that can handle a failure further
4696            down the line pushes a failure point on this stack.  It consists of
4697            restart, regend, and reg_info for all registers corresponding to
4698            the subexpressions we're currently inside, plus the number of such
4699            registers, and, finally, two char *'s.  The first char * is where
4700            to resume scanning the pattern; the second one is where to resume
4701            scanning the strings.  If the latter is zero, the failure point is
4702            a ``dummy''; if a failure happens and the failure point is a dummy,
4703            it gets discarded and the next one is tried.  */
4704 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, this is global.  */
4705         fail_stack_type fail_stack;
4706 #endif
4707 #ifdef REGEX_DEBUG_FLAG
4708         static unsigned failure_id;
4709         int nfailure_points_pushed = 0, nfailure_points_popped = 0;
4710 #endif
4711
4712 #ifdef REL_ALLOC
4713         /* This holds the pointer to the failure stack, when
4714            it is allocated relocatably.  */
4715         fail_stack_elt_t *failure_stack_ptr;
4716 #endif
4717
4718         /* We fill all the registers internally, independent of what we
4719            return, for use in backreferences.  The number here includes
4720            an element for register zero.  */
4721         int num_regs = bufp->re_ngroups + 1;
4722
4723         /* The currently active registers.  */
4724         int lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4725         int highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4726
4727         /* Information on the contents of registers. These are pointers into
4728            the input strings; they record just what was matched (on this
4729            attempt) by a subexpression part of the pattern, that is, the
4730            regnum-th regstart pointer points to where in the pattern we began
4731            matching and the regnum-th regend points to right after where we
4732            stopped matching the regnum-th subexpression.  (The zeroth register
4733            keeps track of what the whole pattern matches.)  */
4734 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4735         re_char **regstart, **regend;
4736 #endif
4737
4738         /* If a group that's operated upon by a repetition operator fails to
4739            match anything, then the register for its start will need to be
4740            restored because it will have been set to wherever in the string we
4741            are when we last see its open-group operator.  Similarly for a
4742            register's end.  */
4743 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4744         re_char **old_regstart, **old_regend;
4745 #endif
4746
4747         /* The is_active field of reg_info helps us keep track of which (possibly
4748            nested) subexpressions we are currently in. The matched_something
4749            field of reg_info[reg_num] helps us tell whether or not we have
4750            matched any of the pattern so far this time through the reg_num-th
4751            subexpression.  These two fields get reset each time through any
4752            loop their register is in.  */
4753 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, this is global.  */
4754         register_info_type *reg_info;
4755 #endif
4756
4757         /* The following record the register info as found in the above
4758            variables when we find a match better than any we've seen before.
4759            This happens as we backtrack through the failure points, which in
4760            turn happens only if we have not yet matched the entire string. */
4761         unsigned best_regs_set = false;
4762 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4763         re_char **best_regstart, **best_regend;
4764 #endif
4765
4766         /* Logically, this is `best_regend[0]'.  But we don't want to have to
4767            allocate space for that if we're not allocating space for anything
4768            else (see below).  Also, we never need info about register 0 for
4769            any of the other register vectors, and it seems rather a kludge to
4770            treat `best_regend' differently than the rest.  So we keep track of
4771            the end of the best match so far in a separate variable.  We
4772            initialize this to NULL so that when we backtrack the first time
4773            and need to test it, it's not garbage.  */
4774         re_char *match_end = NULL;
4775
4776         /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
4777         int set_regs_matched_done = 0;
4778
4779         /* Used when we pop values we don't care about.  */
4780 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4781         re_char **reg_dummy;
4782         register_info_type *reg_info_dummy;
4783 #endif
4784
4785 #ifdef REGEX_DEBUG_FLAG
4786         /* Counts the total number of registers pushed.  */
4787         unsigned num_regs_pushed = 0;
4788 #endif
4789
4790         /* 1 if this match ends in the same string (string1 or string2)
4791            as the best previous match.  */
4792         re_bool same_str_p;
4793
4794         /* 1 if this match is the best seen so far.  */
4795         re_bool best_match_p;
4796
4797         DEBUG_PRINT1("\n\nEntering re_match_2.\n");
4798
4799         INIT_FAIL_STACK();
4800
4801 #ifdef MATCH_MAY_ALLOCATE
4802         /* Do not bother to initialize all the register variables if there are
4803            no groups in the pattern, as it takes a fair amount of time.  If
4804            there are groups, we include space for register 0 (the whole
4805            pattern), even though we never use it, since it simplifies the
4806            array indexing.  We should fix this.  */
4807         if (bufp->re_ngroups) {
4808                 regstart = REGEX_TALLOC(num_regs, re_char *);
4809                 regend = REGEX_TALLOC(num_regs, re_char *);
4810                 old_regstart = REGEX_TALLOC(num_regs, re_char *);
4811                 old_regend = REGEX_TALLOC(num_regs, re_char *);
4812                 best_regstart = REGEX_TALLOC(num_regs, re_char *);
4813                 best_regend = REGEX_TALLOC(num_regs, re_char *);
4814                 reg_info = REGEX_TALLOC(num_regs, register_info_type);
4815                 reg_dummy = REGEX_TALLOC(num_regs, re_char *);
4816                 reg_info_dummy = REGEX_TALLOC(num_regs, register_info_type);
4817
4818                 if (!
4819                     (regstart && regend && old_regstart && old_regend
4820                      && reg_info && best_regstart && best_regend && reg_dummy
4821                      && reg_info_dummy)) {
4822                         FREE_VARIABLES();
4823                         return -2;
4824                 }
4825         } else {
4826                 /* We must initialize all our variables to NULL, so that
4827                    `FREE_VARIABLES' doesn't try to free them.  */
4828                 regstart = regend = old_regstart = old_regend = best_regstart
4829                     = best_regend = reg_dummy = NULL;
4830                 reg_info = reg_info_dummy = (register_info_type *) NULL;
4831         }
4832 #endif                          /* MATCH_MAY_ALLOCATE */
4833
4834         /* The starting position is bogus.  */
4835         if (pos < 0 || pos > size1 + size2) {
4836                 FREE_VARIABLES();
4837                 return -1;
4838         }
4839
4840         /* Initialize subexpression text positions to -1 to mark ones that no
4841            start_memory/stop_memory has been seen for. Also initialize the
4842            register information struct.  */
4843         for (mcnt = 1; mcnt < num_regs; mcnt++) {
4844                 regstart[mcnt] = regend[mcnt]
4845                     = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4846
4847                 REG_MATCH_NULL_STRING_P(reg_info[mcnt]) =
4848                     MATCH_NULL_UNSET_VALUE;
4849                 IS_ACTIVE(reg_info[mcnt]) = 0;
4850                 MATCHED_SOMETHING(reg_info[mcnt]) = 0;
4851                 EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
4852         }
4853         /* We move `string1' into `string2' if the latter's empty -- but not if
4854            `string1' is null.  */
4855         if (size2 == 0 && string1 != NULL) {
4856                 string2 = string1;
4857                 size2 = size1;
4858                 string1 = 0;
4859                 size1 = 0;
4860         }
4861         end1 = string1 + size1;
4862         end2 = string2 + size2;
4863
4864         /* Compute where to stop matching, within the two strings.  */
4865         if (stop <= size1) {
4866                 end_match_1 = string1 + stop;
4867                 end_match_2 = string2;
4868         } else {
4869                 end_match_1 = end1;
4870                 end_match_2 = string2 + stop - size1;
4871         }
4872
4873         /* `p' scans through the pattern as `d' scans through the data.
4874            `dend' is the end of the input string that `d' points within.  `d'
4875            is advanced into the following input string whenever necessary, but
4876            this happens before fetching; therefore, at the beginning of the
4877            loop, `d' can be pointing at the end of a string, but it cannot
4878            equal `string2'.  */
4879         if (size1 > 0 && pos <= size1) {
4880                 d = string1 + pos;
4881                 dend = end_match_1;
4882         } else {
4883                 d = string2 + pos - size1;
4884                 dend = end_match_2;
4885         }
4886
4887         DEBUG_PRINT1("The compiled pattern is: \n");
4888         DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
4889         DEBUG_PRINT1("The string to match is: `");
4890         DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
4891         DEBUG_PRINT1("'\n");
4892
4893         /* This loops over pattern commands.  It exits by returning from the
4894            function if the match is complete, or it drops through if the match
4895            fails at this starting point in the input data.  */
4896         for (;;) {
4897                 DEBUG_PRINT2("\n0x%lx: ", (long)p);
4898 #ifdef emacs                    /* XEmacs added, w/removal of immediate_quit */
4899                 if (!no_quit_in_re_search)
4900                         QUIT;
4901 #endif
4902
4903                 if (p == pend) {
4904                         /* End of pattern means we might have succeeded.  */
4905                         DEBUG_PRINT1("end of pattern ... ");
4906
4907                         /* If we haven't matched the entire string, and we want
4908                            the longest match, try backtracking.  */
4909                         if (d != end_match_2) {
4910                                 same_str_p = (FIRST_STRING_P(match_end)
4911                                               == MATCHING_IN_FIRST_STRING);
4912
4913                                 /* AIX compiler got confused when this was
4914                                    combined with the previous declaration.  */
4915                                 if (same_str_p)
4916                                         best_match_p = d > match_end;
4917                                 else
4918                                         best_match_p =
4919                                             !MATCHING_IN_FIRST_STRING;
4920
4921                                 DEBUG_PRINT1("backtracking.\n");
4922
4923                                 if (!FAIL_STACK_EMPTY()) {
4924                                         /* More failure points to try.  */
4925
4926                                         /* If exceeds best match so far, save
4927                                            it.  */
4928                                         if (!best_regs_set || best_match_p) {
4929                                                 best_regs_set = true;
4930                                                 match_end = d;
4931
4932                                                 DEBUG_PRINT1
4933                                                     ("\nSAVING match as best so far.\n");
4934
4935                                                 for (mcnt = 1; mcnt < num_regs;
4936                                                      mcnt++) {
4937                                                         best_regstart[mcnt] =
4938                                                             regstart[mcnt];
4939                                                         best_regend[mcnt] =
4940                                                             regend[mcnt];
4941                                                 }
4942                                         }
4943                                         goto fail;
4944                                 }
4945
4946                                 /* If no failure points, don't restore garbage.
4947                                    And if last match is real best match, don't
4948                                    restore second best one. */
4949                                 else if (best_regs_set && !best_match_p) {
4950                                       restore_best_regs:
4951                                         /* Restore best match.  It may happen
4952                                            that `dend == end_match_1' while the
4953                                            restored d is in string2.  For
4954                                            example, the pattern `x.*y.*z'
4955                                            against the strings `x-' and `y-z-',
4956                                            if the two strings are not
4957                                            consecutive in memory.  */
4958                                         DEBUG_PRINT1
4959                                             ("Restoring best registers.\n");
4960
4961                                         d = match_end;
4962                                         dend = ((d >= string1 && d <= end1)
4963                                                 ? end_match_1 : end_match_2);
4964
4965                                         for (mcnt = 1; mcnt < num_regs; mcnt++) {
4966                                                 regstart[mcnt] =
4967                                                     best_regstart[mcnt];
4968                                                 regend[mcnt] =
4969                                                     best_regend[mcnt];
4970                                         }
4971                                 }
4972                         }
4973                         /* d != end_match_2 */
4974                 succeed_label:
4975                         DEBUG_PRINT1("Accepting match.\n");
4976                         {
4977                                 int num_nonshy_regs = bufp->re_nsub + 1;
4978                                 /* If caller wants register contents data back,
4979                                    fill REGS.  */
4980                                 if (regs && !bufp->no_sub) {
4981                                         /* Have the register data arrays been
4982                                            allocated?  */
4983                                         if (bufp->regs_allocated == REGS_UNALLOCATED) { 
4984                                           /* No.  So allocate them with malloc.
4985                                              We need one extra element beyond
4986                                              `num_regs' for the `-1' marker GNU
4987                                              code uses.  */
4988                                                 regs->num_regs = MAX(RE_NREGS, num_nonshy_regs + 1);
4989                                                 regs->start = TALLOC(regs->num_regs, regoff_t);
4990                                                 regs->end = TALLOC(regs->num_regs, regoff_t);
4991                                                 if (regs->start == NULL || regs->end == NULL) {
4992                                                         FREE_VARIABLES ();
4993                                                         return -2;
4994                                                 }
4995                                                 bufp->regs_allocated = REGS_REALLOCATE;
4996                                         } else if (bufp->regs_allocated == REGS_REALLOCATE) { 
4997                                           /* Yes.  If we need more elements than were already
4998                                              allocated, reallocate them.  If we need fewer, just
4999                                              leave it alone.  */
5000                                                 if (regs->num_regs < num_nonshy_regs + 1) {
5001                                                         regs->num_regs = num_nonshy_regs + 1;
5002                                                         RETALLOC(regs->start, regs->num_regs, regoff_t);
5003                                                         RETALLOC(regs->end, regs->num_regs, regoff_t);
5004                                                         if (regs->start == NULL || regs->end == NULL) {
5005                                                                 FREE_VARIABLES();
5006                                                                 return -2;
5007                                                         }
5008                                                 }
5009                                         } else {
5010                                                 /* The braces fend off a "empty body in an else-statement"
5011                                                    warning under GCC when assert expands to nothing.  */
5012                                                 assert (bufp->regs_allocated == REGS_FIXED);
5013                                         }
5014
5015                                         /* Convert the pointer data in `regstart' and `regend' to
5016                                            indices.  Register zero has to be set differently,
5017                                            since we haven't kept track of any info for it.  */
5018                                         if (regs->num_regs > 0) {
5019                                                 regs->start[0] = pos;
5020                                                 regs->end[0] = (MATCHING_IN_FIRST_STRING
5021                                                                 ? ((regoff_t) (d - string1))
5022                                                                 : ((regoff_t) (d - string2 + size1)));
5023                                         }
5024
5025                                         /* Map over the NUM_NONSHY_REGS non-shy internal registers.
5026                                            Copy each into the corresponding external register.
5027                                            N.B. MCNT indexes external registers. */
5028                                         for (mcnt = 1;
5029                                              mcnt < MIN (num_nonshy_regs, regs->num_regs);
5030                                              mcnt++) {
5031                                                 int ireg = bufp->external_to_internal_register[mcnt];
5032
5033                                                 if (REG_UNSET (regstart[ireg]) || REG_UNSET (regend[ireg]))
5034                                                         regs->start[mcnt] = regs->end[mcnt] = -1;
5035                                                 else {
5036                                                         regs->start[mcnt]
5037                                                                 = (regoff_t) POINTER_TO_OFFSET (regstart[ireg]);
5038                                                         regs->end[mcnt]
5039                                                                 = (regoff_t) POINTER_TO_OFFSET (regend[ireg]);
5040                                                 }
5041                                         }
5042                                 } /* regs && !bufp->no_sub */
5043
5044                                 /* If we have regs and the regs structure has
5045                                    more elements than were in the pattern, set
5046                                    the extra elements to -1.  If we
5047                                    (re)allocated the registers, this is the
5048                                    case, because we always allocate enough to
5049                                    have at least one -1 at the end.
5050
5051                                    We do this even when no_sub is set because
5052                                    some applications (XEmacs) reuse register
5053                                    structures which may contain stale
5054                                    information, and permit attempts to access
5055                                    those registers.
5056
5057                                    It would be possible to require the caller to
5058                                    do this, but we'd have to change the API for
5059                                    this function to reflect that, and audit all
5060                                    callers. */
5061                                 if (regs && regs->num_regs > 0)
5062                                         for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++)
5063                                                 regs->start[mcnt] = regs->end[mcnt] = -1;
5064                         }
5065
5066                         DEBUG_PRINT4("%u failure points pushed, %u popped (%u remain).\n",
5067                                       nfailure_points_pushed, nfailure_points_popped,
5068                                       nfailure_points_pushed - nfailure_points_popped);
5069                         DEBUG_PRINT2("%u registers pushed.\n", num_regs_pushed);
5070
5071                         mcnt = d - pos - (MATCHING_IN_FIRST_STRING
5072                                           ? string1
5073                                           : string2 - size1);
5074
5075                         DEBUG_PRINT2("Returning %d from re_match_2.\n", mcnt);
5076
5077                         FREE_VARIABLES();
5078                         return mcnt;
5079                 }
5080
5081                 /* Otherwise match next pattern command.  */
5082                 switch (SWITCH_ENUM_CAST((re_opcode_t) *p++)) {
5083                         /* Ignore these.  Used to ignore the n of succeed_n's
5084                            which currently have n == 0.  */
5085                 case no_op:
5086                         DEBUG_PRINT1("EXECUTING no_op.\n");
5087                         break;
5088
5089                 case succeed:
5090                         DEBUG_PRINT1("EXECUTING succeed.\n");
5091                         goto succeed_label;
5092
5093                         /* Match the next n pattern characters exactly.  The
5094                            following byte in the pattern defines n, and the n
5095                            bytes after that are the characters to match.  */
5096                 case exactn:
5097                         mcnt = *p++;
5098                         DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
5099
5100                         /* This is written out as an if-else so we don't waste
5101                            time testing `translate' inside the loop.  */
5102                         if (TRANSLATE_P(translate)) {
5103                                 do {
5104 #ifdef MULE
5105                                         Emchar pat_ch, buf_ch;
5106                                         Bytecount pat_len;
5107
5108                                         REGEX_PREFETCH();
5109                                         pat_ch = charptr_emchar(p);
5110                                         buf_ch = charptr_emchar(d);
5111                                         if (RE_TRANSLATE(buf_ch) != pat_ch)
5112                                                 goto fail;
5113
5114                                         pat_len = charcount_to_bytecount(p, 1);
5115                                         p += pat_len;
5116                                         INC_CHARPTR(d);
5117
5118                                         mcnt -= pat_len;
5119 #else                           /* not MULE */
5120                                         REGEX_PREFETCH();
5121                                         if ((unsigned char)RE_TRANSLATE(*d++) !=
5122                                             *p++)
5123                                                 goto fail;
5124                                         mcnt--;
5125 #endif
5126                                 }
5127                                 while (mcnt > 0);
5128                         } else {
5129                                 do {
5130                                         REGEX_PREFETCH();
5131                                         if (*d++ != *p++)
5132                                                 goto fail;
5133                                 }
5134                                 while (--mcnt);
5135                         }
5136                         SET_REGS_MATCHED();
5137                         break;
5138
5139                         /* Match any character except possibly a newline or a
5140                            null.  */
5141                 case anychar:
5142                         DEBUG_PRINT1("EXECUTING anychar.\n");
5143
5144                         REGEX_PREFETCH();
5145
5146                         if ((!(bufp->syntax & RE_DOT_NEWLINE)
5147                              && TRANSLATE(*d) == '\n')
5148                             || (bufp->syntax & RE_DOT_NOT_NULL
5149                                 && TRANSLATE(*d) == '\000'))
5150                                 goto fail;
5151
5152                         SET_REGS_MATCHED();
5153                         DEBUG_PRINT2("  Matched `%d'.\n", *d);
5154                         INC_CHARPTR(d); /* XEmacs change */
5155                         break;
5156
5157                 case charset:
5158                 case charset_not: {
5159                         REGISTER unsigned char c;
5160                         re_bool not_p =
5161                                 (re_opcode_t) * (p - 1) == charset_not;
5162
5163                         DEBUG_PRINT2("EXECUTING charset%s.\n",
5164                                      not_p ? "_not" : "");
5165
5166                         REGEX_PREFETCH();
5167                         /* The character to match.  */
5168                         c = TRANSLATE(*d);
5169
5170                         /* Cast to `unsigned' instead of `unsigned char'
5171                            in case the bit list is a full 32 bytes
5172                            long.  */
5173                         if (c < (unsigned)(*p * BYTEWIDTH)
5174                             && p[1 +
5175                                  c / BYTEWIDTH] & (1 << (c %
5176                                                          BYTEWIDTH)))
5177                                 not_p = !not_p;
5178
5179                         p += 1 + *p;
5180
5181                         if (!not_p)
5182                                 goto fail;
5183
5184                         SET_REGS_MATCHED();
5185                         INC_CHARPTR(d); /* XEmacs change */
5186                         break;
5187                 }
5188
5189 #ifdef MULE
5190                 case charset_mule:
5191                 case charset_mule_not: {
5192                         REGISTER Emchar c;
5193                         re_bool not_p =
5194                                 (re_opcode_t) * (p - 1) == charset_mule_not;
5195
5196                         DEBUG_PRINT2("EXECUTING charset_mule%s.\n",
5197                                      not_p ? "_not" : "");
5198
5199                         REGEX_PREFETCH();
5200                         c = charptr_emchar((const Bufbyte *)d);
5201                         /* The character to match.  */
5202                         c = TRANSLATE(c);
5203
5204                         if (EQ
5205                             (Qt,
5206                              unified_range_table_lookup(p, c, Qnil)))
5207                                 not_p = !not_p;
5208
5209                         p += unified_range_table_bytes_used(p);
5210
5211                         if (!not_p)
5212                                 goto fail;
5213
5214                         SET_REGS_MATCHED();
5215                         INC_CHARPTR(d);
5216                         break;
5217                 }
5218 #endif                          /* MULE */
5219
5220                         /* The beginning of a group is represented by
5221                            start_memory.  The arguments are the register number
5222                            in the next byte, and the number of groups inner to
5223                            this one in the next.  The text matched within the
5224                            group is recorded (in the internal registers data
5225                            structure) under the register number.  */
5226                 case start_memory:
5227                         DEBUG_PRINT3("EXECUTING start_memory %d (%d):\n", *p,
5228                                      p[1]);
5229
5230                         /* Find out if this group can match the empty string.  */
5231                         p1 = p; /* To send to group_match_null_string_p.  */
5232
5233                         if (REG_MATCH_NULL_STRING_P(reg_info[*p]) ==
5234                             MATCH_NULL_UNSET_VALUE)
5235                                 REG_MATCH_NULL_STRING_P(reg_info[*p])
5236                                     = group_match_null_string_p(&p1, pend,
5237                                                                 reg_info);
5238
5239                         /* Save the position in the string where we were the
5240                            last time we were at this open-group operator in case
5241                            the group is operated upon by a repetition operator,
5242                            e.g., with `(a*)*b' against `ab'; then we want to
5243                            ignore where we are now in the string in case this
5244                            attempt to match fails.  */
5245                         old_regstart[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
5246                             ? REG_UNSET(regstart[*p]) ? d : regstart[*p]
5247                             : regstart[*p];
5248                         DEBUG_PRINT2("  old_regstart: %d\n",
5249                                      POINTER_TO_OFFSET(old_regstart[*p]));
5250
5251                         regstart[*p] = d;
5252                         DEBUG_PRINT2("  regstart: %d\n",
5253                                      POINTER_TO_OFFSET(regstart[*p]));
5254
5255                         IS_ACTIVE(reg_info[*p]) = 1;
5256                         MATCHED_SOMETHING(reg_info[*p]) = 0;
5257
5258                         /* Clear this whenever we change the register activity
5259                            status.  */
5260                         set_regs_matched_done = 0;
5261
5262                         /* This is the new highest active register.  */
5263                         highest_active_reg = *p;
5264
5265                         /* If nothing was active before, this is the new lowest
5266                            active register.  */
5267                         if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5268                                 lowest_active_reg = *p;
5269
5270                         /* Move past the register number and inner group
5271                            count.  */
5272                         p += 2;
5273                         just_past_start_mem = p;
5274
5275                         break;
5276
5277                         /* The stop_memory opcode represents the end of a group.
5278                            Its arguments are the same as start_memory's: the
5279                            register number, and the number of inner groups.  */
5280                 case stop_memory:
5281                         DEBUG_PRINT3("EXECUTING stop_memory %d (%d):\n", *p,
5282                                      p[1]);
5283
5284                         /* We need to save the string position the last time we
5285                            were at this close-group operator in case the group
5286                            is operated upon by a repetition operator, e.g., with
5287                            `((a*)*(b*)*)*' against `aba'; then we want to ignore
5288                            where we are now in the string in case this attempt
5289                            to match fails.  */
5290                         old_regend[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
5291                             ? REG_UNSET(regend[*p]) ? d : regend[*p]
5292                             : regend[*p];
5293                         DEBUG_PRINT2("      old_regend: %d\n",
5294                                      POINTER_TO_OFFSET(old_regend[*p]));
5295
5296                         regend[*p] = d;
5297                         DEBUG_PRINT2("      regend: %d\n",
5298                                      POINTER_TO_OFFSET(regend[*p]));
5299
5300                         /* This register isn't active anymore.  */
5301                         IS_ACTIVE(reg_info[*p]) = 0;
5302
5303                         /* Clear this whenever we change the register activity
5304                            status.  */
5305                         set_regs_matched_done = 0;
5306
5307                         /* If this was the only register active, nothing is
5308                            active anymore.  */
5309                         if (lowest_active_reg == highest_active_reg) {
5310                                 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5311                                 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5312                         } else {        /* We must scan for the new highest
5313                                            active register, since it isn't
5314                                            necessarily one less than now:
5315                                            consider (a(b)c(d(e)f)g).  When group
5316                                            3 ends, after the f), the new highest
5317                                            active register is 1.  */
5318                                 unsigned char r = *p - 1;
5319                                 while (r > 0 && !IS_ACTIVE(reg_info[r]))
5320                                         r--;
5321
5322                                 /* If we end up at register zero, that means
5323                                    that we saved the registers as the result of
5324                                    an `on_failure_jump', not a `start_memory',
5325                                    and we jumped to past the innermost
5326                                    `stop_memory'.  For example, in ((.)*) we
5327                                    save registers 1 and 2 as a result of the *,
5328                                    but when we pop back to the second ), we are
5329                                    at the stop_memory 1.  Thus, nothing is
5330                                    active.  */
5331                                 if (r == 0) {
5332                                         lowest_active_reg =
5333                                             NO_LOWEST_ACTIVE_REG;
5334                                         highest_active_reg =
5335                                             NO_HIGHEST_ACTIVE_REG;
5336                                 } else {
5337                                         highest_active_reg = r;
5338
5339                                         /* 98/9/21 jhod: We've also gotta set
5340                                            lowest_active_reg, don't we? */
5341                                         r = 1;
5342                                         while (r < highest_active_reg
5343                                                && !IS_ACTIVE(reg_info[r]))
5344                                                 r++;
5345                                         lowest_active_reg = r;
5346                                 }
5347                         }
5348
5349                         /* If just failed to match something this time around
5350                            with a group that's operated on by a repetition
5351                            operator, try to force exit from the ``loop'', and
5352                            restore the register information for this group that
5353                            we had before trying this last match.  */
5354                         if ((!MATCHED_SOMETHING(reg_info[*p])
5355                              || just_past_start_mem == p - 1)
5356                             && (p + 2) < pend) {
5357                                 re_bool is_a_jump_n = false;
5358
5359                                 p1 = p + 2;
5360                                 mcnt = 0;
5361                                 switch ((unsigned int)(re_opcode_t)*p1++) {
5362                                 case jump_n:
5363                                         is_a_jump_n = true;
5364                                 case pop_failure_jump:
5365                                 case maybe_pop_jump:
5366                                 case jump:
5367                                 case dummy_failure_jump:
5368                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
5369                                         if (is_a_jump_n)
5370                                                 p1 += 2;
5371                                         break;
5372
5373                                 default:
5374                                         /* do nothing */ ;
5375                                 }
5376                                 p1 += mcnt;
5377
5378                                 /* If the next operation is a jump backwards in
5379                                    the pattern to an on_failure_jump right
5380                                    before the start_memory corresponding to this
5381                                    stop_memory, exit from the loop by forcing a
5382                                    failure after pushing on the stack the
5383                                    on_failure_jump's jump in the pattern, and
5384                                    d.  */
5385                                 if (mcnt < 0
5386                                     && (re_opcode_t) * p1 == on_failure_jump
5387                                     && (re_opcode_t) p1[3] == start_memory
5388                                     && p1[4] == *p) {
5389                                         /* If this group ever matched anything,
5390                                            then restore what its registers were
5391                                            before trying this last failed match,
5392                                            e.g., with `(a*)*b' against `ab' for
5393                                            regstart[1], and, e.g., with
5394                                            `((a*)*(b*)*)*' against `aba' for
5395                                            regend[3].
5396
5397                                            Also restore the registers for inner
5398                                            groups for, e.g., `((a*)(b*))*'
5399                                            against `aba' (register 3 would
5400                                            otherwise get trashed).  */
5401
5402                                         if (EVER_MATCHED_SOMETHING
5403                                             (reg_info[*p])) {
5404                                                 int r;
5405
5406                                                 EVER_MATCHED_SOMETHING(reg_info
5407                                                                        [*p]) =
5408                                                     0;
5409
5410                                                 /* Restore this and inner
5411                                                    groups' (if any)
5412                                                    registers.  */
5413                                                 for (r = *p; r < *p + *(p + 1);
5414                                                      r++) {
5415                                                         regstart[r] =
5416                                                             old_regstart[r];
5417
5418                                                         /* xx why this test?  */
5419                                                         if (old_regend[r] >=
5420                                                             regstart[r])
5421                                                                 regend[r] =
5422                                                                     old_regend
5423                                                                     [r];
5424                                                 }
5425                                         }
5426                                         p1++;
5427                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
5428                                         PUSH_FAILURE_POINT(p1 + mcnt, d, -2);
5429
5430                                         goto fail;
5431                                 }
5432                         }
5433
5434                         /* Move past the register number and the inner group
5435                            count.  */
5436                         p += 2;
5437                         break;
5438
5439                         /* \<digit> has been turned into a `duplicate' command
5440                            which is followed by the numeric value of <digit> as
5441                            the register number.  (Already passed through
5442                            external-to-internal-register mapping, so it refers
5443                            to the actual group number, not the non-shy-only
5444                            numbering used in the external world.) */
5445                 case duplicate:
5446                         {
5447                                 REGISTER re_char *d2, *dend2;
5448                                 /* Get which register to match against.  */
5449                                 int regno = *p++;
5450                                 DEBUG_PRINT2("EXECUTING duplicate %d.\n",
5451                                              regno);
5452
5453                                 /* Can't back reference a group which we've
5454                                    never matched.  */
5455                                 if (REG_UNSET(regstart[regno])
5456                                     || REG_UNSET(regend[regno]))
5457                                         goto fail;
5458
5459                                 /* Where in input to try to start matching.  */
5460                                 d2 = regstart[regno];
5461
5462                                 /* Where to stop matching; if both the place to
5463                                    start and the place to stop matching are in
5464                                    the same string, then set to the place to
5465                                    stop, otherwise, for now have to use the end
5466                                    of the first string.  */
5467
5468                                 dend2 = ((FIRST_STRING_P(regstart[regno])
5469                                           == FIRST_STRING_P(regend[regno]))
5470                                          ? regend[regno] : end_match_1);
5471                                 for (;;) {
5472                                         /* If necessary, advance to next segment
5473                                            in register contents.  */
5474                                         while (d2 == dend2) {
5475                                                 if (dend2 == end_match_2)
5476                                                         break;
5477                                                 if (dend2 == regend[regno])
5478                                                         break;
5479
5480                                                 /* End of string1 => advance to
5481                                                    string2. */
5482                                                 d2 = string2;
5483                                                 dend2 = regend[regno];
5484                                         }
5485                                         /* At end of register contents =>
5486                                            success */
5487                                         if (d2 == dend2)
5488                                                 break;
5489
5490                                         /* If necessary, advance to next segment
5491                                            in data.  */
5492                                         REGEX_PREFETCH();
5493
5494                                         /* How many characters left in this segment to match.  */
5495                                         mcnt = dend - d;
5496
5497                                         /* Want how many consecutive characters
5498                                            we can match in one shot, so, if
5499                                            necessary, adjust the count.  */
5500                                         if (mcnt > dend2 - d2)
5501                                                 mcnt = dend2 - d2;
5502
5503                                         /* Compare that many; failure if
5504                                            mismatch, else move past them.  */
5505                                         if (TRANSLATE_P(translate)
5506                                             ? bcmp_translate(
5507                                                     (const unsigned char*)d,
5508                                                     (const unsigned char*)d2,
5509                                                     mcnt, translate)
5510                                             : memcmp(d, d2, mcnt)) {
5511                                                 goto fail;
5512                                         }
5513                                         d += mcnt, d2 += mcnt;
5514
5515                                         /* Do this because we've match some
5516                                            characters.  */
5517                                         SET_REGS_MATCHED();
5518                                 }
5519                         }
5520                         break;
5521
5522                         /* begline matches the empty string at the beginning of
5523                            the string (unless `not_bol' is set in `bufp'), and,
5524                            if `newline_anchor' is set, after newlines.  */
5525                 case begline:
5526                         DEBUG_PRINT1("EXECUTING begline.\n");
5527
5528                         if (AT_STRINGS_BEG(d)) {
5529                                 if (!bufp->not_bol)
5530                                         break;
5531                         } else if (d[-1] == '\n' && bufp->newline_anchor) {
5532                                 break;
5533                         }
5534                         /* In all other cases, we fail.  */
5535                         goto fail;
5536
5537                         /* endline is the dual of begline.  */
5538                 case endline:
5539                         DEBUG_PRINT1("EXECUTING endline.\n");
5540
5541                         if (AT_STRINGS_END(d)) {
5542                                 if (!bufp->not_eol)
5543                                         break;
5544                         }
5545
5546                         /* We have to ``prefetch'' the next character.  */
5547                         else if ((d == end1 ? *string2 : *d) == '\n'
5548                                  && bufp->newline_anchor) {
5549                                 break;
5550                         }
5551                         goto fail;
5552
5553                         /* Match at the very beginning of the data.  */
5554                 case begbuf:
5555                         DEBUG_PRINT1("EXECUTING begbuf.\n");
5556                         if (AT_STRINGS_BEG(d))
5557                                 break;
5558                         goto fail;
5559
5560                         /* Match at the very end of the data.  */
5561                 case endbuf:
5562                         DEBUG_PRINT1("EXECUTING endbuf.\n");
5563                         if (AT_STRINGS_END(d))
5564                                 break;
5565                         goto fail;
5566
5567                         /* on_failure_keep_string_jump is used to optimize
5568                            `.*\n'.  It pushes NULL as the value for the string
5569                            on the stack.  Then `pop_failure_point' will keep the
5570                            current value for the string, instead of restoring
5571                            it.  To see why, consider matching `foo\nbar' against
5572                            `.*\n'.  The .* matches the foo; then the . fails
5573                            against the \n.  But the next thing we want to do is
5574                            match the \n against the \n; if we restored the
5575                            string value, we would be back at the foo.
5576
5577                            Because this is used only in specific cases, we don't
5578                            need to check all the things that `on_failure_jump'
5579                            does, to make sure the right things get saved on the
5580                            stack.  Hence we don't share its code.  The only
5581                            reason to push anything on the stack at all is that
5582                            otherwise we would have to change `anychar's code to
5583                            do something besides goto fail in this case; that
5584                            seems worse than this.  */
5585                 case on_failure_keep_string_jump:
5586                         DEBUG_PRINT1("EXECUTING on_failure_keep_string_jump");
5587
5588                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5589                         DEBUG_PRINT3(" %d (to 0x%lx):\n", mcnt,
5590                                      (long)(p + mcnt));
5591
5592                         PUSH_FAILURE_POINT(p + mcnt, (unsigned char *)0, -2);
5593                         break;
5594
5595                         /* Uses of on_failure_jump:
5596
5597                            Each alternative starts with an on_failure_jump that
5598                            points to the beginning of the next alternative.
5599                            Each alternative except the last ends with a jump
5600                            that in effect jumps past the rest of the
5601                            alternatives.  (They really jump to the ending jump
5602                            of the following alternative, because tensioning
5603                            these jumps is a hassle.)
5604
5605                            Repeats start with an on_failure_jump that points
5606                            past both the repetition text and either the
5607                            following jump or pop_failure_jump back to this
5608                            on_failure_jump.  */
5609                 case on_failure_jump:
5610                       on_failure:
5611                         DEBUG_PRINT1("EXECUTING on_failure_jump");
5612
5613                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5614                         DEBUG_PRINT3(" %d (to 0x%lx)", mcnt, (long)(p + mcnt));
5615
5616                         /* If this on_failure_jump comes right before a group
5617                            (i.e., the original * applied to a group), save the
5618                            information for that group and all inner ones, so
5619                            that if we fail back to this point, the group's
5620                            information will be correct.  For example, in
5621                            \(a*\)*\1, we need the preceding group, and in
5622                            \(\(a*\)b*\)\2, we need the inner group.  */
5623
5624                         /* We can't use `p' to check ahead because we push
5625                            a failure point to `p + mcnt' after we do this.  */
5626                         p1 = p;
5627
5628                         /* We need to skip no_op's before we look for the
5629                            start_memory in case this on_failure_jump is
5630                            happening as the result of a completed succeed_n, as
5631                            in \(a\)\{1,3\}b\1 against aba.  */
5632                         while (p1 < pend && (re_opcode_t) * p1 == no_op)
5633                                 p1++;
5634
5635                         if (p1 < pend && (re_opcode_t) * p1 == start_memory) {
5636                                 /* We have a new highest active register now.
5637                                    This will get reset at the start_memory we
5638                                    are about to get to, but we will have saved
5639                                    all the registers relevant to this repetition
5640                                    op, as described above.  */
5641                                 highest_active_reg = *(p1 + 1) + *(p1 + 2);
5642                                 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5643                                         lowest_active_reg = *(p1 + 1);
5644                         }
5645
5646                         DEBUG_PRINT1(":\n");
5647                         PUSH_FAILURE_POINT(p + mcnt, d, -2);
5648                         break;
5649
5650                         /* A smart repeat ends with `maybe_pop_jump'.
5651                            We change it to either `pop_failure_jump' or `jump'.  */
5652                 case maybe_pop_jump:
5653                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5654                         DEBUG_PRINT2("EXECUTING maybe_pop_jump %d.\n", mcnt);
5655                         {
5656                                 REGISTER unsigned char *p2 = p;
5657
5658                                 /* Compare the beginning of the repeat with what
5659                                    in the pattern follows its end. If we can
5660                                    establish that there is nothing that they
5661                                    would both match, i.e., that we would have to
5662                                    backtrack because of (as in, e.g., `a*a')
5663                                    then we can change to pop_failure_jump,
5664                                    because we'll never have to backtrack.
5665
5666                                    This is not true in the case of alternatives:
5667                                    in `(a|ab)*' we do need to backtrack to the
5668                                    `ab' alternative (e.g., if the string was
5669                                    `ab').  But instead of trying to detect that
5670                                    here, the alternative has put on a dummy
5671                                    failure point which is what we will end up
5672                                    popping.  */
5673
5674                                 /* Skip over open/close-group commands.  If what
5675                                    follows this loop is a ...+ construct, look
5676                                    at what begins its body, since we will have
5677                                    to match at least one of that.  */
5678                                 while (1) {
5679                                         if (p2 + 2 < pend
5680                                             && ((re_opcode_t) * p2 ==
5681                                                 stop_memory
5682                                                 || (re_opcode_t) * p2 ==
5683                                                 start_memory))
5684                                                 p2 += 3;
5685                                         else if (p2 + 6 < pend
5686                                                  && (re_opcode_t) * p2 ==
5687                                                  dummy_failure_jump)
5688                                                 p2 += 6;
5689                                         else
5690                                                 break;
5691                                 }
5692
5693                                 p1 = p + mcnt;
5694                                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5695                                    to the `maybe_finalize_jump' of this case.  Examine what
5696                                    follows.  */
5697
5698                                 /* If we're at the end of the pattern, we can change.  */
5699                                 if (p2 == pend) {
5700                                         /* Consider what happens when matching ":\(.*\)"
5701                                            against ":/".  I don't really understand this code
5702                                            yet.  */
5703                                         p[-3] = (unsigned char)pop_failure_jump;
5704                                         DEBUG_PRINT1
5705                                             ("  End of pattern: change to `pop_failure_jump'.\n");
5706                                 }
5707
5708                                 else if ((re_opcode_t) * p2 == exactn
5709                                          || (bufp->newline_anchor
5710                                              && (re_opcode_t) * p2 ==
5711                                              endline)) {
5712                                         REGISTER unsigned char c =
5713                                             *p2 ==
5714                                             (unsigned char)endline ? '\n' :
5715                                             p2[2];
5716
5717                                         if ((re_opcode_t) p1[3] == exactn
5718                                             && p1[5] != c) {
5719                                                 p[-3] =
5720                                                     (unsigned char)
5721                                                     pop_failure_jump;
5722                                                 DEBUG_PRINT3
5723                                                     ("  %c != %c => pop_failure_jump.\n",
5724                                                      c, p1[5]);
5725                                         }
5726
5727                                         else if ((re_opcode_t) p1[3] == charset
5728                                                  || (re_opcode_t) p1[3] ==
5729                                                  charset_not) {
5730                                                 int not_p =
5731                                                     (re_opcode_t) p1[3] ==
5732                                                     charset_not;
5733
5734                                                 if (c <
5735                                                     (unsigned char)(p1[4] *
5736                                                                     BYTEWIDTH)
5737                                                     && p1[5 +
5738                                                           c /
5739                                                           BYTEWIDTH] & (1 << (c
5740                                                                               %
5741                                                                               BYTEWIDTH)))
5742                                                         not_p = !not_p;
5743
5744                                                 /* `not_p' is equal to 1 if c would match, which means
5745                                                    that we can't change to pop_failure_jump.  */
5746                                                 if (!not_p) {
5747                                                         p[-3] =
5748                                                             (unsigned char)
5749                                                             pop_failure_jump;
5750                                                         DEBUG_PRINT1
5751                                                             ("  No match => pop_failure_jump.\n");
5752                                                 }
5753                                         }
5754                                 } else if ((re_opcode_t) * p2 == charset) {
5755 #ifdef REGEX_DEBUG_FLAG
5756                                         REGISTER unsigned char c
5757                                             =
5758                                             *p2 ==
5759                                             (unsigned char)endline ? '\n' :
5760                                             p2[2];
5761 #endif
5762
5763                                         if ((re_opcode_t) p1[3] == exactn
5764                                             && !((int)p2[1] * BYTEWIDTH >
5765                                                  (int)p1[5]
5766                                                  && (p2[2 + p1[5] / BYTEWIDTH]
5767                                                      & (1 <<
5768                                                         (p1[5] %
5769                                                          BYTEWIDTH))))) {
5770                                                 p[-3] =
5771                                                     (unsigned char)
5772                                                     pop_failure_jump;
5773                                                 DEBUG_PRINT3
5774                                                     ("  %c != %c => pop_failure_jump.\n",
5775                                                      c, p1[5]);
5776                                         }
5777
5778                                         else if ((re_opcode_t) p1[3] ==
5779                                                  charset_not) {
5780                                                 int idx;
5781                                                 /* We win if the charset_not inside the loop
5782                                                    lists every character listed in the charset after.  */
5783                                                 for (idx = 0; idx < (int)p2[1];
5784                                                      idx++)
5785                                                         if (!
5786                                                             (p2[2 + idx] == 0
5787                                                              || (idx <
5788                                                                  (int)p1[4]
5789                                                                  &&
5790                                                                  ((p2[2 + idx] &
5791                                                                    ~p1[5 +
5792                                                                        idx]) ==
5793                                                                   0))))
5794                                                                 break;
5795
5796                                                 if (idx == p2[1]) {
5797                                                         p[-3] =
5798                                                             (unsigned char)
5799                                                             pop_failure_jump;
5800                                                         DEBUG_PRINT1
5801                                                             ("  No match => pop_failure_jump.\n");
5802                                                 }
5803                                         } else if ((re_opcode_t) p1[3] ==
5804                                                    charset) {
5805                                                 int idx;
5806                                                 /* We win if the charset inside the loop
5807                                                    has no overlap with the one after the loop.  */
5808                                                 for (idx = 0;
5809                                                      idx < (int)p2[1]
5810                                                      && idx < (int)p1[4]; idx++)
5811                                                         if ((p2[2 + idx] &
5812                                                              p1[5 + idx]) != 0)
5813                                                                 break;
5814
5815                                                 if (idx == p2[1]
5816                                                     || idx == p1[4]) {
5817                                                         p[-3] =
5818                                                             (unsigned char)
5819                                                             pop_failure_jump;
5820                                                         DEBUG_PRINT1
5821                                                             ("  No match => pop_failure_jump.\n");
5822                                                 }
5823                                         }
5824                                 }
5825                         }
5826                         p -= 2; /* Point at relative address again.  */
5827                         if ((re_opcode_t) p[-1] != pop_failure_jump) {
5828                                 p[-1] = (unsigned char)jump;
5829                                 DEBUG_PRINT1("  Match => jump.\n");
5830                                 goto unconditional_jump;
5831                         }
5832                         /* Note fall through.  */
5833
5834                         /* The end of a simple repeat has a pop_failure_jump
5835                            back to its matching on_failure_jump, where the
5836                            latter will push a failure point.  The
5837                            pop_failure_jump takes off failure points put on by
5838                            this pop_failure_jump's matching on_failure_jump; we
5839                            got through the pattern to here from the matching
5840                            on_failure_jump, so didn't fail.  */
5841                 case pop_failure_jump: {
5842                         /* We need to pass separate storage for the
5843                            lowest and highest registers, even though we
5844                            don't care about the actual values.
5845                            Otherwise, we will restore only one register
5846                            from the stack, since lowest will == highest
5847                            in `pop_failure_point'.  */
5848                         int dummy_low_reg, dummy_high_reg;
5849                         re_char *pdummy;
5850                         re_char *sdummy = NULL;
5851
5852                         DEBUG_PRINT1("EXECUTING pop_failure_jump.\n");
5853                         POP_FAILURE_POINT(sdummy, pdummy,
5854                                           dummy_low_reg, dummy_high_reg,
5855                                           reg_dummy, reg_dummy,
5856                                           reg_info_dummy);
5857                 }
5858                         /* Note fall through.  */
5859
5860                         /* Unconditionally jump (without popping any failure
5861                            points).  */
5862                 case jump:
5863                 unconditional_jump:
5864                         /* Get the amount to jump. */
5865                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5866                         DEBUG_PRINT2("EXECUTING jump %d ", mcnt);
5867                         p += mcnt;      /* Do the jump.  */
5868                         DEBUG_PRINT2("(to 0x%lx).\n", (long)p);
5869                         break;
5870
5871                         /* We need this opcode so we can detect where alternatives end
5872                            in `group_match_null_string_p' et al.  */
5873                 case jump_past_alt:
5874                         DEBUG_PRINT1("EXECUTING jump_past_alt.\n");
5875                         goto unconditional_jump;
5876
5877                         /* Normally, the on_failure_jump pushes a failure point, which
5878                            then gets popped at pop_failure_jump.  We will end up at
5879                            pop_failure_jump, also, and with a pattern of, say, `a+', we
5880                            are skipping over the on_failure_jump, so we have to push
5881                            something meaningless for pop_failure_jump to pop.  */
5882                 case dummy_failure_jump:
5883                         DEBUG_PRINT1("EXECUTING dummy_failure_jump.\n");
5884                         /* It doesn't matter what we push for the string here.  What
5885                            the code at `fail' tests is the value for the pattern.  */
5886                         PUSH_FAILURE_POINT(NULL, NULL, -2);
5887                         goto unconditional_jump;
5888
5889                         /* At the end of an alternative, we need to push a dummy failure
5890                            point in case we are followed by a `pop_failure_jump', because
5891                            we don't want the failure point for the alternative to be
5892                            popped.  For example, matching `(a|ab)*' against `aab'
5893                            requires that we match the `ab' alternative.  */
5894                 case push_dummy_failure:
5895                         DEBUG_PRINT1("EXECUTING push_dummy_failure.\n");
5896                         /* See comments just above at `dummy_failure_jump' about the
5897                            two zeroes.  */
5898                         PUSH_FAILURE_POINT(NULL, NULL, -2);
5899                         break;
5900
5901                         /* Have to succeed matching what follows at least n times.
5902                            After that, handle like `on_failure_jump'.  */
5903                 case succeed_n:
5904                         EXTRACT_NUMBER(mcnt, p + 2);
5905                         DEBUG_PRINT2("EXECUTING succeed_n %d.\n", mcnt);
5906
5907                         assert(mcnt >= 0);
5908                         /* Originally, this is how many times we HAVE to succeed.  */
5909                         if (mcnt > 0) {
5910                                 mcnt--;
5911                                 p += 2;
5912                                 STORE_NUMBER_AND_INCR(p, mcnt);
5913                                 DEBUG_PRINT3("  Setting 0x%lx to %d.\n",
5914                                              (long)p, mcnt);
5915                         } else if (mcnt == 0) {
5916                                 DEBUG_PRINT2
5917                                     ("  Setting two bytes from 0x%lx to no_op.\n",
5918                                      (long)(p + 2));
5919                                 p[2] = (unsigned char)no_op;
5920                                 p[3] = (unsigned char)no_op;
5921                                 goto on_failure;
5922                         }
5923                         break;
5924
5925                 case jump_n:
5926                         EXTRACT_NUMBER(mcnt, p + 2);
5927                         DEBUG_PRINT2("EXECUTING jump_n %d.\n", mcnt);
5928
5929                         /* Originally, this is how many times we CAN jump.  */
5930                         if (mcnt) {
5931                                 mcnt--;
5932                                 STORE_NUMBER(p + 2, mcnt);
5933                                 goto unconditional_jump;
5934                         }
5935                         /* If don't have to jump any more, skip over the rest of command.  */
5936                         else
5937                                 p += 4;
5938                         break;
5939
5940                 case set_number_at:
5941                         {
5942                                 DEBUG_PRINT1("EXECUTING set_number_at.\n");
5943
5944                                 EXTRACT_NUMBER_AND_INCR(mcnt, p);
5945                                 p1 = p + mcnt;
5946                                 EXTRACT_NUMBER_AND_INCR(mcnt, p);
5947                                 DEBUG_PRINT3("  Setting 0x%lx to %d.\n",
5948                                              (long)p1, mcnt);
5949                                 STORE_NUMBER(p1, mcnt);
5950                                 break;
5951                         }
5952
5953                 case wordbound:
5954                         DEBUG_PRINT1("EXECUTING wordbound.\n");
5955                         should_succeed = 1;
5956                       matchwordbound:
5957                         {
5958                                 /* XEmacs change */
5959                                 /* Straightforward and (I hope) correct implementation.
5960                                    Probably should be optimized by arranging to compute
5961                                    pos only once. */
5962                                 /* emch1 is the character before d, syn1 is the syntax of
5963                                    emch1, emch2 is the character at d, and syn2 is the
5964                                    syntax of emch2. */
5965                                 Emchar emch1, emch2;
5966                                 /* GCC isn't smart enough to see these are initialized if used. */
5967                                 int syn1 = 0, syn2 = 0;
5968                                 re_char *d_before, *d_after;
5969                                 int result,
5970                                     at_beg = AT_STRINGS_BEG(d),
5971                                     at_end = AT_STRINGS_END(d);
5972 #ifdef emacs
5973                                 int xpos;
5974 #endif
5975
5976                                 if (at_beg && at_end) {
5977                                         result = 0;
5978                                 } else {
5979                                         if (!at_beg) {
5980                                                 d_before =
5981                                                     POS_BEFORE_GAP_UNSAFE(d);
5982                                                 DEC_CHARPTR(d_before);
5983                                                 emch1 =
5984                                                     charptr_emchar(d_before);
5985 #ifdef emacs
5986                                                 xpos =
5987                                                     SYNTAX_CACHE_BYTE_TO_CHAR
5988                                                     (PTR_TO_OFFSET(d)) - 1;
5989                                                 UPDATE_SYNTAX_CACHE(xpos);
5990 #endif
5991                                                 syn1 = SYNTAX_FROM_CACHE
5992                                                     (XCHAR_TABLE
5993                                                      (regex_emacs_buffer->
5994                                                       mirror_syntax_table),
5995                                                      emch1);
5996                                         }
5997                                         if (!at_end) {
5998                                                 d_after =
5999                                                     POS_AFTER_GAP_UNSAFE(d);
6000                                                 emch2 = charptr_emchar(d_after);
6001 #ifdef emacs
6002                                                 xpos =
6003                                                     SYNTAX_CACHE_BYTE_TO_CHAR
6004                                                     (PTR_TO_OFFSET(d));
6005                                                 UPDATE_SYNTAX_CACHE_FORWARD(xpos
6006                                                                             +
6007                                                                             1);
6008 #endif
6009                                                 syn2 = SYNTAX_FROM_CACHE
6010                                                     (XCHAR_TABLE
6011                                                      (regex_emacs_buffer->
6012                                                       mirror_syntax_table),
6013                                                      emch2);
6014                                         }
6015
6016                                         if (at_beg)
6017                                                 result = (syn2 == Sword);
6018                                         else if (at_end)
6019                                                 result = (syn1 == Sword);
6020                                         else
6021                                                 result =
6022                                                     ((syn1 == Sword) !=
6023                                                      (syn2 == Sword));
6024                                 }
6025
6026                                 if (result == should_succeed)
6027                                         break;
6028                                 goto fail;
6029                         }
6030
6031                 case notwordbound:
6032                         DEBUG_PRINT1("EXECUTING notwordbound.\n");
6033                         should_succeed = 0;
6034                         goto matchwordbound;
6035
6036                 case wordbeg:
6037                         DEBUG_PRINT1("EXECUTING wordbeg.\n");
6038                         if (AT_STRINGS_END(d))
6039                                 goto fail;
6040                         {
6041                                 /* XEmacs: this originally read:
6042
6043                                    if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
6044                                    break;
6045
6046                                  */
6047                                 re_char *dtmp = POS_AFTER_GAP_UNSAFE(d);
6048                                 Emchar emch = charptr_emchar(dtmp);
6049 #ifdef emacs
6050                                 int charpos =
6051                                     SYNTAX_CACHE_BYTE_TO_CHAR(PTR_TO_OFFSET(d));
6052                                 UPDATE_SYNTAX_CACHE(charpos);
6053 #endif
6054                                 if (SYNTAX_FROM_CACHE
6055                                     (XCHAR_TABLE
6056                                      (regex_emacs_buffer->mirror_syntax_table),
6057                                      emch) != Sword)
6058                                         goto fail;
6059                                 if (AT_STRINGS_BEG(d))
6060                                         break;
6061                                 dtmp = POS_BEFORE_GAP_UNSAFE(d);
6062                                 DEC_CHARPTR(dtmp);
6063                                 emch = charptr_emchar(dtmp);
6064 #ifdef emacs
6065                                 UPDATE_SYNTAX_CACHE_BACKWARD(charpos - 1);
6066 #endif
6067                                 if (SYNTAX_FROM_CACHE
6068                                     (XCHAR_TABLE
6069                                      (regex_emacs_buffer->mirror_syntax_table),
6070                                      emch) != Sword)
6071                                         break;
6072                                 goto fail;
6073                         }
6074
6075                 case wordend:
6076                         DEBUG_PRINT1("EXECUTING wordend.\n");
6077                         if (AT_STRINGS_BEG(d))
6078                                 goto fail;
6079                         {
6080                                 /* XEmacs: this originally read:
6081
6082                                    if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
6083                                    && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
6084                                    break;
6085
6086                                    The or condition is incorrect (reversed).
6087                                  */
6088                                 re_char *dtmp;
6089                                 Emchar emch;
6090 #ifdef emacs
6091                                 int charpos =
6092                                     SYNTAX_CACHE_BYTE_TO_CHAR(PTR_TO_OFFSET(d))
6093                                     - 1;
6094                                 UPDATE_SYNTAX_CACHE(charpos);
6095 #endif
6096                                 dtmp = POS_BEFORE_GAP_UNSAFE(d);
6097                                 DEC_CHARPTR(dtmp);
6098                                 emch = charptr_emchar(dtmp);
6099                                 if (SYNTAX_FROM_CACHE
6100                                     (XCHAR_TABLE
6101                                      (regex_emacs_buffer->mirror_syntax_table),
6102                                      emch) != Sword)
6103                                         goto fail;
6104                                 if (AT_STRINGS_END(d))
6105                                         break;
6106                                 dtmp = POS_AFTER_GAP_UNSAFE(d);
6107                                 emch = charptr_emchar(dtmp);
6108 #ifdef emacs
6109                                 UPDATE_SYNTAX_CACHE_FORWARD(charpos + 1);
6110 #endif
6111                                 if (SYNTAX_FROM_CACHE
6112                                     (XCHAR_TABLE
6113                                      (regex_emacs_buffer->mirror_syntax_table),
6114                                      emch) != Sword)
6115                                         break;
6116                                 goto fail;
6117                         }
6118
6119 #ifdef emacs
6120                 case before_dot:
6121                         DEBUG_PRINT1("EXECUTING before_dot.\n");
6122                         if (!
6123                             (NILP(regex_match_object)
6124                              || BUFFERP(regex_match_object))
6125                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6126                                 >= BUF_PT(regex_emacs_buffer)))
6127                                 goto fail;
6128                         break;
6129
6130                 case at_dot:
6131                         DEBUG_PRINT1("EXECUTING at_dot.\n");
6132                         if (!
6133                             (NILP(regex_match_object)
6134                              || BUFFERP(regex_match_object))
6135                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6136                                 != BUF_PT(regex_emacs_buffer)))
6137                                 goto fail;
6138                         break;
6139
6140                 case after_dot:
6141                         DEBUG_PRINT1("EXECUTING after_dot.\n");
6142                         if (!
6143                             (NILP(regex_match_object)
6144                              || BUFFERP(regex_match_object))
6145                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6146                                 <= BUF_PT(regex_emacs_buffer)))
6147                                 goto fail;
6148                         break;
6149 #if 0                           /* not emacs19 */
6150                 case at_dot:
6151                         DEBUG_PRINT1("EXECUTING at_dot.\n");
6152                         if (BUF_PTR_BYTE_POS
6153                             (regex_emacs_buffer,
6154                              (unsigned char *)d) + 1 !=
6155                             BUF_PT(regex_emacs_buffer))
6156                                 goto fail;
6157                         break;
6158 #endif                          /* not emacs19 */
6159
6160                 case syntaxspec:
6161                         DEBUG_PRINT2("EXECUTING syntaxspec %d.\n", mcnt);
6162                         mcnt = *p++;
6163                         goto matchsyntax;
6164
6165                 case wordchar:
6166                         DEBUG_PRINT1("EXECUTING Emacs wordchar.\n");
6167                         mcnt = (int)Sword;
6168                       matchsyntax:
6169                         should_succeed = 1;
6170                       matchornotsyntax:
6171                         {
6172                                 int matches;
6173                                 Emchar emch;
6174
6175                                 REGEX_PREFETCH();
6176 #ifdef emacs
6177                                 {
6178                                         int charpos =
6179                                             SYNTAX_CACHE_BYTE_TO_CHAR
6180                                             (PTR_TO_OFFSET(d));
6181                                         UPDATE_SYNTAX_CACHE(charpos);
6182                                 }
6183 #endif
6184
6185                                 emch = charptr_emchar((const Bufbyte *)d);
6186                                 matches =
6187                                     (SYNTAX_FROM_CACHE
6188                                      (XCHAR_TABLE
6189                                       (regex_emacs_buffer->mirror_syntax_table),
6190                                       emch) == (enum syntaxcode)mcnt);
6191                                 INC_CHARPTR(d);
6192                                 if (matches != should_succeed)
6193                                         goto fail;
6194                                 SET_REGS_MATCHED();
6195                         }
6196                         break;
6197
6198                 case notsyntaxspec:
6199                         DEBUG_PRINT2("EXECUTING notsyntaxspec %d.\n", mcnt);
6200                         mcnt = *p++;
6201                         goto matchnotsyntax;
6202
6203                 case notwordchar:
6204                         DEBUG_PRINT1("EXECUTING Emacs notwordchar.\n");
6205                         mcnt = (int)Sword;
6206                       matchnotsyntax:
6207                         should_succeed = 0;
6208                         goto matchornotsyntax;
6209
6210 #ifdef MULE
6211 /* 97/2/17 jhod Mule category code patch */
6212                 case categoryspec:
6213                         should_succeed = 1;
6214                       matchornotcategory:
6215                         {
6216                                 Emchar emch;
6217
6218                                 mcnt = *p++;
6219                                 REGEX_PREFETCH();
6220                                 emch = charptr_emchar((const Bufbyte *)d);
6221                                 INC_CHARPTR(d);
6222                                 if (check_category_char
6223                                     (emch, regex_emacs_buffer->category_table,
6224                                      mcnt, should_succeed))
6225                                         goto fail;
6226                                 SET_REGS_MATCHED();
6227                         }
6228                         break;
6229
6230                 case notcategoryspec:
6231                         should_succeed = 0;
6232                         goto matchornotcategory;
6233 /* end of category patch */
6234 #endif                          /* MULE */
6235 #else                           /* not emacs */
6236                 case wordchar:
6237                         DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n");
6238                         REGEX_PREFETCH();
6239                         if (!WORDCHAR_P_UNSAFE((int)(*d)))
6240                                 goto fail;
6241                         SET_REGS_MATCHED();
6242                         d++;
6243                         break;
6244
6245                 case notwordchar:
6246                         DEBUG_PRINT1("EXECUTING non-Emacs notwordchar.\n");
6247                         REGEX_PREFETCH();
6248                         if (!WORDCHAR_P_UNSAFE((int)(*d)))
6249                                 goto fail;
6250                         SET_REGS_MATCHED();
6251                         d++;
6252                         break;
6253 #endif                          /* emacs */
6254
6255                 default:
6256                         abort();
6257                 }
6258                 /* Successfully executed one pattern command; keep going.  */
6259                 continue;
6260
6261                 /* We goto here if a matching operation fails. */
6262         fail:
6263                 if (!FAIL_STACK_EMPTY()) {
6264                         /* A restart point is known.  Restore to that state.  */
6265                         DEBUG_PRINT1("\nFAIL:\n");
6266                         POP_FAILURE_POINT(d, p /* used to be const char* */,
6267                                           lowest_active_reg, highest_active_reg,
6268                                           regstart, regend, reg_info);
6269
6270                         /* If this failure point is a dummy, try the next one.  */
6271                         if (!p)
6272                                 goto fail;
6273
6274                         /* If we failed to the end of the pattern, don't examine
6275                            *p.  */
6276                         assert(p <= pend);
6277                         if (p < pend) {
6278                                 re_bool is_a_jump_n = false;
6279
6280                                 /* If failed to a backwards jump that's part of
6281                                    a repetition loop, need to pop this failure
6282                                    point and use the next one.  */
6283                                 switch ((unsigned int)(re_opcode_t)*p) {
6284                                 case jump_n:
6285                                         is_a_jump_n = true;
6286                                 case maybe_pop_jump:
6287                                 case pop_failure_jump:
6288                                 case jump:
6289                                         p1 = p + 1;
6290                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6291                                         p1 += mcnt;
6292
6293                                         if ((is_a_jump_n
6294                                              && (re_opcode_t) * p1 == succeed_n)
6295                                             || (!is_a_jump_n
6296                                                 && (re_opcode_t) * p1 ==
6297                                                 on_failure_jump))
6298                                                 goto fail;
6299                                         break;
6300                                 default:
6301                                         /* do nothing */ ;
6302                                 }
6303                         }
6304
6305                         if (d >= string1 && d <= end1)
6306                                 dend = end_match_1;
6307                 } else
6308                         break;  /* Matching at this starting point really fails.  */
6309         }                       /* for (;;) */
6310
6311         if (best_regs_set)
6312                 goto restore_best_regs;
6313
6314         FREE_VARIABLES();
6315
6316         return -1;              /* Failure to match.  */
6317 }                               /* re_match_2 */
6318 \f
6319 /* Subroutine definitions for re_match_2.  */
6320
6321 /* We are passed P pointing to a register number after a start_memory.
6322
6323    Return true if the pattern up to the corresponding stop_memory can
6324    match the empty string, and false otherwise.
6325
6326    If we find the matching stop_memory, sets P to point to one past its number.
6327    Otherwise, sets P to an undefined byte less than or equal to END.
6328
6329    We don't handle duplicates properly (yet).  */
6330
6331 static re_bool
6332 group_match_null_string_p(unsigned char **p, unsigned char *end,
6333                           register_info_type * register_info)
6334 {
6335         int mcnt;
6336         /* Point to after the args to the start_memory.  */
6337         unsigned char *p1 = *p + 2;
6338
6339         while (p1 < end) {
6340                 /* Skip over opcodes that can match nothing, and return true or
6341                    false, as appropriate, when we get to one that can't, or to the
6342                    matching stop_memory.  */
6343
6344                 switch ((unsigned int)(re_opcode_t)*p1) {
6345                         /* Could be either a loop or a series of alternatives.  */
6346                 case on_failure_jump:
6347                         p1++;
6348                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6349
6350                         /* If the next operation is not a jump backwards in the
6351                            pattern.  */
6352
6353                         if (mcnt >= 0) {
6354                                 /* Go through the on_failure_jumps of the alternatives,
6355                                    seeing if any of the alternatives cannot match nothing.
6356                                    The last alternative starts with only a jump,
6357                                    whereas the rest start with on_failure_jump and end
6358                                    with a jump, e.g., here is the pattern for `a|b|c':
6359
6360                                    /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
6361                                    /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
6362                                    /exactn/1/c
6363
6364                                    So, we have to first go through the first (n-1)
6365                                    alternatives and then deal with the last one separately.  */
6366
6367                                 /* Deal with the first (n-1) alternatives, which start
6368                                    with an on_failure_jump (see above) that jumps to right
6369                                    past a jump_past_alt.  */
6370
6371                                 while ((re_opcode_t) p1[mcnt - 3] ==
6372                                        jump_past_alt) {
6373                                         /* `mcnt' holds how many bytes long the alternative
6374                                            is, including the ending `jump_past_alt' and
6375                                            its number.  */
6376
6377                                         if (!alt_match_null_string_p
6378                                             (p1, p1 + mcnt - 3, register_info))
6379                                                 return false;
6380
6381                                         /* Move to right after this alternative, including the
6382                                            jump_past_alt.  */
6383                                         p1 += mcnt;
6384
6385                                         /* Break if it's the beginning of an n-th alternative
6386                                            that doesn't begin with an on_failure_jump.  */
6387                                         if ((re_opcode_t) * p1 !=
6388                                             on_failure_jump)
6389                                                 break;
6390
6391                                         /* Still have to check that it's not an n-th
6392                                            alternative that starts with an on_failure_jump.  */
6393                                         p1++;
6394                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6395                                         if ((re_opcode_t) p1[mcnt - 3] !=
6396                                             jump_past_alt) {
6397                                                 /* Get to the beginning of the n-th alternative.  */
6398                                                 p1 -= 3;
6399                                                 break;
6400                                         }
6401                                 }
6402
6403                                 /* Deal with the last alternative: go back and get number
6404                                    of the `jump_past_alt' just before it.  `mcnt' contains
6405                                    the length of the alternative.  */
6406                                 EXTRACT_NUMBER(mcnt, p1 - 2);
6407
6408                                 if (!alt_match_null_string_p
6409                                     (p1, p1 + mcnt, register_info))
6410                                         return false;
6411
6412                                 p1 += mcnt;     /* Get past the n-th alternative.  */
6413                         }       /* if mcnt > 0 */
6414                         break;
6415
6416                 case stop_memory:
6417                         assert(p1[1] == **p);
6418                         *p = p1 + 2;
6419                         return true;
6420
6421                 default:
6422                         if (!common_op_match_null_string_p(&p1, end, register_info))
6423                                 return false;
6424                 }
6425         }                       /* while p1 < end */
6426
6427         return false;
6428 }                               /* group_match_null_string_p */
6429
6430 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
6431    It expects P to be the first byte of a single alternative and END one
6432    byte past the last. The alternative can contain groups.  */
6433
6434 static re_bool
6435 alt_match_null_string_p(unsigned char *p, unsigned char *end,
6436                         register_info_type * register_info)
6437 {
6438         int mcnt;
6439         unsigned char *p1 = p;
6440
6441         while (p1 < end) {
6442                 /* Skip over opcodes that can match nothing, and break when we get
6443                    to one that can't.  */
6444
6445                 switch ((unsigned int)(re_opcode_t)*p1) {
6446                         /* It's a loop.  */
6447                 case on_failure_jump:
6448                         p1++;
6449                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6450                         p1 += mcnt;
6451                         break;
6452
6453                 default:
6454                         if (!common_op_match_null_string_p(&p1, end, register_info))
6455                                 return false;
6456                 }
6457         }                       /* while p1 < end */
6458
6459         return true;
6460 }                               /* alt_match_null_string_p */
6461
6462 /* Deals with the ops common to group_match_null_string_p and
6463    alt_match_null_string_p.
6464
6465    Sets P to one after the op and its arguments, if any.  */
6466
6467 static re_bool
6468 common_op_match_null_string_p(unsigned char **p, unsigned char *end,
6469                               register_info_type * register_info)
6470 {
6471         int mcnt;
6472         re_bool ret;
6473         int reg_no;
6474         unsigned char *p1 = *p;
6475
6476         switch ((unsigned int)(re_opcode_t)*p1++) {
6477         case no_op:
6478         case begline:
6479         case endline:
6480         case begbuf:
6481         case endbuf:
6482         case wordbeg:
6483         case wordend:
6484         case wordbound:
6485         case notwordbound:
6486 #ifdef emacs
6487         case before_dot:
6488         case at_dot:
6489         case after_dot:
6490 #endif
6491                 break;
6492
6493         case start_memory:
6494                 reg_no = *p1;
6495                 assert(reg_no > 0 && reg_no <= MAX_REGNUM);
6496                 ret = group_match_null_string_p(&p1, end, register_info);
6497
6498                 /* Have to set this here in case we're checking a group which
6499                    contains a group and a back reference to it.  */
6500
6501                 if (REG_MATCH_NULL_STRING_P(register_info[reg_no]) ==
6502                     MATCH_NULL_UNSET_VALUE)
6503                         REG_MATCH_NULL_STRING_P(register_info[reg_no]) = ret;
6504
6505                 if (!ret)
6506                         return false;
6507                 break;
6508
6509                 /* If this is an optimized succeed_n for zero times, make the jump.  */
6510         case jump:
6511                 EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6512                 if (mcnt >= 0)
6513                         p1 += mcnt;
6514                 else
6515                         return false;
6516                 break;
6517
6518         case succeed_n:
6519                 /* Get to the number of times to succeed.  */
6520                 p1 += 2;
6521                 EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6522
6523                 if (mcnt == 0) {
6524                         p1 -= 4;
6525                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6526                         p1 += mcnt;
6527                 } else
6528                         return false;
6529                 break;
6530
6531         case duplicate:
6532                 if (!REG_MATCH_NULL_STRING_P(register_info[*p1]))
6533                         return false;
6534                 break;
6535
6536         case set_number_at:
6537                 p1 += 4;
6538
6539         default:
6540                 /* All other opcodes mean we cannot match the empty string.  */
6541                 return false;
6542         }
6543
6544         *p = p1;
6545         return true;
6546 }                               /* common_op_match_null_string_p */
6547
6548 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
6549    bytes; nonzero otherwise.  */
6550
6551 static int
6552 bcmp_translate(re_char *s1, re_char *s2,
6553                REGISTER int len, RE_TRANSLATE_TYPE translate)
6554 {
6555         REGISTER const unsigned char *p1 = s1, *p2 = s2;
6556 #ifdef MULE
6557         const unsigned char *p1_end = s1 + len;
6558         const unsigned char *p2_end = s2 + len;
6559
6560         while (p1 != p1_end && p2 != p2_end) {
6561                 Emchar p1_ch, p2_ch;
6562
6563                 p1_ch = charptr_emchar(p1);
6564                 p2_ch = charptr_emchar(p2);
6565
6566                 if (RE_TRANSLATE(p1_ch)
6567                     != RE_TRANSLATE(p2_ch))
6568                         return 1;
6569                 INC_CHARPTR(p1);
6570                 INC_CHARPTR(p2);
6571         }
6572 #else                           /* not MULE */
6573         while (len) {
6574                 if (RE_TRANSLATE(*p1++) != RE_TRANSLATE(*p2++))
6575                         return 1;
6576                 len--;
6577         }
6578 #endif                          /* MULE */
6579         return 0;
6580 }
6581 \f
6582 /* Entry points for GNU code.  */
6583
6584 /* re_compile_pattern is the GNU regular expression compiler: it
6585    compiles PATTERN (of length SIZE) and puts the result in BUFP.
6586    Returns 0 if the pattern was valid, otherwise an error string.
6587
6588    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6589    are set in BUFP on entry.
6590
6591    We call regex_compile to do the actual compilation.  */
6592
6593 const char *re_compile_pattern(const char *pattern, int length,
6594                                struct re_pattern_buffer *bufp)
6595 {
6596         reg_errcode_t ret;
6597
6598         /* GNU code is written to assume at least RE_NREGS registers will be set
6599            (and at least one extra will be -1).  */
6600         bufp->regs_allocated = REGS_UNALLOCATED;
6601
6602         /* And GNU code determines whether or not to get register information
6603            by passing null for the REGS argument to re_match, etc., not by
6604            setting no_sub.  */
6605         bufp->no_sub = 0;
6606
6607         /* Match anchors at newline.  */
6608         bufp->newline_anchor = 1;
6609
6610         ret = regex_compile((const unsigned char*)pattern,
6611                             length, re_syntax_options, bufp);
6612
6613         if (!ret)
6614                 return NULL;
6615         return gettext(re_error_msgid[(int)ret]);
6616 }
6617 \f
6618 /* Entry points compatible with 4.2 BSD regex library.  We don't define
6619    them unless specifically requested.  */
6620
6621 #ifdef _REGEX_RE_COMP
6622
6623 /* BSD has one and only one pattern buffer.  */
6624 static struct re_pattern_buffer re_comp_buf;
6625
6626 char *re_comp(const char *s)
6627 {
6628         reg_errcode_t ret;
6629
6630         if (!s) {
6631                 if (!re_comp_buf.buffer)
6632                         return gettext("No previous regular expression");
6633                 return 0;
6634         }
6635
6636         if (!re_comp_buf.buffer) {
6637                 re_comp_buf.buffer = (unsigned char *)xmalloc_atomic(200);
6638                 if (re_comp_buf.buffer == NULL)
6639                         return gettext(re_error_msgid[(int)REG_ESPACE]);
6640                 re_comp_buf.allocated = 200;
6641
6642                 re_comp_buf.fastmap = (char *)xmalloc_atomic(1 << BYTEWIDTH);
6643                 if (re_comp_buf.fastmap == NULL)
6644                         return gettext(re_error_msgid[(int)REG_ESPACE]);
6645         }
6646
6647         /* Since `re_exec' always passes NULL for the `regs' argument, we
6648            don't need to initialize the pattern buffer fields which affect it.  */
6649
6650         /* Match anchors at newlines.  */
6651         re_comp_buf.newline_anchor = 1;
6652
6653         ret =
6654             regex_compile((unsigned char *)s, strlen(s), re_syntax_options,
6655                           &re_comp_buf);
6656
6657         if (!ret)
6658                 return NULL;
6659
6660         /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6661         return (char *)gettext(re_error_msgid[(int)ret]);
6662 }
6663
6664 int re_exec(const char *s)
6665 {
6666         const int len = strlen(s);
6667         return
6668             0 <= re_search(&re_comp_buf, s, len, 0, len,
6669                            (struct re_registers *)0);
6670 }
6671 #endif                          /* _REGEX_RE_COMP */
6672 \f
6673 /* POSIX.2 functions.  Don't define these for Emacs.  */
6674
6675 #ifndef emacs
6676
6677 /* regcomp takes a regular expression as a string and compiles it.
6678
6679    PREG is a regex_t *.  We do not expect any fields to be initialized,
6680    since POSIX says we shouldn't.  Thus, we set
6681
6682      `buffer' to the compiled pattern;
6683      `used' to the length of the compiled pattern;
6684      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6685        REG_EXTENDED bit in CFLAGS is set; otherwise, to
6686        RE_SYNTAX_POSIX_BASIC;
6687      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6688      `fastmap' and `fastmap_accurate' to zero;
6689      `re_nsub' to the number of subexpressions in PATTERN.
6690      (non-shy of course.  POSIX probably doesn't know about
6691      shy ones, and in any case they should be invisible.)
6692
6693    PATTERN is the address of the pattern string.
6694
6695    CFLAGS is a series of bits which affect compilation.
6696
6697      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6698      use POSIX basic syntax.
6699
6700      If REG_NEWLINE is set, then . and [^...] don't match newline.
6701      Also, regexec will try a match beginning after every newline.
6702
6703      If REG_ICASE is set, then we considers upper- and lowercase
6704      versions of letters to be equivalent when matching.
6705
6706      If REG_NOSUB is set, then when PREG is passed to regexec, that
6707      routine will report only success or failure, and nothing about the
6708      registers.
6709
6710    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6711    the return codes and their meanings.)  */
6712
6713 int regcomp(regex_t * preg, const char *pattern, int cflags)
6714 {
6715         reg_errcode_t ret;
6716         unsigned syntax
6717             = (cflags & REG_EXTENDED) ?
6718             RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6719
6720         /* regex_compile will allocate the space for the compiled pattern.  */
6721         preg->buffer = 0;
6722         preg->allocated = 0;
6723         preg->used = 0;
6724
6725         /* Don't bother to use a fastmap when searching.  This simplifies the
6726            REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6727            characters after newlines into the fastmap.  This way, we just try
6728            every character.  */
6729         preg->fastmap = 0;
6730
6731         if (cflags & REG_ICASE) {
6732                 int i;
6733
6734                 preg->translate = (char *)xmalloc_atomic(CHAR_SET_SIZE);
6735                 if (preg->translate == NULL)
6736                         return (int)REG_ESPACE;
6737
6738                 /* Map uppercase characters to corresponding lowercase ones.  */
6739                 for (i = 0; i < CHAR_SET_SIZE; i++)
6740                         preg->translate[i] = ISUPPER(i) ? tolower(i) : i;
6741         } else
6742                 preg->translate = NULL;
6743
6744         /* If REG_NEWLINE is set, newlines are treated differently.  */
6745         if (cflags & REG_NEWLINE) {
6746                 /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6747                 syntax &= ~RE_DOT_NEWLINE;
6748                 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6749                 /* It also changes the matching behavior.  */
6750                 preg->newline_anchor = 1;
6751         } else {
6752                 preg->newline_anchor = 0;
6753         }
6754         preg->no_sub = !!(cflags & REG_NOSUB);
6755
6756         /* POSIX says a null character in the pattern terminates it, so we
6757            can use strlen here in compiling the pattern.  */
6758         ret = regex_compile((const unsigned char*)pattern,
6759                             strlen(pattern), syntax, preg);
6760
6761         /* POSIX doesn't distinguish between an unmatched open-group and an
6762            unmatched close-group: both are REG_EPAREN.  */
6763         if (ret == REG_ERPAREN)
6764                 ret = REG_EPAREN;
6765
6766         return (int)ret;
6767 }
6768
6769 /* regexec searches for a given pattern, specified by PREG, in the
6770    string STRING.
6771
6772    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6773    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6774    least NMATCH elements, and we set them to the offsets of the
6775    corresponding matched substrings.
6776
6777    EFLAGS specifies `execution flags' which affect matching: if
6778    REG_NOTBOL is set, then ^ does not match at the beginning of the
6779    string; if REG_NOTEOL is set, then $ does not match at the end.
6780
6781    We return 0 if we find a match and REG_NOMATCH if not.  */
6782
6783 int
6784 regexec(const regex_t * preg, const char *string, Element_count nmatch,
6785         regmatch_t pmatch[], int eflags)
6786 {
6787         int ret;
6788         struct re_registers regs;
6789         regex_t private_preg;
6790         int len = strlen(string);
6791         re_bool want_reg_info = !preg->no_sub && nmatch > 0;
6792
6793         private_preg = *preg;
6794
6795         private_preg.not_bol = !!(eflags & REG_NOTBOL);
6796         private_preg.not_eol = !!(eflags & REG_NOTEOL);
6797
6798         /* The user has told us exactly how many registers to return
6799            information about, via `nmatch'.  We have to pass that on to the
6800            matching routines.  */
6801         private_preg.regs_allocated = REGS_FIXED;
6802
6803         if (want_reg_info) {
6804                 regs.num_regs = nmatch;
6805                 regs.start = TALLOC(nmatch, regoff_t);
6806                 regs.end = TALLOC(nmatch, regoff_t);
6807                 if (regs.start == NULL || regs.end == NULL)
6808                         return (int)REG_NOMATCH;
6809         }
6810
6811         /* Perform the searching operation.  */
6812         ret = re_search(&private_preg, string, len,
6813                         /* start: */ 0, /* range: */ len,
6814                         want_reg_info ? &regs : (struct re_registers *)0);
6815
6816         /* Copy the register information to the POSIX structure.  */
6817         if (want_reg_info) {
6818                 if (ret >= 0) {
6819                         Element_count r;
6820
6821                         for (r = 0; r < nmatch; r++) {
6822                                 pmatch[r].rm_so = regs.start[r];
6823                                 pmatch[r].rm_eo = regs.end[r];
6824                         }
6825                 }
6826
6827                 /* If we needed the temporary register info, free the space now.  */
6828                 xfree(regs.start);
6829                 xfree(regs.end);
6830         }
6831
6832         /* We want zero return to mean success, unlike `re_search'.  */
6833         return ret >= 0 ? (int)REG_NOERROR : (int)REG_NOMATCH;
6834 }
6835
6836 /* Returns a message corresponding to an error code, ERRCODE, returned
6837    from either regcomp or regexec.   We don't use PREG here.  */
6838
6839 Memory_count
6840 regerror(int errcode, const regex_t * preg, char *errbuf,
6841          Memory_count errbuf_size)
6842 {
6843         const char *msg;
6844         Memory_count msg_size;
6845
6846         if (errcode < 0 || (size_t) errcode >= (sizeof(re_error_msgid)
6847                                                 / sizeof(re_error_msgid[0])))
6848                 /* Only error codes returned by the rest of the code should be passed
6849                    to this routine.  If we are given anything else, or if other regex
6850                    code generates an invalid error code, then the program has a bug.
6851                    Dump core so we can fix it.  */
6852                 abort();
6853
6854         msg = gettext(re_error_msgid[errcode]);
6855
6856         msg_size = strlen(msg) + 1;     /* Includes the null.  */
6857
6858         if (errbuf_size != 0) {
6859                 strncpy(errbuf, msg, errbuf_size - 1);
6860                 errbuf[errbuf_size-1]='\0';
6861         }
6862
6863         return msg_size;
6864 }
6865
6866 /* Free dynamically allocated space used by PREG.  */
6867
6868 void regfree(regex_t * preg)
6869 {
6870         if (preg->buffer != NULL) {
6871                 xfree(preg->buffer);
6872         }
6873         preg->buffer = NULL;
6874
6875         preg->allocated = 0;
6876         preg->used = 0;
6877
6878         if (preg->fastmap != NULL) {
6879                 xfree(preg->fastmap);
6880         }
6881         preg->fastmap = NULL;
6882         preg->fastmap_accurate = 0;
6883
6884         if (preg->translate != NULL) {
6885                 xfree(preg->translate);
6886         }
6887         preg->translate = NULL;
6888 }
6889
6890 #endif                          /* not emacs  */
6891 \f
6892 /*
6893 Local variables:
6894 make-backup-files: t
6895 version-control: t
6896 trim-versions-without-asking: nil
6897 End:
6898 */