Fix metadata usage
[sxemacs] / src / regex.c
1 /* Extended regular expression matching and search library,
2    version 0.12, extended for XEmacs.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5
6    Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
7    Copyright (C) 1995 Sun Microsystems, Inc.
8    Copyright (C) 1995 Ben Wing.
9
10 This file is part of SXEmacs
11
12 SXEmacs is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
16
17 SXEmacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
24
25
26 /* Synched up with: FSF 19.29. */
27
28 /* Changes made for XEmacs:
29
30    (1) the REGEX_BEGLINE_CHECK code from the XEmacs v18 regex routines
31        was added.  This causes a huge speedup in font-locking.
32    (2) Rel-alloc is disabled when the MMAP version of rel-alloc is
33        being used, because it's too slow -- all those calls to mmap()
34        add humongous overhead.
35    (3) Lots and lots of changes for Mule.  They are bracketed by
36        `#ifdef MULE' or with comments that have `XEmacs' in them.
37  */
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #ifndef REGISTER                /* Rigidly enforced as of 20.3 */
44 #define REGISTER
45 #endif
46
47 #ifndef _GNU_SOURCE
48 #define _GNU_SOURCE 1
49 #endif
50
51 #ifdef emacs
52 /* Converts the pointer to the char to BEG-based offset from the start.  */
53 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING                      \
54                           ? (d) - string1 : (d) - (string2 - size1))
55 #else
56 #define PTR_TO_OFFSET(d) 0
57 #endif
58
59 /* We assume non-Mule if emacs isn't defined. */
60 #ifndef emacs
61 #undef MULE
62 #endif
63
64 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
65 #include <sys/types.h>
66
67 /* This is for other GNU distributions with internationalized messages.  */
68 #if defined (I18N3) && (defined (HAVE_LIBINTL_H) || defined (_LIBC))
69 # include <libintl.h>
70 #else
71 # define gettext(msgid) (msgid)
72 #endif
73
74 /* XEmacs: define this to add in a speedup for patterns anchored at
75    the beginning of a line.  Keep the ifdefs so that it's easier to
76    tell where/why this code has diverged from v19. */
77 #define REGEX_BEGLINE_CHECK
78
79 /* XEmacs: the current mmap-based ralloc handles small blocks very
80    poorly, so we disable it here. */
81
82 #if (defined (REL_ALLOC) && defined (HAVE_MMAP)) || defined(DOUG_LEA_MALLOC)
83 # undef REL_ALLOC
84 #endif
85
86 #include "sxe-utils.h"
87
88 /* The `emacs' switch turns on certain matching commands
89    that make sense only in Emacs. */
90 #ifdef emacs
91
92 #include "lisp.h"
93 #include "buffer.h"
94 #include "syntax.h"
95
96 #ifdef MULE
97
98 Lisp_Object Vthe_lisp_rangetab;
99
100 void complex_vars_of_regex(void)
101 {
102         Vthe_lisp_rangetab = Fmake_range_table();
103         staticpro(&Vthe_lisp_rangetab);
104 }
105
106 #else  /* not MULE */
107
108 void complex_vars_of_regex(void)
109 {
110 }
111
112 #endif  /* MULE */
113
114 #define RE_TRANSLATE(ch) TRT_TABLE_OF (translate, (Emchar) ch)
115 #define TRANSLATE_P(tr) (!NILP (tr) && CHAR_TABLEP(tr))
116
117 /* just massage that xfree thing */
118 #undef xfree
119 #if defined HAVE_BDWGC && defined EF_USE_BDWGC
120 # define xfree          zfree
121 #else  /* !BDWGC */
122 # define xfree          yfree
123 #endif  /* BDWGC */
124
125 #else  /* !emacs */
126
127 /* If we are not linking with Emacs proper,
128    we can't use the relocating allocator
129    even if config.h says that we can.  */
130 #undef REL_ALLOC
131
132 #if defined (STDC_HEADERS) || defined (_LIBC)
133 # include <stdlib.h>
134 # include <stdbool.h>
135 #else
136 char *malloc();
137 char *realloc();
138 #endif
139
140 /* Types normally included via lisp.h */
141 #include <stddef.h>             /* for ptrdiff_t */
142
143 #ifdef REGEX_MALLOC
144 #ifndef DECLARE_NOTHING
145 #define DECLARE_NOTHING struct nosuchstruct
146 #endif
147 #endif
148
149 typedef int Emchar;
150
151 #define charptr_emchar(str)             ((Emchar) (str)[0])
152
153 #define INC_CHARPTR(p) ((p)++)
154 #define DEC_CHARPTR(p) ((p)--)
155
156 #include <string.h>
157
158 /* Define the syntax stuff for \<, \>, etc.  */
159
160 /* This must be nonzero for the wordchar and notwordchar pattern
161    commands in re_match_2.  */
162 #ifndef Sword
163 #define Sword 1
164 #endif
165
166 #ifdef SYNTAX_TABLE
167
168 extern char *re_syntax_table;
169
170 #else                           /* not SYNTAX_TABLE */
171
172 /* How many characters in the character set.  */
173 #define CHAR_SET_SIZE 256
174
175 static char re_syntax_table[CHAR_SET_SIZE];
176
177 static void init_syntax_once(void)
178 {
179         static int done = 0;
180
181         if (!done) {
182                 const char *word_syntax_chars =
183                     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
184
185                 memset(re_syntax_table, 0, sizeof(re_syntax_table));
186
187                 while (*word_syntax_chars)
188                         re_syntax_table[(unsigned int)(*word_syntax_chars++)] =
189                             Sword;
190
191                 done = 1;
192         }
193 }
194
195 #endif                          /* SYNTAX_TABLE */
196
197 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
198 #undef SYNTAX_FROM_CACHE
199 #define SYNTAX_FROM_CACHE SYNTAX_UNSAFE
200
201 #define RE_TRANSLATE(c) translate[(unsigned char) (c)]
202 #define TRANSLATE_P(tr) tr
203
204 #if defined HAVE_BDWGC && defined EF_USE_BDWGC
205 # if defined HAVE_GC_GC_H
206 #  include "gc/gc.h"
207 # elif defined HAVE_GC_H
208 #  include "gc.h"
209 # endif
210 # define xmalloc        GC_MALLOC
211 # define xmalloc_atomic GC_MALLOC_ATOMIC
212 # define xrealloc       GC_REALLOC
213 # define xfree          GC_FREE
214 # define xstrdup        GC_STRDUP
215 #else  /* !HAVE_BDWGC */
216 # define xmalloc        malloc
217 # define xmalloc_atomic malloc
218 # define xrealloc       realloc
219 # define xfree          free
220 # define xstrdup        strdup
221 #endif  /* HAVE_BDWGC */
222
223 #endif  /* emacs */
224
225
226 /* Under XEmacs, this is needed because we don't define it elsewhere. */
227 #ifdef SWITCH_ENUM_BUG
228 #define SWITCH_ENUM_CAST(x) ((int)(x))
229 #else
230 #define SWITCH_ENUM_CAST(x) (x)
231 #endif
232 \f
233 /* Get the interface, including the syntax bits.  */
234 #include "regex.h"
235
236 /* isalpha etc. are used for the character classes.  */
237 #include <ctype.h>
238
239 /* Jim Meyering writes:
240
241    "... Some ctype macros are valid only for character codes that
242    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
243    using /bin/cc or gcc but without giving an ansi option).  So, all
244    ctype uses should be through macros like ISPRINT...  If
245    STDC_HEADERS is defined, then autoconf has verified that the ctype
246    macros don't need to be guarded with references to isascii. ...
247    Defining isascii to 1 should let any compiler worth its salt
248    eliminate the && through constant folding."  */
249
250 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
251 #define ISASCII_1(c) 1
252 #else
253 #define ISASCII_1(c) isascii(c)
254 #endif
255
256 #ifdef MULE
257 /* The IS*() macros can be passed any character, including an extended
258    one.  We need to make sure there are no crashes, which would occur
259    otherwise due to out-of-bounds array references. */
260 #define ISASCII(c) (((EMACS_UINT) (c)) < 0x100 && ISASCII_1 (c))
261 #else
262 #define ISASCII(c) ISASCII_1 (c)
263 #endif                          /* MULE */
264
265 #ifdef isblank
266 #define ISBLANK(c) (ISASCII (c) && isblank (c))
267 #else
268 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
269 #endif
270 #ifdef isgraph
271 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
272 #else
273 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
274 #endif
275
276 /* stolen from FSF/src/charset.h */
277 #define SINGLE_BYTE_CHAR_P(c)   (((unsigned int)(c) & 0xFF) == (c))
278 /* 1 if C is an ASCII character.  */
279 #define IS_REAL_ASCII(c) ((c) < 128)
280 /* 1 if C is a unibyte character.  */
281 #define ISUNIBYTE(c)    (SINGLE_BYTE_CHAR_P(c))
282
283 #define ISPRINT(c) (ISASCII (c) && isprint (c))
284 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
285 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
286 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
287 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
288 #define ISLOWER(c) (ISASCII (c) && islower (c))
289 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
290 #define ISSPACE(c) (ISASCII (c) && isspace (c))
291 #define ISUPPER(c) (ISASCII (c) && isupper (c))
292 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
293
294 #if defined emacs
295 static inline bool
296 ISWORD(int c)
297         __attribute__((always_inline));
298 static inline bool
299 ISWORD(int c)
300 {
301         Lisp_Object ct = regex_emacs_buffer->mirror_syntax_table;
302
303         return (SYNTAX(XCHAR_TABLE(ct), c) == Sword);
304 }
305 #else
306 # define ISWORD(c)      (ISALPHA(c))
307 #endif  /* emacs */
308
309 #ifndef NULL
310 #define NULL (void *)0
311 #endif
312
313 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
314    since ours (we hope) works properly with all combinations of
315    machines, compilers, `char' and `unsigned char' argument types.
316    (Per Bothner suggested the basic approach.)  */
317 #undef SIGN_EXTEND_CHAR
318 #if __STDC__
319 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
320 #else                           /* not __STDC__ */
321 /* As in Harbison and Steele.  */
322 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
323 #endif
324 \f
325 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
326    use `alloca' instead of `malloc'.  This is because using malloc in
327    re_search* or re_match* could cause memory leaks when C-g is used in
328    Emacs; also, malloc is slower and causes storage fragmentation.  On
329    the other hand, malloc is more portable, and easier to debug.
330
331    Because we sometimes use alloca, some routines have to be macros,
332    not functions -- `alloca'-allocated space disappears at the end of the
333    function it is called in.  */
334
335 #ifdef REGEX_MALLOC
336
337 #define REGEX_ALLOCATE  xmalloc
338 #define REGEX_REALLOCATE(source, osize, nsize) xrealloc(source, nsize)
339 #define REGEX_FREE      xfree
340
341 #else  /* !REGEX_MALLOC */
342
343 /* Emacs already defines alloca, sometimes.  */
344 #ifndef alloca
345
346 /* Make alloca work the best possible way.  */
347 #ifdef __GNUC__
348 #define alloca __builtin_alloca
349 #else                           /* not __GNUC__ */
350 #if HAVE_ALLOCA_H
351 #include <alloca.h>
352 #else                           /* not __GNUC__ or HAVE_ALLOCA_H */
353 #ifndef _AIX                    /* Already did AIX, up at the top.  */
354 void *alloca();
355 #endif                          /* not _AIX */
356 #endif                          /* HAVE_ALLOCA_H */
357 #endif                          /* __GNUC__ */
358
359 #endif                          /* not alloca */
360
361 #define REGEX_ALLOCATE alloca
362
363 /* Assumes a `char *destination' variable.  */
364 #define REGEX_REALLOCATE(source, osize, nsize)                          \
365   (destination = (char *) alloca (nsize),                               \
366    memmove (destination, source, osize),                                \
367    destination)
368
369 /* No need to do anything to free, after alloca.  */
370 #define REGEX_FREE(arg) ((void)0)       /* Do nothing!  But inhibit gcc warning.  */
371
372 #endif  /* REGEX_MALLOC */
373
374 /* Define how to allocate the failure stack.  */
375
376 #ifdef REL_ALLOC
377 #error not again
378 #define REGEX_ALLOCATE_STACK(size)                              \
379         r_alloc ((char **) &failure_stack_ptr, (size))
380 #define REGEX_REALLOCATE_STACK(source, osize, nsize)            \
381         r_re_alloc ((char **) &failure_stack_ptr, (nsize))
382 #define REGEX_FREE_STACK(ptr)                                   \
383         r_alloc_free ((void **) &failure_stack_ptr)
384
385 #else  /* !REL_ALLOC */
386
387 #ifdef REGEX_MALLOC
388
389 #define REGEX_ALLOCATE_STACK    xmalloc
390 #define REGEX_REALLOCATE_STACK(source, osize, nsize) xrealloc(source, nsize)
391 #define REGEX_FREE_STACK        xfree
392
393 #else  /* !REGEX_MALLOC */
394
395 #define REGEX_ALLOCATE_STACK alloca
396
397 #define REGEX_REALLOCATE_STACK(source, osize, nsize)                    \
398    REGEX_REALLOCATE (source, osize, nsize)
399 /* No need to explicitly free anything.  */
400 #define REGEX_FREE_STACK(arg)
401
402 #endif  /* REGEX_MALLOC */
403 #endif  /* REL_ALLOC */
404
405 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
406    `string1' or just past its end.  This works if PTR is NULL, which is
407    a good thing.  */
408 #define FIRST_STRING_P(ptr)                                     \
409   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
410
411 /* (Re)Allocate N items of type T using malloc, or fail.  */
412 #define TALLOC(n, t)                            \
413         ((t *)xmalloc((n) * sizeof (t)))
414 #define TALLOC_ATOMIC(n, t)                     \
415         ((t *)xmalloc_atomic((n) * sizeof (t)))
416 #define RETALLOC(addr, n, t)                                    \
417         ((addr) = (t *)xrealloc(addr, (n) * sizeof (t)))
418 #define RETALLOC_IF(addr, n, t)                                 \
419         if (addr) {                                             \
420                 RETALLOC((addr), (n), t);                       \
421         } else                                                  \
422                 (addr) = TALLOC ((n), t)
423 #define REGEX_TALLOC(n, t)                              \
424         ((t *)REGEX_ALLOCATE((n) * sizeof (t)))
425
426 #define BYTEWIDTH 8             /* In bits.  */
427
428 #define STREQ(s1, s2) (strcmp (s1, s2) == 0)
429
430 #undef MAX
431 #undef MIN
432 #define MAX(a, b) ((a) > (b) ? (a) : (b))
433 #define MIN(a, b) ((a) < (b) ? (a) : (b))
434
435 /* Type of source-pattern and string chars.  */
436 typedef const unsigned char re_char;
437
438 typedef char re_bool;
439 #define false 0
440 #define true 1
441 \f
442 /* These are the command codes that appear in compiled regular
443    expressions.  Some opcodes are followed by argument bytes.  A
444    command code can specify any interpretation whatsoever for its
445    arguments.  Zero bytes may appear in the compiled regular expression.  */
446
447 typedef enum {
448         no_op = 0,
449
450         /* Succeed right away--no more backtracking.  */
451         succeed,
452
453         /* Followed by one byte giving n, then by n literal bytes.  */
454         exactn,
455
456         /* Matches any (more or less) character.  */
457         anychar,
458
459         /* Matches any one char belonging to specified set.  First
460            following byte is number of bitmap bytes.  Then come bytes
461            for a bitmap saying which chars are in.  Bits in each byte
462            are ordered low-bit-first.  A character is in the set if its
463            bit is 1.  A character too large to have a bit in the map is
464            automatically not in the set.  */
465         charset,
466
467         /* Same parameters as charset, but match any character that is
468            not one of those specified.  */
469         charset_not,
470
471         /* Start remembering the text that is matched, for storing in a
472            register.  Followed by one byte with the register number, in
473            the range 1 to the pattern buffer's re_ngroups
474            field.  Then followed by one byte with the number of groups
475            inner to this one.  (This last has to be part of the
476            start_memory only because we need it in the on_failure_jump
477            of re_match_2.)  */
478         start_memory,
479
480         /* Stop remembering the text that is matched and store it in a
481            memory register.  Followed by one byte with the register
482            number, in the range 1 to `re_ngroups' in the
483            pattern buffer, and one byte with the number of inner groups,
484            just like `start_memory'.  (We need the number of inner
485            groups here because we don't have any easy way of finding the
486            corresponding start_memory when we're at a stop_memory.)  */
487         stop_memory,
488
489         /* Match a duplicate of something remembered. Followed by one
490            byte containing the register number.  */
491         duplicate,
492
493         /* Fail unless at beginning of line.  */
494         begline,
495
496         /* Fail unless at end of line.  */
497         endline,
498
499         /* Succeeds if at beginning of buffer (if emacs) or at beginning
500            of string to be matched (if not).  */
501         begbuf,
502
503         /* Analogously, for end of buffer/string.  */
504         endbuf,
505
506         /* Followed by two byte relative address to which to jump.  */
507         jump,
508
509         /* Same as jump, but marks the end of an alternative.  */
510         jump_past_alt,
511
512         /* Followed by two-byte relative address of place to resume at
513            in case of failure.  */
514         on_failure_jump,
515
516         /* Like on_failure_jump, but pushes a placeholder instead of the
517            current string position when executed.  */
518         on_failure_keep_string_jump,
519
520         /* Throw away latest failure point and then jump to following
521            two-byte relative address.  */
522         pop_failure_jump,
523
524         /* Change to pop_failure_jump if know won't have to backtrack to
525            match; otherwise change to jump.  This is used to jump
526            back to the beginning of a repeat.  If what follows this jump
527            clearly won't match what the repeat does, such that we can be
528            sure that there is no use backtracking out of repetitions
529            already matched, then we change it to a pop_failure_jump.
530            Followed by two-byte address.  */
531         maybe_pop_jump,
532
533         /* Jump to following two-byte address, and push a dummy failure
534            point. This failure point will be thrown away if an attempt
535            is made to use it for a failure.  A `+' construct makes this
536            before the first repeat.  Also used as an intermediary kind
537            of jump when compiling an alternative.  */
538         dummy_failure_jump,
539
540         /* Push a dummy failure point and continue.  Used at the end of
541            alternatives.  */
542         push_dummy_failure,
543
544         /* Followed by two-byte relative address and two-byte number n.
545            After matching N times, jump to the address upon failure.  */
546         succeed_n,
547
548         /* Followed by two-byte relative address, and two-byte number n.
549            Jump to the address N times, then fail.  */
550         jump_n,
551
552         /* Set the following two-byte relative address to the
553            subsequent two-byte number.  The address *includes* the two
554            bytes of number.  */
555         set_number_at,
556
557         wordchar,               /* Matches any word-constituent character.  */
558         notwordchar,            /* Matches any char that is not a word-constituent.  */
559
560         wordbeg,                /* Succeeds if at word beginning.  */
561         wordend,                /* Succeeds if at word end.  */
562
563         wordbound,              /* Succeeds if at a word boundary.  */
564         notwordbound            /* Succeeds if not at a word boundary.  */
565 #ifdef emacs
566             , before_dot,       /* Succeeds if before point.  */
567         at_dot,                 /* Succeeds if at point.  */
568         after_dot,              /* Succeeds if after point.  */
569
570         /* Matches any character whose syntax is specified.  Followed by
571            a byte which contains a syntax code, e.g., Sword.  */
572         syntaxspec,
573
574         /* Matches any character whose syntax is not that specified.  */
575         notsyntaxspec
576 #endif                          /* emacs */
577 #ifdef MULE
578             /* need extra stuff to be able to properly work with XEmacs/Mule
579                characters (which may take up more than one byte) */
580             , charset_mule,     /* Matches any character belonging to specified set.
581                                    The set is stored in "unified range-table
582                                    format"; see rangetab.c.  Unlike the `charset'
583                                    opcode, this can handle arbitrary characters. */
584
585         charset_mule_not        /* Same parameters as charset_mule, but match any
586                                    character that is not one of those specified.  */
587             /* 97/2/17 jhod: The following two were merged back in from the Mule
588                2.3 code to enable some language specific processing */
589             , categoryspec,     /* Matches entries in the character category tables */
590         notcategoryspec         /* The opposite of the above */
591 #endif                          /* MULE */
592 } re_opcode_t;
593 \f
594 /* Common operations on the compiled pattern.  */
595
596 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
597
598 #define STORE_NUMBER(destination, number)                               \
599   do {                                                                  \
600     (destination)[0] = (number) & 0377;                                 \
601     (destination)[1] = (number) >> 8;                                   \
602   } while (0)
603
604 /* Same as STORE_NUMBER, except increment DESTINATION to
605    the byte after where the number is stored.  Therefore, DESTINATION
606    must be an lvalue.  */
607
608 #define STORE_NUMBER_AND_INCR(destination, number)                      \
609   do {                                                                  \
610     STORE_NUMBER (destination, number);                                 \
611     (destination) += 2;                                                 \
612   } while (0)
613
614 /* Put into DESTINATION a number stored in two contiguous bytes starting
615    at SOURCE.  */
616
617 #define EXTRACT_NUMBER(destination, source)                             \
618   do {                                                                  \
619     (destination) = *(source) & 0377;                                   \
620     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
621   } while (0)
622
623 #ifdef REGEX_DEBUG_FLAG
624 static void extract_number(int *dest, re_char * source)
625 {
626         int temp = SIGN_EXTEND_CHAR(*(source + 1));
627         *dest = *source & 0377;
628         *dest += temp << 8;
629 }
630
631 #ifndef EXTRACT_MACROS          /* To debug the macros.  */
632 #undef EXTRACT_NUMBER
633 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
634 #endif                          /* not EXTRACT_MACROS */
635
636 #endif                          /* REGEX_DEBUG_FLAG */
637
638 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
639    SOURCE must be an lvalue.  */
640
641 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
642         do {                                                            \
643                 EXTRACT_NUMBER (destination, source);                   \
644                 (source) += 2;                                          \
645         } while (0)
646
647 #ifdef REGEX_DEBUG_FLAG
648 static void
649 extract_number_and_incr(int *destination, unsigned char **source)
650 {
651         extract_number(destination, *source);
652         *source += 2;
653 }
654
655 #ifndef EXTRACT_MACROS
656 #undef EXTRACT_NUMBER_AND_INCR
657 #define EXTRACT_NUMBER_AND_INCR(dest, src)      \
658         extract_number_and_incr (&dest, &src)
659 #endif                          /* not EXTRACT_MACROS */
660
661 #endif                          /* REGEX_DEBUG_FLAG */
662 \f
663 /* If REGEX_DEBUG_FLAG is defined, Regex prints many voluminous messages about
664    what it is doing (if the variable `debug' is nonzero).  If linked with the
665    main program in `iregex.c', you can enter patterns and strings interactively.
666    And if linked with the main program in `main.c' and the other test files, you
667    can run the already-written tests.  */
668
669 #if defined (REGEX_DEBUG_FLAG)
670
671 /* We use standard I/O for debugging.  */
672 #include <stdio.h>
673
674 #ifndef emacs
675 /* XEmacs provides its own version of assert() */
676 /* It is useful to test things that ``must'' be true when debugging.  */
677 #include <assert.h>
678 #endif
679
680 #if 0
681 /* no point in having this one, since we do not offer any accessor to this */
682 static int debug = 0;
683 #endif
684
685 #define DEBUG_STATEMENT(e)              e
686 #define DEBUG_PRINT1(x)                 printf(x)
687 #define DEBUG_PRINT2(x1, x2)            printf(x1, x2)
688 #define DEBUG_PRINT3(x1, x2, x3)        printf(x1, x2, x3)
689 #define DEBUG_PRINT4(x1, x2, x3, x4)    printf(x1, x2, x3, x4)
690 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
691         print_partial_compiled_pattern(s, e)
692 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
693         print_double_string (w, s1, sz1, s2, sz2)
694
695 /* Print the fastmap in human-readable form.  */
696
697 static void print_fastmap(char *fastmap)
698 {
699         unsigned was_a_range = 0;
700         unsigned i = 0;
701
702         while (i < (1 << BYTEWIDTH)) {
703                 if (fastmap[i++]) {
704                         was_a_range = 0;
705                         putchar(i - 1);
706                         while (i < (1 << BYTEWIDTH) && fastmap[i]) {
707                                 was_a_range = 1;
708                                 i++;
709                         }
710                         if (was_a_range) {
711                                 putchar('-');
712                                 putchar(i - 1);
713                         }
714                 }
715         }
716         putchar('\n');
717 }
718
719 /* Print a compiled pattern string in human-readable form, starting at
720    the START pointer into it and ending just before the pointer END.  */
721
722 /* can't help it ... the ugly mule code changes `start' upon alignment,
723  * so this can't have the const qualifier ... bugger */
724 static void
725 print_partial_compiled_pattern(unsigned char *start, re_char *end)
726 {
727         int mcnt, mcnt2;
728         unsigned char *p = start;
729         re_char *pend = end;
730
731         if (start == NULL) {
732                 puts("(null)");
733                 return;
734         }
735
736         /* Loop over pattern commands.  */
737         while (p < pend) {
738                 printf("%ld:\t", (long)(p - start));
739
740                 switch ((const re_opcode_t)*p++) {
741                 case no_op:
742                         printf("/no_op");
743                         break;
744
745                 case exactn:
746                         mcnt = *p++;
747                         printf("/exactn/%d", mcnt);
748                         do {
749                                 putchar('/');
750                                 putchar(*p++);
751                         }
752                         while (--mcnt);
753                         break;
754
755                 case start_memory:
756                         mcnt = *p++;
757                         printf("/start_memory/%d/%d", mcnt, *p++);
758                         break;
759
760                 case stop_memory:
761                         mcnt = *p++;
762                         printf("/stop_memory/%d/%d", mcnt, *p++);
763                         break;
764
765                 case duplicate:
766                         printf("/duplicate/%d", *p++);
767                         break;
768
769                 case anychar:
770                         printf("/anychar");
771                         break;
772
773                 case charset:
774                 case charset_not: {
775                         REGISTER int c, last = -100;
776                         REGISTER int in_range = 0;
777
778                         printf("/charset [%s",
779                                (re_opcode_t) * (p - 1) == charset_not
780                                ? "^" : "");
781
782                         assert(p + *p < pend);
783
784                         for (c = 0; c < 256; c++)
785                                 if (((unsigned char)(c / 8) < *p)
786                                     && (p[1 + (c / 8)] &
787                                         (1 << (c % 8)))) {
788                                         /* Are we starting a range?  */
789                                         if (last + 1 == c && !in_range) {
790                                                 putchar('-');
791                                                 in_range = 1;
792                                         }
793                                         /* Have we broken a range?  */
794                                         else if (last + 1 != c
795                                                  && in_range) {
796                                                 putchar(last);
797                                                 in_range = 0;
798                                         }
799
800                                         if (!in_range)
801                                                 putchar(c);
802
803                                         last = c;
804                                 }
805
806                         if (in_range)
807                                 putchar(last);
808
809                         putchar(']');
810
811                         p += 1 + *p;
812                         break;
813                 }
814
815 #ifdef MULE
816                 case charset_mule:
817                 case charset_mule_not: {
818                         int nentries, i;
819
820                         printf("/charset_mule [%s",
821                                (re_opcode_t) * (p - 1) == charset_mule_not
822                                ? "^" : "");
823                         /* u_r_t_nentries() CHANGES its arg, why on earth
824                          * is this marked const here then? :O -hrop */
825                         nentries = unified_range_table_nentries(p);
826                         for (i = 0; i < nentries; i++) {
827                                 EMACS_INT first, last;
828                                 Lisp_Object dummy_val;
829
830                                 /* u_r_t_get_range CHANGES its arg ...
831                                  * p can't be const thence */
832                                 unified_range_table_get_range(p, i,
833                                                               &first,
834                                                               &last,
835                                                               &dummy_val);
836                                 if (first < 0x100)
837                                         putchar(first);
838                                 else
839                                         printf("(0x%lx)", (long)first);
840                                 if (first != last) {
841                                         putchar('-');
842                                         if (last < 0x100)
843                                                 putchar(last);
844                                         else
845                                                 printf("(0x%lx)",
846                                                        (long)last);
847                                 }
848                         }
849                         putchar(']');
850                         p += unified_range_table_bytes_used(p);
851                 }
852                         break;
853 #endif
854
855                 case begline:
856                         printf("/begline");
857                         break;
858
859                 case endline:
860                         printf("/endline");
861                         break;
862
863                 case on_failure_jump:
864                         extract_number_and_incr(&mcnt, &p);
865                         printf("/on_failure_jump to %ld",
866                                (long)(p + mcnt - start));
867                         break;
868
869                 case on_failure_keep_string_jump:
870                         extract_number_and_incr(&mcnt, &p);
871                         printf("/on_failure_keep_string_jump to %ld",
872                                (long)(p + mcnt - start));
873                         break;
874
875                 case dummy_failure_jump:
876                         extract_number_and_incr(&mcnt, &p);
877                         printf("/dummy_failure_jump to %ld",
878                                (long)(p + mcnt - start));
879                         break;
880
881                 case push_dummy_failure:
882                         printf("/push_dummy_failure");
883                         break;
884
885                 case maybe_pop_jump:
886                         extract_number_and_incr(&mcnt, &p);
887                         printf("/maybe_pop_jump to %ld",
888                                (long)(p + mcnt - start));
889                         break;
890
891                 case pop_failure_jump:
892                         extract_number_and_incr(&mcnt, &p);
893                         printf("/pop_failure_jump to %ld",
894                                (long)(p + mcnt - start));
895                         break;
896
897                 case jump_past_alt:
898                         extract_number_and_incr(&mcnt, &p);
899                         printf("/jump_past_alt to %ld",
900                                (long)(p + mcnt - start));
901                         break;
902
903                 case jump:
904                         extract_number_and_incr(&mcnt, &p);
905                         printf("/jump to %ld", (long)(p + mcnt - start));
906                         break;
907
908                 case succeed_n:
909                         extract_number_and_incr(&mcnt, &p);
910                         extract_number_and_incr(&mcnt2, &p);
911                         printf("/succeed_n to %ld, %d times",
912                                (long)(p + mcnt - start), mcnt2);
913                         break;
914
915                 case jump_n:
916                         extract_number_and_incr(&mcnt, &p);
917                         extract_number_and_incr(&mcnt2, &p);
918                         printf("/jump_n to %ld, %d times",
919                                (long)(p + mcnt - start), mcnt2);
920                         break;
921
922                 case set_number_at:
923                         extract_number_and_incr(&mcnt, &p);
924                         extract_number_and_incr(&mcnt2, &p);
925                         printf("/set_number_at location %ld to %d",
926                                (long)(p + mcnt - start), mcnt2);
927                         break;
928
929                 case wordbound:
930                         printf("/wordbound");
931                         break;
932
933                 case notwordbound:
934                         printf("/notwordbound");
935                         break;
936
937                 case wordbeg:
938                         printf("/wordbeg");
939                         break;
940
941                 case wordend:
942                         printf("/wordend");
943
944 #ifdef emacs
945                 case before_dot:
946                         printf("/before_dot");
947                         break;
948
949                 case at_dot:
950                         printf("/at_dot");
951                         break;
952
953                 case after_dot:
954                         printf("/after_dot");
955                         break;
956
957                 case syntaxspec:
958                         printf("/syntaxspec");
959                         mcnt = *p++;
960                         printf("/%d", mcnt);
961                         break;
962
963                 case notsyntaxspec:
964                         printf("/notsyntaxspec");
965                         mcnt = *p++;
966                         printf("/%d", mcnt);
967                         break;
968
969 #ifdef MULE
970 /* 97/2/17 jhod Mule category patch */
971                 case categoryspec:
972                         printf("/categoryspec");
973                         mcnt = *p++;
974                         printf("/%d", mcnt);
975                         break;
976
977                 case notcategoryspec:
978                         printf("/notcategoryspec");
979                         mcnt = *p++;
980                         printf("/%d", mcnt);
981                         break;
982 /* end of category patch */
983 #endif                          /* MULE */
984 #endif                          /* emacs */
985
986                 case wordchar:
987                         printf("/wordchar");
988                         break;
989
990                 case notwordchar:
991                         printf("/notwordchar");
992                         break;
993
994                 case begbuf:
995                         printf("/begbuf");
996                         break;
997
998                 case endbuf:
999                         printf("/endbuf");
1000                         break;
1001
1002                 case succeed:
1003                 default:
1004                         printf("?%d", *(p - 1));
1005                 }
1006
1007                 putchar('\n');
1008         }
1009
1010         printf("%ld:\tend of pattern.\n", (long)(p - start));
1011 }
1012
1013 static void
1014 print_compiled_pattern(struct re_pattern_buffer *bufp)
1015 {
1016 #if 0
1017         /* cant be const */
1018         re_char *buffer = bufp->buffer;
1019 #else
1020         unsigned char *buffer = bufp->buffer;
1021 #endif
1022
1023         print_partial_compiled_pattern(buffer, buffer + bufp->used);
1024         printf("%ld bytes used/%ld bytes allocated.\n", bufp->used,
1025                bufp->allocated);
1026
1027         if (bufp->fastmap_accurate && bufp->fastmap) {
1028                 printf("fastmap: ");
1029                 print_fastmap(bufp->fastmap);
1030         }
1031
1032         printf("re_nsub: %ld\t", (long)bufp->re_nsub);
1033         printf("re_ngroups: %ld\t", (long)bufp->re_ngroups);
1034         printf("regs_alloc: %d\t", bufp->regs_allocated);
1035         printf("can_be_null: %d\t", bufp->can_be_null);
1036         printf("newline_anchor: %d\n", bufp->newline_anchor);
1037         printf("no_sub: %d\t", bufp->no_sub);
1038         printf("not_bol: %d\t", bufp->not_bol);
1039         printf("not_eol: %d\t", bufp->not_eol);
1040         printf("syntax: %d\n", bufp->syntax);
1041         /* Perhaps we should print the translate table?  */
1042         /* and maybe the category table? */
1043
1044         if (bufp->external_to_internal_register) {
1045                 int i;
1046
1047                 printf("external_to_internal_register:\n");
1048                 for (i = 0; i <= bufp->re_nsub; i++) {
1049                         if (i > 0)
1050                                 printf(", ");
1051                         printf("%d -> %d", i,
1052                                bufp->external_to_internal_register[i]);
1053                 }
1054                 printf("\n");
1055         }
1056 }
1057
1058 static void
1059 print_double_string(re_char * where, re_char * string1, int size1,
1060                     re_char * string2, int size2)
1061 {
1062         if (where == NULL)
1063                 printf("(null)");
1064         else {
1065                 Element_count this_char;
1066
1067                 if (FIRST_STRING_P(where)) {
1068                         for (this_char = where - string1; this_char < size1;
1069                              this_char++)
1070                                 putchar(string1[this_char]);
1071
1072                         where = string2;
1073                 }
1074
1075                 for (this_char = where - string2; this_char < size2;
1076                      this_char++)
1077                         putchar(string2[this_char]);
1078         }
1079 }
1080
1081 #else                           /* not REGEX_DEBUG_FLAG */
1082
1083 #undef assert
1084 #define assert(e)
1085
1086 #define DEBUG_STATEMENT(e)
1087 #define DEBUG_PRINT1(x)
1088 #define DEBUG_PRINT2(x1, x2)
1089 #define DEBUG_PRINT3(x1, x2, x3)
1090 #define DEBUG_PRINT4(x1, x2, x3, x4)
1091 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1092 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1093
1094 #endif                          /* REGEX_DEBUG_FLAG */
1095 \f
1096 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1097    also be assigned to arbitrarily: each pattern buffer stores its own
1098    syntax, so it can be changed between regex compilations.  */
1099 /* This has no initializer because initialized variables in Emacs
1100    become read-only after dumping.  */
1101 reg_syntax_t re_syntax_options;
1102
1103 /* Specify the precise syntax of regexps for compilation.  This provides
1104    for compatibility for various utilities which historically have
1105    different, incompatible syntaxes.
1106
1107    The argument SYNTAX is a bit mask comprised of the various bits
1108    defined in regex.h.  We return the old syntax.  */
1109
1110 reg_syntax_t re_set_syntax(reg_syntax_t syntax)
1111 {
1112         reg_syntax_t ret = re_syntax_options;
1113
1114         re_syntax_options = syntax;
1115         return ret;
1116 }
1117 \f
1118 /* This table gives an error message for each of the error codes listed
1119    in regex.h.  Obviously the order here has to be same as there.
1120    POSIX doesn't require that we do anything for REG_NOERROR,
1121    but why not be nice?  */
1122
1123 static const char *re_error_msgid[] = {
1124         "Success",              /* REG_NOERROR */
1125         "No match",             /* REG_NOMATCH */
1126         "Invalid regular expression",   /* REG_BADPAT */
1127         "Invalid collation character",  /* REG_ECOLLATE */
1128         "Invalid character class name", /* REG_ECTYPE */
1129         "Trailing backslash",   /* REG_EESCAPE */
1130         "Invalid back reference",       /* REG_ESUBREG */
1131         "Unmatched [ or [^",    /* REG_EBRACK */
1132         "Unmatched ( or \\(",   /* REG_EPAREN */
1133         "Unmatched \\{",        /* REG_EBRACE */
1134         "Invalid content of \\{\\}",    /* REG_BADBR */
1135         "Invalid range end",    /* REG_ERANGE */
1136         "Memory exhausted",     /* REG_ESPACE */
1137         "Invalid preceding regular expression", /* REG_BADRPT */
1138         "Premature end of regular expression",  /* REG_EEND */
1139         "Regular expression too big",   /* REG_ESIZE */
1140         "Unmatched ) or \\)",   /* REG_ERPAREN */
1141 #ifdef emacs
1142         "Invalid syntax designator",    /* REG_ESYNTAX */
1143 #endif
1144 #ifdef MULE
1145         "Ranges may not span charsets", /* REG_ERANGESPAN */
1146         "Invalid category designator",  /* REG_ECATEGORY */
1147 #endif
1148 };
1149 \f
1150 /* Avoiding alloca during matching, to placate r_alloc.  */
1151
1152 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1153    searching and matching functions should not call alloca.  On some
1154    systems, alloca is implemented in terms of malloc, and if we're
1155    using the relocating allocator routines, then malloc could cause a
1156    relocation, which might (if the strings being searched are in the
1157    ralloc heap) shift the data out from underneath the regexp
1158    routines.
1159
1160    Here's another reason to avoid allocation: Emacs
1161    processes input from X in a signal handler; processing X input may
1162    call malloc; if input arrives while a matching routine is calling
1163    malloc, then we're scrod.  But Emacs can't just block input while
1164    calling matching routines; then we don't notice interrupts when
1165    they come in.  So, Emacs blocks input around all regexp calls
1166    except the matching calls, which it leaves unprotected, in the
1167    faith that they will not malloc.  */
1168
1169 /* Normally, this is fine.  */
1170 #define MATCH_MAY_ALLOCATE
1171
1172 /* When using GNU C, we are not REALLY using the C alloca, no matter
1173    what config.h may say.  So don't take precautions for it.  */
1174 #ifdef __GNUC__
1175 #undef C_ALLOCA
1176 #endif
1177
1178 /* The match routines may not allocate if (1) they would do it with malloc
1179    and (2) it's not safe for them to use malloc.
1180    Note that if REL_ALLOC is defined, matching would not use malloc for the
1181    failure stack, but we would still use it for the register vectors;
1182    so REL_ALLOC should not affect this.  */
1183 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1184 #undef MATCH_MAY_ALLOCATE
1185 #endif
1186 \f
1187 /* Failure stack declarations and macros; both re_compile_fastmap and
1188    re_match_2 use a failure stack.  These have to be macros because of
1189    REGEX_ALLOCATE_STACK.  */
1190
1191 /* Number of failure points for which to initially allocate space
1192    when matching.  If this number is exceeded, we allocate more
1193    space, so it is not a hard limit.  */
1194 #ifndef INIT_FAILURE_ALLOC
1195 #define INIT_FAILURE_ALLOC 20
1196 #endif
1197
1198 /* Roughly the maximum number of failure points on the stack.  Would be
1199    exactly that if always used MAX_FAILURE_SPACE each time we failed.
1200    This is a variable only so users of regex can assign to it; we never
1201    change it ourselves.  */
1202 #if defined (MATCH_MAY_ALLOCATE) || defined (REGEX_MALLOC)
1203 /* 4400 was enough to cause a crash on Alpha OSF/1,
1204    whose default stack limit is 2mb.  */
1205 int re_max_failures = 50000;
1206 #else
1207 int re_max_failures = 2000;
1208 #endif
1209
1210 union fail_stack_elt {
1211         const unsigned char *pointer;
1212         int integer;
1213 };
1214
1215 typedef union fail_stack_elt fail_stack_elt_t;
1216
1217 typedef struct {
1218         fail_stack_elt_t *stack;
1219         Element_count size;
1220         Element_count avail;    /* Offset of next open position.  */
1221 } fail_stack_type;
1222
1223 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1224 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1225 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1226
1227 /* Define macros to initialize and free the failure stack.
1228    Do `return -2' if the alloc fails.  */
1229
1230 #ifdef MATCH_MAY_ALLOCATE
1231 #define INIT_FAIL_STACK()                                               \
1232         do {                                                            \
1233                 fail_stack.stack = (fail_stack_elt_t *)                 \
1234                         REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC *      \
1235                                               sizeof (fail_stack_elt_t)); \
1236                                                                         \
1237                 if (fail_stack.stack == NULL)                           \
1238                         return -2;                                      \
1239                                                                         \
1240                 fail_stack.size = INIT_FAILURE_ALLOC;                   \
1241                 fail_stack.avail = 0;                                   \
1242         } while (0)
1243
1244 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1245 #else
1246 #define INIT_FAIL_STACK()                       \
1247         do {                                    \
1248                 fail_stack.avail = 0;           \
1249         } while (0)
1250
1251 #define RESET_FAIL_STACK()
1252 #endif
1253
1254 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1255
1256    Return 1 if succeeds, and 0 if either ran out of memory
1257    allocating space for it or it was already too large.
1258
1259    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1260
1261 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
1262   ((int) (fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS        \
1263    ? 0                                                                  \
1264    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
1265         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
1266           (fail_stack).size * sizeof (fail_stack_elt_t),                \
1267           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
1268                                                                         \
1269       (fail_stack).stack == NULL                                        \
1270       ? 0                                                               \
1271       : ((fail_stack).size <<= 1,                                       \
1272          1)))
1273
1274 /* Push pointer POINTER on FAIL_STACK.
1275    Return 1 if was able to do so and 0 if ran out of memory allocating
1276    space to do so.  */
1277 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
1278   ((FAIL_STACK_FULL ()                                                  \
1279     && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
1280    ? 0                                                                  \
1281    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
1282       1))
1283
1284 /* Push a pointer value onto the failure stack.
1285    Assumes the variable `fail_stack'.  Probably should only
1286    be called from within `PUSH_FAILURE_POINT'.  */
1287 #define PUSH_FAILURE_POINTER(item)                                      \
1288         fail_stack.stack[fail_stack.avail++].pointer =                  \
1289                 (const unsigned char*)(item)
1290
1291 /* This pushes an integer-valued item onto the failure stack.
1292    Assumes the variable `fail_stack'.  Probably should only
1293    be called from within `PUSH_FAILURE_POINT'.  */
1294 #define PUSH_FAILURE_INT(item)                                  \
1295         fail_stack.stack[fail_stack.avail++].integer = (item)
1296
1297 /* Push a fail_stack_elt_t value onto the failure stack.
1298    Assumes the variable `fail_stack'.  Probably should only
1299    be called from within `PUSH_FAILURE_POINT'.  */
1300 #define PUSH_FAILURE_ELT(item)                                  \
1301         fail_stack.stack[fail_stack.avail++] =  (item)
1302
1303 /* These three POP... operations complement the three PUSH... operations.
1304    All assume that `fail_stack' is nonempty.  */
1305 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1306 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1307 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1308
1309 /* Used to omit pushing failure point id's when we're not debugging.  */
1310 #ifdef REGEX_DEBUG_FLAG
1311 #define DEBUG_PUSH PUSH_FAILURE_INT
1312 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1313 #else
1314 #define DEBUG_PUSH(item)
1315 #define DEBUG_POP(item_addr)
1316 #endif
1317
1318 /* Push the information about the state we will need
1319    if we ever fail back to it.
1320
1321    Requires variables fail_stack, regstart, regend, reg_info, and
1322    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1323    declared.
1324
1325    Does `return FAILURE_CODE' if runs out of memory.  */
1326
1327 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
1328 #define DECLARE_DESTINATION char *destination
1329 #else
1330 #define DECLARE_DESTINATION DECLARE_NOTHING
1331 #endif
1332
1333 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1334         do {                                                            \
1335                 DECLARE_DESTINATION;                                    \
1336                 /* Must be int, so when we don't save any registers, the\
1337                    arithmetic \ of 0 + -1 isn't done as unsigned.  */   \
1338                 int this_reg;                                           \
1339                                                                         \
1340                 DEBUG_STATEMENT (failure_id++);                         \
1341                 DEBUG_STATEMENT (nfailure_points_pushed++);             \
1342                 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1343                 DEBUG_PRINT2 ("  Before push, next avail: %lu\n",       \
1344                               (long unsigned int)(fail_stack).avail);   \
1345                 DEBUG_PRINT2 ("                     size: %lu\n",       \
1346                               (long unsigned int)(fail_stack).size);    \
1347                                                                         \
1348                 DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS); \
1349                 DEBUG_PRINT2 ("     available: %ld\n",                  \
1350                               (long) REMAINING_AVAIL_SLOTS);            \
1351                                                                         \
1352                 /* Ensure we have enough space allocated for what       \
1353                  * we will push.  */                                    \
1354                 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) {     \
1355                         if (!DOUBLE_FAIL_STACK (fail_stack))            \
1356                                 return failure_code;                    \
1357                                                                         \
1358                         DEBUG_PRINT2 ("\n  Doubled stack; size now: %lu\n", \
1359                                       (unsigned long) (fail_stack).size); \
1360                         DEBUG_PRINT2 ("  slots available: %ld\n",       \
1361                                       (long) REMAINING_AVAIL_SLOTS);    \
1362                 }                                                       \
1363                                                                         \
1364                 /* Push the info, starting with the registers.  */      \
1365                 DEBUG_PRINT1 ("\n");                                    \
1366                                                                         \
1367                 for (this_reg = lowest_active_reg;                      \
1368                      this_reg <= highest_active_reg;                    \
1369                      this_reg++) {                                      \
1370                         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg); \
1371                         DEBUG_STATEMENT (num_regs_pushed++);            \
1372                                                                         \
1373                         DEBUG_PRINT2("    start: %p\n",                 \
1374                                      regstart[this_reg]);               \
1375                         PUSH_FAILURE_POINTER(regstart[this_reg]);       \
1376                                                                         \
1377                         DEBUG_PRINT2 ("    end: %p\n",                  \
1378                                       regend[this_reg]);                \
1379                         PUSH_FAILURE_POINTER(regend[this_reg]); \
1380                                                                         \
1381                         DEBUG_PRINT2 ("    info: 0x%p\n      ",         \
1382                                       &reg_info[this_reg]);             \
1383                         DEBUG_PRINT2 (" match_null=%d",                 \
1384                                       REG_MATCH_NULL_STRING_P           \
1385                                       (reg_info[this_reg]));            \
1386                         DEBUG_PRINT2 (" active=%d",                     \
1387                                       IS_ACTIVE (reg_info[this_reg]));  \
1388                         DEBUG_PRINT2 (" matched_something=%d",          \
1389                                       MATCHED_SOMETHING                 \
1390                                       (reg_info[this_reg]));            \
1391                         DEBUG_PRINT2 (" ever_matched_something=%d",     \
1392                                       EVER_MATCHED_SOMETHING            \
1393                                       (reg_info[this_reg]));            \
1394                         DEBUG_PRINT1 ("\n");                            \
1395                         PUSH_FAILURE_ELT(reg_info[this_reg].word);      \
1396                 }                                                       \
1397                                                                         \
1398                 DEBUG_PRINT2 ("  Pushing  low active reg: %d\n",        \
1399                               lowest_active_reg);                       \
1400                 PUSH_FAILURE_INT (lowest_active_reg);                   \
1401                                                                         \
1402                 DEBUG_PRINT2 ("  Pushing high active reg: %d\n",        \
1403                               highest_active_reg);                      \
1404                 PUSH_FAILURE_INT (highest_active_reg);                  \
1405                                                                         \
1406                 DEBUG_PRINT2 ("  Pushing pattern 0x%lx: \n",            \
1407                               (long) pattern_place);                    \
1408                 DEBUG_PRINT_COMPILED_PATTERN(bufp, pattern_place, pend); \
1409                 PUSH_FAILURE_POINTER (pattern_place);                   \
1410                                                                         \
1411                 DEBUG_PRINT2("  Pushing string %p: `",                  \
1412                              string_place);                             \
1413                 DEBUG_PRINT_DOUBLE_STRING(string_place,                 \
1414                                           string1, size1,               \
1415                                           string2, size2);              \
1416                 DEBUG_PRINT1("'\n");                                    \
1417                 PUSH_FAILURE_POINTER (string_place);                    \
1418                                                                         \
1419                 DEBUG_PRINT2("  Pushing failure id: %u\n", failure_id); \
1420                 DEBUG_PUSH(failure_id);                                 \
1421         } while (0)
1422
1423 /* This is the number of items that are pushed and popped on the stack
1424    for each register.  */
1425 #define NUM_REG_ITEMS  3
1426
1427 /* Individual items aside from the registers.  */
1428 #ifdef DEBUG
1429 #define NUM_NONREG_ITEMS 5      /* Includes failure point id.  */
1430 #else
1431 #define NUM_NONREG_ITEMS 4
1432 #endif
1433
1434 /* We push at most this many items on the stack.  */
1435 /* We used to use (num_regs - 1), which is the number of registers
1436    this regexp will save; but that was changed to 5
1437    to avoid stack overflow for a regexp with lots of parens.  */
1438 #define MAX_FAILURE_ITEMS (5 * (NUM_REG_ITEMS + NUM_NONREG_ITEMS))
1439
1440 /* We actually push this many items.  */
1441 #define NUM_FAILURE_ITEMS                                               \
1442   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
1443     + NUM_NONREG_ITEMS)
1444
1445 /* How many items can still be added to the stack without overflowing it.  */
1446 #define REMAINING_AVAIL_SLOTS ((int) ((fail_stack).size - (fail_stack).avail))
1447
1448 /* Pops what PUSH_FAIL_STACK pushes.
1449
1450    We restore into the parameters, all of which should be lvalues:
1451      STR -- the saved data position.
1452      PAT -- the saved pattern position.
1453      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1454      REGSTART, REGEND -- arrays of string positions.
1455      REG_INFO -- array of information about each subexpression.
1456
1457    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1458    `pend', `string1', `size1', `string2', and `size2'.  */
1459
1460 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg,                  \
1461                           regstart, regend, reg_info)                   \
1462         do {                                                            \
1463                 int this_reg;                                           \
1464                 const unsigned char *string_temp;                       \
1465                 DEBUG_STATEMENT (fail_stack_elt_t ffailure_id);         \
1466                                                                         \
1467                 assert (!FAIL_STACK_EMPTY ());                          \
1468                                                                         \
1469                 /* Remove failure points and point to                   \
1470                  * how many regs pushed.  */                            \
1471                 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                  \
1472                 DEBUG_PRINT2 ("  Before pop, next avail: %lu\n",        \
1473                               (unsigned long) fail_stack.avail);        \
1474                 DEBUG_PRINT2 ("                    size: %lu\n",        \
1475                               (unsigned long) fail_stack.size);         \
1476                                                                         \
1477                 assert (fail_stack.avail >= NUM_NONREG_ITEMS);          \
1478                                                                         \
1479                 DEBUG_POP (&ffailure_id.integer);                       \
1480                 DEBUG_PRINT2 ("  Popping failure id: %u\n",             \
1481                               * (unsigned int *) &ffailure_id);         \
1482                                                                         \
1483                 /* If the saved string location is NULL, it             \
1484                    came from an on_failure_keep_string_jump opcode,     \
1485                    and we want to throw away the saved NULL, thus       \
1486                    retaining our current position in the string.  */    \
1487                 string_temp = (const unsigned char *)                   \
1488                         POP_FAILURE_POINTER ();                         \
1489                 if (string_temp != NULL)                                \
1490                         str = string_temp;                              \
1491                                                                         \
1492                 DEBUG_PRINT2("  Popping string %p: `", str);            \
1493                 DEBUG_PRINT_DOUBLE_STRING(str,                          \
1494                                           string1, size1,               \
1495                                           string2, size2);              \
1496                 DEBUG_PRINT1("'\n");                                    \
1497                                                                         \
1498                 pat = (const unsigned char*)POP_FAILURE_POINTER();      \
1499                 DEBUG_PRINT2 ("  Popping pattern %p: ", pat);           \
1500                 DEBUG_PRINT_COMPILED_PATTERN(bufp, pat, pend);          \
1501                                                                         \
1502                 /* Restore register info.  */                           \
1503                 high_reg = (unsigned) POP_FAILURE_INT ();               \
1504                 DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg); \
1505                                                                         \
1506                 low_reg = (unsigned) POP_FAILURE_INT ();                \
1507                 DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg); \
1508                                                                         \
1509                 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) { \
1510                         DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg); \
1511                                                                         \
1512                         reg_info[this_reg].word = POP_FAILURE_ELT ();   \
1513                         DEBUG_PRINT2 ("      info: %p\n",               \
1514                                       &reg_info[this_reg]);             \
1515                                                                         \
1516                         regend[this_reg] = POP_FAILURE_POINTER ();      \
1517                         DEBUG_PRINT2 ("      end: %p\n",                \
1518                                       regend[this_reg]);                \
1519                                                                         \
1520                         regstart[this_reg] = POP_FAILURE_POINTER ();    \
1521                         DEBUG_PRINT2 ("      start: %p\n",              \
1522                                       regstart[this_reg]);              \
1523                 }                                                       \
1524                                                                         \
1525                 set_regs_matched_done = 0;                              \
1526                 DEBUG_STATEMENT (nfailure_points_popped++);             \
1527         } while (0)                     /* POP_FAILURE_POINT */
1528 \f
1529 /* Structure for per-register (a.k.a. per-group) information.
1530    Other register information, such as the
1531    starting and ending positions (which are addresses), and the list of
1532    inner groups (which is a bits list) are maintained in separate
1533    variables.
1534
1535    We are making a (strictly speaking) nonportable assumption here: that
1536    the compiler will pack our bit fields into something that fits into
1537    the type of `word', i.e., is something that fits into one item on the
1538    failure stack.  */
1539
1540 typedef union {
1541         fail_stack_elt_t word;
1542         struct {
1543                 /* This field is one if this group can match the empty string,
1544                    zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1545 #define MATCH_NULL_UNSET_VALUE 3
1546                 unsigned match_null_string_p:2;
1547                 unsigned is_active:1;
1548                 unsigned matched_something:1;
1549                 unsigned ever_matched_something:1;
1550         } bits;
1551 } register_info_type;
1552
1553 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1554 #define IS_ACTIVE(R)  ((R).bits.is_active)
1555 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1556 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1557
1558 /* Call this when have matched a real character; it sets `matched' flags
1559    for the subexpressions which we are currently inside.  Also records
1560    that those subexprs have matched.  */
1561 #define SET_REGS_MATCHED()                                              \
1562   do                                                                    \
1563     {                                                                   \
1564       if (!set_regs_matched_done)                                       \
1565         {                                                               \
1566           Element_count r;                                              \
1567           set_regs_matched_done = 1;                                    \
1568           for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
1569             {                                                           \
1570               MATCHED_SOMETHING (reg_info[r])                           \
1571                 = EVER_MATCHED_SOMETHING (reg_info[r])                  \
1572                 = 1;                                                    \
1573             }                                                           \
1574         }                                                               \
1575     }                                                                   \
1576   while (0)
1577
1578 /* Registers are set to a sentinel when they haven't yet matched.  */
1579 static unsigned char reg_unset_dummy;
1580 #define REG_UNSET_VALUE (&reg_unset_dummy)
1581 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1582 \f
1583 /* Subroutine declarations and macros for regex_compile.  */
1584
1585 /* Fetch the next character in the uncompiled pattern---translating it
1586    if necessary.  Also cast from a signed character in the constant
1587    string passed to us by the user to an unsigned char that we can use
1588    as an array index (in, e.g., `translate').  */
1589 #define PATFETCH(c)                                                     \
1590   do {                                                                  \
1591     PATFETCH_RAW (c);                                                   \
1592     c = TRANSLATE (c);                                                  \
1593   } while (0)
1594
1595 /* Fetch the next character in the uncompiled pattern, with no
1596    translation.  */
1597 #define PATFETCH_RAW(c)                                                 \
1598   do {if (p == pend) return REG_EEND;                                   \
1599     assert (p < pend);                                                  \
1600     c = charptr_emchar (p);                                             \
1601     INC_CHARPTR (p);                                                    \
1602   } while (0)
1603
1604 /* Go backwards one character in the pattern.  */
1605 #define PATUNFETCH DEC_CHARPTR (p)
1606
1607 #ifdef MULE
1608
1609 #define PATFETCH_EXTENDED(emch)                                         \
1610   do {if (p == pend) return REG_EEND;                                   \
1611     assert (p < pend);                                                  \
1612     emch = charptr_emchar ((const Bufbyte *) p);                        \
1613     INC_CHARPTR (p);                                                    \
1614     if (TRANSLATE_P (translate) && emch < 0x80)                         \
1615       emch = (Emchar) (unsigned char) RE_TRANSLATE (emch);              \
1616   } while (0)
1617
1618 #define PATFETCH_RAW_EXTENDED(emch)                                     \
1619   do {if (p == pend) return REG_EEND;                                   \
1620     assert (p < pend);                                                  \
1621     emch = charptr_emchar ((const Bufbyte *) p);                        \
1622     INC_CHARPTR (p);                                                    \
1623   } while (0)
1624
1625 #define PATUNFETCH_EXTENDED DEC_CHARPTR (p)
1626
1627 #define PATFETCH_EITHER(emch)                   \
1628   do {                                          \
1629     if (has_extended_chars)                     \
1630       PATFETCH_EXTENDED (emch);                 \
1631     else                                        \
1632       PATFETCH (emch);                          \
1633   } while (0)
1634
1635 #define PATFETCH_RAW_EITHER(emch)               \
1636   do {                                          \
1637     if (has_extended_chars)                     \
1638       PATFETCH_RAW_EXTENDED (emch);             \
1639     else                                        \
1640       PATFETCH_RAW (emch);                      \
1641   } while (0)
1642
1643 #define PATUNFETCH_EITHER                       \
1644   do {                                          \
1645     if (has_extended_chars)                     \
1646       PATUNFETCH_EXTENDED (emch);               \
1647     else                                        \
1648       PATUNFETCH (emch);                        \
1649   } while (0)
1650
1651 #else                           /* not MULE */
1652
1653 #define PATFETCH_EITHER(emch) PATFETCH (emch)
1654 #define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch)
1655 #define PATUNFETCH_EITHER PATUNFETCH
1656
1657 #endif                          /* MULE */
1658
1659 /* If `translate' is non-null, return translate[D], else just D.  We
1660    cast the subscript to translate because some data is declared as
1661    `char *', to avoid warnings when a string constant is passed.  But
1662    when we use a character as a subscript we must make it unsigned.  */
1663 #define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d))
1664
1665 /* Macros for outputting the compiled pattern into `buffer'.  */
1666
1667 /* If the buffer isn't allocated when it comes in, use this.  */
1668 #define INIT_BUF_SIZE  32
1669
1670 /* Make sure we have at least N more bytes of space in buffer.  */
1671 #define GET_BUFFER_SPACE(n)                                             \
1672     while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated)  \
1673       EXTEND_BUFFER ()
1674
1675 /* Make sure we have one more byte of buffer space and then add C to it.  */
1676 #define BUF_PUSH(c)                                                     \
1677   do {                                                                  \
1678     GET_BUFFER_SPACE (1);                                               \
1679     *buf_end++ = (unsigned char) (c);                                   \
1680   } while (0)
1681
1682 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1683 #define BUF_PUSH_2(c1, c2)                                              \
1684   do {                                                                  \
1685     GET_BUFFER_SPACE (2);                                               \
1686     *buf_end++ = (unsigned char) (c1);                                  \
1687     *buf_end++ = (unsigned char) (c2);                                  \
1688   } while (0)
1689
1690 /* As with BUF_PUSH_2, except for three bytes.  */
1691 #define BUF_PUSH_3(c1, c2, c3)                                          \
1692   do {                                                                  \
1693     GET_BUFFER_SPACE (3);                                               \
1694     *buf_end++ = (unsigned char) (c1);                                  \
1695     *buf_end++ = (unsigned char) (c2);                                  \
1696     *buf_end++ = (unsigned char) (c3);                                  \
1697   } while (0)
1698
1699 /* Store a jump with opcode OP at LOC to location TO.  We store a
1700    relative address offset by the three bytes the jump itself occupies.  */
1701 #define STORE_JUMP(op, loc, to) \
1702   store_op1 (op, loc, (to) - (loc) - 3)
1703
1704 /* Likewise, for a two-argument jump.  */
1705 #define STORE_JUMP2(op, loc, to, arg) \
1706   store_op2 (op, loc, (to) - (loc) - 3, arg)
1707
1708 /* Like `STORE_JUMP', but for inserting.  Assume `buf_end' is the
1709    buffer end.  */
1710 #define INSERT_JUMP(op, loc, to) \
1711   insert_op1 (op, loc, (to) - (loc) - 3, buf_end)
1712
1713 /* Like `STORE_JUMP2', but for inserting.  Assume `buf_end' is the
1714    buffer end.  */
1715 #define INSERT_JUMP2(op, loc, to, arg) \
1716   insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end)
1717
1718 /* This is not an arbitrary limit: the arguments which represent offsets
1719    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1720    be too small, many things would have to change.  */
1721 #define MAX_BUF_SIZE (1L << 16)
1722
1723 /* Extend the buffer by twice its current size via realloc and
1724    reset the pointers that pointed into the old block to point to the
1725    correct places in the new one.  If extending the buffer results in it
1726    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1727 #define EXTEND_BUFFER()                                                 \
1728         do {                                                            \
1729                 re_char *old_buffer = bufp->buffer;                     \
1730                 if (bufp->allocated == MAX_BUF_SIZE)                    \
1731                         return REG_ESIZE;                               \
1732                 bufp->allocated <<= 1;                                  \
1733                 if (bufp->allocated > MAX_BUF_SIZE)                     \
1734                         bufp->allocated = MAX_BUF_SIZE;                 \
1735                 bufp->buffer = (unsigned char *)xrealloc(               \
1736                         bufp->buffer, bufp->allocated);                 \
1737                 if (bufp->buffer == NULL) {                             \
1738                         return REG_ESPACE;                              \
1739                 }                                                       \
1740                 /* If the buffer moved, move all the pointers into it.  */ \
1741                 if (old_buffer != bufp->buffer) {                       \
1742                         buf_end = (buf_end - old_buffer) + bufp->buffer; \
1743                         begalt = (begalt - old_buffer) + bufp->buffer;  \
1744                         if (fixup_alt_jump) {                           \
1745                                 fixup_alt_jump =                        \
1746                                         (fixup_alt_jump - old_buffer) + \
1747                                         bufp->buffer;                   \
1748                         }                                               \
1749                         if (laststart) {                                \
1750                                 laststart = (laststart - old_buffer) +  \
1751                                         bufp->buffer;                   \
1752                         }                                               \
1753                         if (pending_exact) {                            \
1754                                 pending_exact =                         \
1755                                         (pending_exact - old_buffer) +  \
1756                                         bufp->buffer;                   \
1757                         }                                               \
1758                 }                                                       \
1759         } while (0)
1760
1761 /* Since we have one byte reserved for the register number argument to
1762    {start,stop}_memory, the maximum number of groups we can report
1763    things about is what fits in that byte.  */
1764 #define MAX_REGNUM 255
1765
1766 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1767    ignore the excess.  */
1768 typedef unsigned regnum_t;
1769
1770 #define INIT_REG_TRANSLATE_SIZE 5
1771
1772 /* Macros for the compile stack.  */
1773
1774 /* Since offsets can go either forwards or backwards, this type needs to
1775    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1776 typedef int pattern_offset_t;
1777
1778 typedef struct {
1779         pattern_offset_t begalt_offset;
1780         pattern_offset_t fixup_alt_jump;
1781         pattern_offset_t inner_group_offset;
1782         pattern_offset_t laststart_offset;
1783         regnum_t regnum;
1784 } compile_stack_elt_t;
1785
1786 typedef struct {
1787         compile_stack_elt_t *stack;
1788         unsigned size;
1789         unsigned avail;         /* Offset of next open position.  */
1790 } compile_stack_type;
1791
1792 #define INIT_COMPILE_STACK_SIZE 32
1793
1794 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1795 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1796
1797 /* The next available element.  */
1798 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1799
1800 /* Set the bit for character C in a bit vector.  */
1801 #define SET_LIST_BIT(c)                         \
1802   (buf_end[((unsigned char) (c)) / BYTEWIDTH]   \
1803    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1804
1805 #ifdef MULE
1806
1807 /* Set the "bit" for character C in a range table. */
1808 #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt)
1809
1810 /* Set the "bit" for character c in the appropriate table. */
1811 #define SET_EITHER_BIT(c)                       \
1812         do {                                    \
1813                 if (has_extended_chars)         \
1814                         SET_RANGETAB_BIT (c);   \
1815                 else                            \
1816                         SET_LIST_BIT (c);       \
1817         } while (0)
1818
1819 #else                           /* not MULE */
1820
1821 #define SET_EITHER_BIT(c) SET_LIST_BIT (c)
1822
1823 #endif
1824
1825 /* Get the next unsigned number in the uncompiled pattern.  */
1826 #define GET_UNSIGNED_NUMBER(num)                                        \
1827   { if (p != pend)                                                      \
1828      {                                                                  \
1829        PATFETCH (c);                                                    \
1830        while (ISDIGIT (c))                                              \
1831          {                                                              \
1832            if (num < 0)                                                 \
1833               num = 0;                                                  \
1834            num = num * 10 + c - '0';                                    \
1835            if (p == pend)                                               \
1836               break;                                                    \
1837            PATFETCH (c);                                                \
1838          }                                                              \
1839        }                                                                \
1840     }
1841
1842 #define CHAR_CLASS_MAX_LENGTH  9        /* Namely, `multibyte'.  */
1843
1844 \f
1845 static void store_op1(re_opcode_t op, unsigned char *loc, int arg);
1846 static void store_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2);
1847 static void insert_op1(re_opcode_t op, unsigned char *loc, int arg,
1848                        unsigned char *end);
1849 static void insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2,
1850                        unsigned char *end);
1851 static re_bool at_begline_loc_p(re_char*, re_char*, reg_syntax_t);
1852 static re_bool at_endline_loc_p(re_char*, re_char*, int syntax);
1853 static re_bool group_in_compile_stack(compile_stack_type compile_stack,
1854                                       regnum_t regnum);
1855 static reg_errcode_t compile_range(re_char ** p_ptr, re_char * pend,
1856                                    RE_TRANSLATE_TYPE translate,
1857                                    reg_syntax_t syntax, unsigned char *b);
1858 #ifdef MULE
1859 static reg_errcode_t compile_extended_range(re_char ** p_ptr,
1860                                             re_char * pend,
1861                                             RE_TRANSLATE_TYPE translate,
1862                                             reg_syntax_t syntax,
1863                                             Lisp_Object rtab);
1864 #endif                          /* MULE */
1865 static re_bool group_match_null_string_p(unsigned char **p,
1866                                          unsigned char *end,
1867                                          register_info_type * reg_info);
1868 static re_bool alt_match_null_string_p(unsigned char *p, unsigned char *end,
1869                                        register_info_type * reg_info);
1870 static re_bool common_op_match_null_string_p(unsigned char **p,
1871                                              unsigned char *end,
1872                                              register_info_type * reg_info);
1873 static int bcmp_translate(const unsigned char *s1, const unsigned char *s2,
1874                           REGISTER int len, RE_TRANSLATE_TYPE translate);
1875 static int re_match_2_internal(struct re_pattern_buffer *bufp,
1876                                re_char * string1, int size1,
1877                                re_char * string2, int size2, int pos,
1878                                struct re_registers *regs, int stop);
1879 \f
1880 #ifndef MATCH_MAY_ALLOCATE
1881
1882 /* If we cannot allocate large objects within re_match_2_internal,
1883    we make the fail stack and register vectors global.
1884    The fail stack, we grow to the maximum size when a regexp
1885    is compiled.
1886    The register vectors, we adjust in size each time we
1887    compile a regexp, according to the number of registers it needs.  */
1888
1889 static fail_stack_type fail_stack;
1890
1891 /* Size with which the following vectors are currently allocated.
1892    That is so we can make them bigger as needed,
1893    but never make them smaller.  */
1894 static int regs_allocated_size;
1895
1896 static re_char **regstart, **regend;
1897 static re_char **old_regstart, **old_regend;
1898 static re_char **best_regstart, **best_regend;
1899 static register_info_type *reg_info;
1900 static re_char **reg_dummy;
1901 static register_info_type *reg_info_dummy;
1902
1903 /* Make the register vectors big enough for NUM_REGS registers,
1904    but don't make them smaller.  */
1905
1906 static void regex_grow_registers(int num_regs)
1907 {
1908         if (num_regs > regs_allocated_size) {
1909                 RETALLOC_IF(regstart, num_regs, re_char *);
1910                 RETALLOC_IF(regend, num_regs, re_char *);
1911                 RETALLOC_IF(old_regstart, num_regs, re_char *);
1912                 RETALLOC_IF(old_regend, num_regs, re_char *);
1913                 RETALLOC_IF(best_regstart, num_regs, re_char *);
1914                 RETALLOC_IF(best_regend, num_regs, re_char *);
1915                 RETALLOC_IF(reg_info, num_regs, register_info_type);
1916                 RETALLOC_IF(reg_dummy, num_regs, re_char *);
1917                 RETALLOC_IF(reg_info_dummy, num_regs, register_info_type);
1918
1919                 regs_allocated_size = num_regs;
1920         }
1921 }
1922
1923 #endif                          /* not MATCH_MAY_ALLOCATE */
1924
1925 /* auxiliary stuff, FSF theft */
1926 /* Character classes.  */
1927 typedef enum {
1928         RECC_ERROR = 0,
1929         RECC_ALNUM, RECC_ALPHA, RECC_WORD,
1930         RECC_GRAPH, RECC_PRINT,
1931         RECC_LOWER, RECC_UPPER,
1932         RECC_PUNCT, RECC_CNTRL,
1933         RECC_DIGIT, RECC_XDIGIT,
1934         RECC_BLANK, RECC_SPACE,
1935         RECC_MULTIBYTE, RECC_NONASCII,
1936         RECC_ASCII, RECC_UNIBYTE
1937 } re_wctype_t;
1938
1939 /* Map a string to the char class it names (if any).  */
1940 static inline re_wctype_t
1941 re_wctype(re_char *str)
1942 {
1943         const char *string = (const char*)str;
1944
1945         if (STREQ (string, "alnum")) {
1946                 return RECC_ALNUM;
1947         } else if (STREQ (string, "alpha")) {
1948                 return RECC_ALPHA;
1949         } else if (STREQ (string, "digit")) {
1950                 return RECC_DIGIT;
1951         } else if (STREQ (string, "graph")) {
1952                 return RECC_GRAPH;
1953         } else if (STREQ (string, "space")) {
1954                 return RECC_SPACE;
1955         } else if (STREQ (string, "upper")) {
1956                 return RECC_UPPER;
1957         } else if (STREQ (string, "lower")) {
1958                 return RECC_LOWER;
1959         } else if (STREQ (string, "word")) {
1960                 return RECC_WORD;
1961         } else if (STREQ (string, "print")) {
1962                 return RECC_PRINT;
1963         } else if (STREQ (string, "punct")) {
1964                 return RECC_PUNCT;
1965         } else if (STREQ (string, "xdigit")) {
1966                 return RECC_XDIGIT;
1967         } else if (STREQ (string, "cntrl")) {
1968                 return RECC_CNTRL;
1969         } else if (STREQ (string, "blank")) {
1970                 return RECC_BLANK;
1971         } else if (STREQ (string, "ascii")) {
1972                 return RECC_ASCII;
1973         } else if (STREQ (string, "nonascii")) {
1974                 return RECC_NONASCII;
1975         } else if (STREQ (string, "unibyte")) {
1976                 return RECC_UNIBYTE;
1977         } else if (STREQ (string, "multibyte")) {
1978                 return RECC_MULTIBYTE;
1979         } else {
1980                 return RECC_ERROR;
1981         }
1982 }
1983
1984 /* True if CH is in the char class CC.  */
1985 static inline bool
1986 re_iswctype(int ch, re_wctype_t cc)
1987 {
1988         switch (cc) {
1989         case RECC_ALNUM:
1990                 return ISALNUM (ch);
1991         case RECC_ALPHA:
1992                 return ISALPHA (ch);
1993         case RECC_BLANK:
1994                 return ISBLANK (ch);
1995         case RECC_CNTRL:
1996                 return ISCNTRL (ch);
1997         case RECC_DIGIT:
1998                 return ISDIGIT (ch);
1999         case RECC_GRAPH:
2000                 return ISGRAPH (ch);
2001         case RECC_LOWER:
2002                 return ISLOWER (ch);
2003         case RECC_PRINT:
2004                 return ISPRINT (ch);
2005         case RECC_PUNCT:
2006                 return ISPUNCT (ch);
2007         case RECC_SPACE:
2008                 return ISSPACE (ch);
2009         case RECC_UPPER:
2010                 return ISUPPER (ch);
2011         case RECC_XDIGIT:
2012                 return ISXDIGIT (ch);
2013         case RECC_ASCII:
2014                 return IS_REAL_ASCII (ch);
2015         case RECC_NONASCII:
2016                 return !IS_REAL_ASCII (ch);
2017         case RECC_WORD:
2018                 return ISWORD(ch);
2019         case RECC_UNIBYTE:
2020                 return ISUNIBYTE((unsigned int)ch);
2021         case RECC_MULTIBYTE:
2022                 return !ISUNIBYTE((unsigned int)ch);
2023         case RECC_ERROR:
2024                 return false;
2025         default:
2026                 abort();
2027         }
2028         return false;
2029 }
2030
2031 \f
2032 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2033    Returns one of error codes defined in `regex.h', or zero for success.
2034
2035    Assumes the `allocated' (and perhaps `buffer') and `translate'
2036    fields are set in BUFP on entry.
2037
2038    If it succeeds, results are put in BUFP (if it returns an error, the
2039    contents of BUFP are undefined):
2040      `buffer' is the compiled pattern;
2041      `syntax' is set to SYNTAX;
2042      `used' is set to the length of the compiled pattern;
2043      `fastmap_accurate' is zero;
2044      `re_ngroups' is the number of groups/subexpressions (including shy
2045         groups) in PATTERN;
2046      `re_nsub' is the number of non-shy groups in PATTERN;
2047      `not_bol' and `not_eol' are zero;
2048
2049    The `fastmap' and `newline_anchor' fields are neither
2050    examined nor set.  */
2051
2052 static reg_errcode_t
2053 regex_compile(re_char * pattern, int size, reg_syntax_t syntax,
2054               struct re_pattern_buffer *bufp)
2055 {
2056         /* We fetch characters from PATTERN here.  We declare these as int
2057            (or possibly long) so that chars above 127 can be used as
2058            array indices.  The macros that fetch a character from the pattern
2059            make sure to coerce to unsigned char before assigning, so we won't
2060            get bitten by negative numbers here. */
2061         /* XEmacs change: used to be unsigned char. */
2062         REGISTER EMACS_INT c, c1;
2063
2064         /* A random temporary spot in PATTERN.  */
2065         re_char *p1;
2066
2067         /* Points to the end of the buffer, where we should append.  */
2068         REGISTER unsigned char *buf_end;
2069
2070         /* Keeps track of unclosed groups.  */
2071         compile_stack_type compile_stack;
2072
2073         /* Points to the current (ending) position in the pattern.  */
2074         re_char *p = pattern;
2075         re_char *pend = pattern + size;
2076
2077         /* How to translate the characters in the pattern.  */
2078         RE_TRANSLATE_TYPE translate = bufp->translate;
2079
2080         /* Address of the count-byte of the most recently inserted `exactn'
2081            command.  This makes it possible to tell if a new exact-match
2082            character can be added to that command or if the character requires
2083            a new `exactn' command.  */
2084         unsigned char *pending_exact = 0;
2085
2086         /* Address of start of the most recently finished expression.
2087            This tells, e.g., postfix * where to find the start of its
2088            operand.  Reset at the beginning of groups and alternatives.  */
2089         unsigned char *laststart = 0;
2090
2091         /* Address of beginning of regexp, or inside of last group.  */
2092         unsigned char *begalt;
2093
2094         /* Place in the uncompiled pattern (i.e., the {) to
2095            which to go back if the interval is invalid.  */
2096         re_char *beg_interval;
2097
2098         /* Address of the place where a forward jump should go to the end of
2099            the containing expression.  Each alternative of an `or' -- except the
2100            last -- ends with a forward jump of this sort.  */
2101         unsigned char *fixup_alt_jump = 0;
2102
2103         /* Counts open-groups as they are encountered.  Remembered for the
2104            matching close-group on the compile stack, so the same register
2105            number is put in the stop_memory as the start_memory.  */
2106         regnum_t regnum = 0;
2107
2108         /* we need to close over compile_stack */
2109 #       define free_stack(args...)      xfree(compile_stack.stack)
2110
2111 #ifdef REGEX_DEBUG_FLAG
2112         DEBUG_PRINT1("\nCompiling pattern: ");
2113         if (debug) {
2114                 int debug_count;
2115
2116                 for (debug_count = 0; debug_count < size; debug_count++)
2117                         putchar(pattern[debug_count]);
2118                 putchar('\n');
2119         }
2120 #endif                          /* DEBUG */
2121
2122         /* Initialize the compile stack.  */
2123         compile_stack.stack =
2124                 TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2125         if (compile_stack.stack == NULL) {
2126                 return REG_ESPACE;
2127         }
2128         compile_stack.size = INIT_COMPILE_STACK_SIZE;
2129         compile_stack.avail = 0;
2130
2131         /* Initialize the pattern buffer.  */
2132         bufp->syntax = syntax;
2133         bufp->fastmap_accurate = 0;
2134         bufp->not_bol = bufp->not_eol = 0;
2135
2136         /* Set `used' to zero, so that if we return an error, the pattern
2137            printer (for debugging) will think there's no pattern.  We reset it
2138            at the end.  */
2139         bufp->used = 0;
2140
2141         /* Always count groups, whether or not bufp->no_sub is set.  */
2142         bufp->re_nsub = 0;
2143         bufp->re_ngroups = 0;
2144
2145         /* Allocate index translation array if needed. */
2146         if (bufp->external_to_internal_register == 0) {
2147                 bufp->external_to_internal_register_size =
2148                     INIT_REG_TRANSLATE_SIZE;
2149                 RETALLOC(bufp->external_to_internal_register,
2150                          bufp->external_to_internal_register_size, int);
2151         }
2152         /* Initialize translations to impossible value to aid debugging. */
2153         {
2154                 int i;
2155
2156                 bufp->external_to_internal_register[0] = 0;
2157                 for (i = 1; i < bufp->external_to_internal_register_size; i++)
2158                         bufp->external_to_internal_register[i] =
2159                             (int)0xDEADBEEF;
2160         }
2161
2162 #if !defined (emacs) && !defined (SYNTAX_TABLE)
2163         /* Initialize the syntax table.  */
2164         init_syntax_once();
2165 #endif
2166
2167         if (bufp->allocated == 0) {
2168                 if (bufp->buffer) {
2169                         /* If zero allocated, but buffer is non-null, try to
2170                            realloc enough space.  This loses if buffer's address
2171                            is bogus, but that is the user's responsibility.  */
2172                         RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
2173                 } else {
2174                         /* Caller did not allocate a buffer.  Do it for them. */
2175                         bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
2176                 }
2177                 if (!bufp->buffer) {
2178                         free_stack();
2179                         return REG_ESPACE;
2180                 }
2181
2182                 bufp->allocated = INIT_BUF_SIZE;
2183         }
2184
2185         begalt = buf_end = bufp->buffer;
2186
2187         /* Loop through the uncompiled pattern until we're at the end.  */
2188         while (p != pend) {
2189                 PATFETCH(c);
2190
2191                 switch (c) {
2192                 case '^': {
2193                         if (    /* If at start of pattern, it's an operator.  */
2194                                 p == pattern + 1
2195                                 /* If context independent, it's an operator.  */
2196                                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2197                                 /* Otherwise, depends on what's come before.  */
2198                                 || at_begline_loc_p(pattern, p, syntax))
2199                                 BUF_PUSH(begline);
2200                         else
2201                                 goto normal_char;
2202                 }
2203                         break;
2204
2205                 case '$': {
2206                         if (    /* If at end of pattern, it's an
2207                                    operator.  */
2208                                 p == pend
2209                                 /* If context independent, it's an
2210                                    operator.  */
2211                                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2212                                 /* Otherwise, depends on what's
2213                                    next.  */
2214                                 || at_endline_loc_p(p, pend, syntax))
2215                                 BUF_PUSH(endline);
2216                         else
2217                                 goto normal_char;
2218                 }
2219                         break;
2220
2221                 case '+':
2222                 case '?':
2223                         if ((syntax & RE_BK_PLUS_QM) ||
2224                             (syntax & RE_LIMITED_OPS)) {
2225                                 goto normal_char;
2226                         }
2227                         /* pardon? */
2228                 handle_plus:
2229                 case '*': {
2230                         re_bool zero_times_ok;
2231                         re_bool many_times_ok;
2232                         re_bool minimal;
2233
2234                         /* If there is no previous pattern... */
2235                         if (!laststart) {
2236                                 if (syntax & RE_CONTEXT_INVALID_OPS) {
2237                                         free_stack();
2238                                         return REG_BADRPT;
2239                                 } else if (!(syntax & RE_CONTEXT_INDEP_OPS)) {
2240                                         goto normal_char;
2241                                 }
2242                         }
2243
2244                         /* true means zero/many matches are allowed. */
2245                         zero_times_ok = c != '+';
2246                         many_times_ok = c != '?';
2247
2248                         /* true means match shortest string possible. */
2249                         minimal = false;
2250
2251                         /* If there is a sequence of repetition chars, collapse it
2252                            down to just one (the right one).  We can't combine
2253                            interval operators with these because of, e.g., `a{2}*',
2254                            which should only match an even number of `a's.  */
2255                         while (p != pend) {
2256                                 PATFETCH(c);
2257
2258                                 if (c == '*'
2259                                     || (!(syntax & RE_BK_PLUS_QM)
2260                                         && (c == '+' || c == '?'))) ;
2261
2262                                 else if (syntax & RE_BK_PLUS_QM
2263                                          && c == '\\') {
2264                                         if (p == pend) {
2265                                                 free_stack();
2266                                                 return REG_EESCAPE;
2267                                         }
2268
2269                                         PATFETCH(c1);
2270                                         if (!(c1 == '+' || c1 == '?')) {
2271                                                 PATUNFETCH;
2272                                                 PATUNFETCH;
2273                                                 break;
2274                                         }
2275
2276                                         c = c1;
2277                                 } else {
2278                                         PATUNFETCH;
2279                                         break;
2280                                 }
2281
2282                                 /* If we get here, we found another repeat
2283                                    character.  */
2284                                 if (!(syntax & RE_NO_MINIMAL_MATCHING)) {
2285                                         /* "*?" and "+?" and "??" are okay (and
2286                                            mean match minimally), but other
2287                                            sequences (such as "*??" and "+++")
2288                                            are rejected (reserved for future
2289                                            use). */
2290                                         if (minimal || c != '?') {
2291                                                 free_stack();
2292                                                 return REG_BADRPT;
2293                                         }
2294                                         minimal = true;
2295                                 } else {
2296                                         zero_times_ok |= c != '+';
2297                                         many_times_ok |= c != '?';
2298                                 }
2299                         }
2300
2301                         /* Star, etc. applied to an empty pattern is equivalent
2302                            to an empty pattern.  */
2303                         if (!laststart) {
2304                                 break;
2305                         }
2306
2307                         /* Now we know whether zero matches is allowed
2308                            and whether two or more matches is allowed
2309                            and whether we want minimal or maximal matching. */
2310                         if (minimal) {
2311                                 if (!many_times_ok) {
2312                                         /* "a??" becomes:
2313                                            0: /on_failure_jump to 6
2314                                            3: /jump to 9
2315                                            6: /exactn/1/A
2316                                            9: end of pattern.
2317                                         */
2318                                         GET_BUFFER_SPACE(6);
2319                                         INSERT_JUMP(jump, laststart,
2320                                                     buf_end + 3);
2321                                         buf_end += 3;
2322                                         INSERT_JUMP(on_failure_jump,
2323                                                     laststart,
2324                                                     laststart + 6);
2325                                         buf_end += 3;
2326                                 } else if (zero_times_ok) {
2327                                         /* "a*?" becomes:
2328                                            0: /jump to 6
2329                                            3: /exactn/1/A
2330                                            6: /on_failure_jump to 3
2331                                            9: end of pattern.
2332                                         */
2333                                         GET_BUFFER_SPACE(6);
2334                                         INSERT_JUMP(jump, laststart,
2335                                                     buf_end + 3);
2336                                         buf_end += 3;
2337                                         STORE_JUMP(on_failure_jump,
2338                                                    buf_end,
2339                                                    laststart + 3);
2340                                         buf_end += 3;
2341                                 } else {
2342                                         /* "a+?" becomes:
2343                                            0: /exactn/1/A
2344                                            3: /on_failure_jump to 0
2345                                            6: end of pattern.
2346                                         */
2347                                         GET_BUFFER_SPACE(3);
2348                                         STORE_JUMP(on_failure_jump,
2349                                                    buf_end, laststart);
2350                                         buf_end += 3;
2351                                 }
2352                         } else {
2353                                 /* Are we optimizing this jump?  */
2354                                 re_bool keep_string_p = false;
2355
2356                                 if (many_times_ok) {
2357                                         /* More than one repetition is allowed,
2358                                            so put in at the end a backward
2359                                            relative jump from `buf_end' to
2360                                            before the next jump we're going to
2361                                            put in below (which jumps from
2362                                            laststart to after this jump).
2363
2364                                            But if we are at the `*' in the exact
2365                                            sequence `.*\n', insert an
2366                                            unconditional jump backwards to the
2367                                            ., instead of the beginning of the
2368                                            loop.  This way we only push a
2369                                            failure point once, instead of every
2370                                            time through the loop.  */
2371                                         assert(p - 1 > pattern);
2372
2373                                         /* Allocate the space for the jump.  */
2374                                         GET_BUFFER_SPACE(3);
2375
2376                                         /* We know we are not at the first
2377                                            character of the pattern, because
2378                                            laststart was nonzero.  And we've
2379                                            already incremented `p', by the way,
2380                                            to be the character after the `*'.
2381                                            Do we have to do something analogous
2382                                            here for null bytes, because of
2383                                            RE_DOT_NOT_NULL? */
2384                                         if (*(p - 2) == '.' &&
2385                                             zero_times_ok &&
2386                                             p < pend &&
2387                                             *p == '\n' &&
2388                                             !(syntax & RE_DOT_NEWLINE)) {
2389                                                 /* We have .*\n.  */
2390                                                 STORE_JUMP(jump,
2391                                                            buf_end,
2392                                                            laststart);
2393                                                 keep_string_p = true;
2394                                         } else {
2395                                                 /* Anything else.  */
2396                                                 STORE_JUMP
2397                                                         (maybe_pop_jump,
2398                                                          buf_end,
2399                                                          laststart - 3);
2400                                         }
2401                                         /* We've added more stuff to the
2402                                            buffer.  */
2403                                         buf_end += 3;
2404                                 }
2405
2406                                 /* On failure, jump from laststart to buf_end +
2407                                    3, which will be the end of the buffer after
2408                                    this jump is inserted.  */
2409                                 GET_BUFFER_SPACE(3);
2410                                 INSERT_JUMP(keep_string_p ?
2411                                             on_failure_keep_string_jump
2412                                             : on_failure_jump,
2413                                             laststart, buf_end + 3);
2414                                 buf_end += 3;
2415
2416                                 if (!zero_times_ok) {
2417                                         /* At least one repetition is required,
2418                                            so insert a `dummy_failure_jump'
2419                                            before the initial `on_failure_jump'
2420                                            instruction of the loop. This effects
2421                                            a skip over that instruction the
2422                                            first time we hit that loop.  */
2423                                         GET_BUFFER_SPACE(3);
2424                                         INSERT_JUMP(dummy_failure_jump,
2425                                                     laststart,
2426                                                     laststart + 6);
2427                                         buf_end += 3;
2428                                 }
2429                         }
2430                         pending_exact = 0;
2431                 }
2432                         break;
2433
2434                 case '.':
2435                         laststart = buf_end;
2436                         BUF_PUSH(anychar);
2437                         break;
2438
2439                 case '[': {
2440                         /* XEmacs change: this whole section */
2441                         re_bool had_char_class = false;
2442 #ifdef MULE
2443                         re_bool has_extended_chars = false;
2444                         REGISTER Lisp_Object rtab = Qnil;
2445 #endif
2446
2447                         if (p == pend) {
2448                                 free_stack();
2449                                 return REG_EBRACK;
2450                         }
2451
2452                         /* Ensure that we have enough space to push a charset:
2453                            the opcode, the length count, and the bitset; 34
2454                            bytes in all.  */
2455                         GET_BUFFER_SPACE(34);
2456
2457                         laststart = buf_end;
2458
2459                         /* We test `*p == '^' twice, instead of using an if
2460                            statement, so we only need one BUF_PUSH.  */
2461                         BUF_PUSH(*p == '^' ? charset_not : charset);
2462                         if (*p == '^')
2463                                 p++;
2464
2465                         /* Remember the first position in the bracket
2466                            expression.  */
2467                         p1 = p;
2468
2469                         /* Push the number of bytes in the bitmap.  */
2470                         BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
2471
2472                         /* Clear the whole map.  */
2473                         memset(buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
2474
2475                         /* charset_not matches newline according to a syntax
2476                            bit.  */
2477                         if ((re_opcode_t) buf_end[-2] == charset_not
2478                             && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2479                                 SET_LIST_BIT('\n');
2480
2481 #ifdef MULE
2482                         start_over_with_extended:
2483                         if (has_extended_chars) {
2484                                 /* There are extended chars here, which means we
2485                                    need to start over and shift to unified
2486                                    range-table format. */
2487                                 if (buf_end[-2] == charset)
2488                                         buf_end[-2] = charset_mule;
2489                                 else
2490                                         buf_end[-2] = charset_mule_not;
2491                                 buf_end--;
2492                                 p = p1;
2493                                 /* go back to the beginning of the charset,
2494                                    after a possible ^. */
2495                                 rtab = Vthe_lisp_rangetab;
2496                                 Fclear_range_table(rtab);
2497
2498                                 /* charset_not matches newline according to a
2499                                    syntax bit.  */
2500                                 if ((re_opcode_t) buf_end[-1] ==
2501                                     charset_mule_not
2502                                     && (syntax &
2503                                         RE_HAT_LISTS_NOT_NEWLINE))
2504                                         SET_EITHER_BIT('\n');
2505                         }
2506 #endif                          /* MULE */
2507
2508                                 /* Read in characters and ranges, setting map bits.  */
2509                         for (;;) {
2510                                 if (p == pend) {
2511                                         free_stack();
2512                                         return REG_EBRACK;
2513                                 }
2514                                 PATFETCH(c);
2515
2516 #ifdef MULE
2517                                 if (c >= 0x80 && !has_extended_chars) {
2518                                         has_extended_chars = 1;
2519                                         /* Frumble-bumble, we've found some
2520                                            extended chars.  Need to start over,
2521                                            process everything using the general
2522                                            extended-char mechanism, and need to
2523                                            use charset_mule and charset_mule_not
2524                                            instead of charset and
2525                                            charset_not. */
2526                                         goto start_over_with_extended;
2527                                 }
2528 #endif                          /* MULE */
2529                                 /* \ might escape characters inside [...] and
2530                                    [^...].  */
2531                                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) &&
2532                                     c == '\\') {
2533                                         if (p == pend) {
2534                                                 free_stack();
2535                                                 return REG_EESCAPE;
2536                                         }
2537
2538                                         PATFETCH(c1);
2539 #ifdef MULE
2540                                         if (c1 >= 0x80
2541                                             && !has_extended_chars) {
2542                                                 has_extended_chars = 1;
2543                                                 goto start_over_with_extended;
2544                                         }
2545 #endif                          /* MULE */
2546                                         SET_EITHER_BIT(c1);
2547                                         continue;
2548                                 }
2549
2550                                 /* Could be the end of the bracket
2551                                    expression.  If it's not (i.e., when
2552                                    the bracket expression is `[]' so
2553                                    far), the ']' character bit gets set
2554                                    way below.  */
2555                                 if (c == ']' && p != p1 + 1)
2556                                         break;
2557
2558                                 /* Look ahead to see if it's a range
2559                                    when the last thing was a character
2560                                    class.  */
2561                                 if (had_char_class && c == '-'
2562                                     && *p != ']') {
2563                                         free_stack();
2564                                         return REG_ERANGE;
2565                                 }
2566
2567                                 /* Look ahead to see if it's a range
2568                                    when the last thing was a character:
2569                                    if this is a hyphen not at the
2570                                    beginning or the end of a list, then
2571                                    it's the range operator.  */
2572                                 if (c == '-'
2573                                     && !(p - 2 >= pattern
2574                                          && p[-2] == '[')
2575                                     && !(p - 3 >= pattern
2576                                          && p[-3] == '['
2577                                          && p[-2] == '^')
2578                                     && *p != ']') {
2579                                         reg_errcode_t ret;
2580
2581 #ifdef MULE
2582                                         if (*(const unsigned char *)p >= 0x80
2583                                             && !has_extended_chars) {
2584                                                 has_extended_chars = 1;
2585                                                 goto start_over_with_extended;
2586                                         }
2587                                         if (has_extended_chars)
2588                                                 ret = compile_extended_range
2589                                                         (&p, pend, translate,
2590                                                          syntax, rtab);
2591                                         else
2592 #endif                          /* MULE */
2593                                                 ret = compile_range
2594                                                         (&p, pend, translate,
2595                                                          syntax, buf_end);
2596                                         if (ret != REG_NOERROR) {
2597                                                 free_stack();
2598                                                 return ret;
2599                                         }
2600                                 }
2601
2602                                 else if (p[0] == '-' && p[1] != ']') {
2603                                         /* This handles ranges made up of
2604                                            characters only.  */
2605                                         reg_errcode_t ret;
2606
2607                                         /* Move past the `-'.  */
2608                                         PATFETCH(c1);
2609
2610 #ifdef MULE
2611                                         if (*(const unsigned char*)p >= 0x80
2612                                             && !has_extended_chars) {
2613                                                 has_extended_chars = 1;
2614                                                 goto start_over_with_extended;
2615                                         }
2616                                         if (has_extended_chars)
2617                                                 ret = compile_extended_range(
2618                                                         &p, pend, translate,
2619                                                         syntax, rtab);
2620                                         else
2621 #endif                          /* MULE */
2622                                                 ret = compile_range(
2623                                                         &p, pend, translate,
2624                                                         syntax, buf_end);
2625                                         if (ret != REG_NOERROR) {
2626                                                 free_stack();
2627                                                 return ret;
2628                                         }
2629                                 }
2630
2631                                 /* See if we're at the beginning of a possible
2632                                    character class.  */
2633
2634                                 else if (syntax & RE_CHAR_CLASSES &&
2635                                          c == '[' && *p == ':') {
2636                                         /* Leave room for the null.  */
2637                                         char str[CHAR_CLASS_MAX_LENGTH + 1];
2638
2639                                         PATFETCH(c);
2640                                         c1 = 0;
2641
2642                                         /* If pattern is `[[:'.  */
2643                                         if (p == pend) {
2644                                                 free_stack();
2645                                                 return REG_EBRACK;
2646                                         }
2647                                         for (;;) {
2648                                                 /* #### This code is unused.
2649                                                    Correctness is not checked
2650                                                    after TRT table change.  */
2651                                                 PATFETCH(c);
2652                                                 if (c == ':' || c == ']'
2653                                                     || p == pend
2654                                                     || c1 ==
2655                                                     CHAR_CLASS_MAX_LENGTH)
2656                                                         break;
2657                                                 str[c1++] = (char)c;
2658                                         }
2659                                         str[c1] = '\0';
2660
2661                                         /* If isn't a word bracketed by
2662                                            `[:' and `:]': undo the
2663                                            ending character, the
2664                                            letters, and leave the
2665                                            leading `:' and `[' (but set
2666                                            bits for them).  */
2667                                         if (c == ':' && *p == ']') {
2668                                                 re_wctype_t wct =
2669                                                         re_wctype(
2670                                                                 (unsigned char*)
2671                                                                 str);
2672
2673                                                 if (wct == RECC_ERROR) {
2674                                                         free_stack();
2675                                                         return REG_ECTYPE;
2676                                                 }
2677
2678                                                 /* Throw away the ] at
2679                                                    the end of the
2680                                                    character class.  */
2681                                                 PATFETCH(c);
2682
2683                                                 if (p == pend) {
2684                                                         free_stack();
2685                                                         return REG_EBRACK;
2686                                                 }
2687
2688                                                 for (int ch = 0;
2689                                                      ch < 1<<BYTEWIDTH;
2690                                                      ch++) {
2691                                                         /* rmsmacs has
2692                                                            this */
2693 #if 0
2694                                                         int translated =
2695                                                                 TRANSLATE(ch);
2696                                                         if (translated <
2697                                                             (1 << BYTEWIDTH) &&
2698                                                             re_iswctype(ch, wct)) {
2699                                                                 SET_EITHER_BIT(ch);
2700                                                         }
2701 #else
2702                                                         if (re_iswctype(ch, wct)) {
2703                                                                 SET_EITHER_BIT(ch);
2704                                                         }
2705 #endif
2706                                                 }
2707                                                 had_char_class = true;
2708                                         } else {
2709                                                 c1++;
2710                                                 while (c1--) {
2711                                                         PATUNFETCH;
2712                                                 }
2713                                                 SET_EITHER_BIT('[');
2714                                                 SET_EITHER_BIT(':');
2715                                                 had_char_class = false;
2716                                         }
2717                                 } else {
2718                                         had_char_class = false;
2719                                         SET_EITHER_BIT(c);
2720                                 }
2721                         }
2722
2723 #ifdef MULE
2724                         if (has_extended_chars) {
2725                                 /* We have a range table, not a bit vector. */
2726                                 int bytes_needed =
2727                                         unified_range_table_bytes_needed
2728                                         (rtab);
2729                                 GET_BUFFER_SPACE(bytes_needed);
2730                                 unified_range_table_copy_data(rtab,
2731                                                               buf_end);
2732                                 buf_end +=
2733                                         unified_range_table_bytes_used
2734                                         (buf_end);
2735                                 break;
2736                         }
2737 #endif                          /* MULE */
2738                                 /* Discard any (non)matching list bytes that are
2739                                    all 0 at the end of the map.  Decrease the
2740                                    map-length byte too.  */
2741                         while ((int)buf_end[-1] > 0
2742                                && buf_end[buf_end[-1] - 1] == 0)
2743                                 buf_end[-1]--;
2744                         buf_end += buf_end[-1];
2745                 }
2746                         break;
2747
2748                 case '(':
2749                         if (syntax & RE_NO_BK_PARENS)
2750                                 goto handle_open;
2751                         else
2752                                 goto normal_char;
2753
2754                 case ')':
2755                         if (syntax & RE_NO_BK_PARENS)
2756                                 goto handle_close;
2757                         else
2758                                 goto normal_char;
2759
2760                 case '\n':
2761                         if (syntax & RE_NEWLINE_ALT)
2762                                 goto handle_alt;
2763                         else
2764                                 goto normal_char;
2765
2766                 case '|':
2767                         if (syntax & RE_NO_BK_VBAR)
2768                                 goto handle_alt;
2769                         else
2770                                 goto normal_char;
2771
2772                 case '{':
2773                         if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2774                                 goto handle_interval;
2775                         else
2776                                 goto normal_char;
2777
2778                 case '\\':
2779                         if (p == pend) {
2780                                 free_stack();
2781                                 return REG_EESCAPE;
2782                         }
2783
2784                         /* Do not translate the character after the \, so that we can
2785                            distinguish, e.g., \B from \b, even if we normally would
2786                            translate, e.g., B to b.  */
2787                         PATFETCH_RAW(c);
2788
2789                         switch (c) {
2790                         case '(':
2791                                 if (syntax & RE_NO_BK_PARENS)
2792                                         goto normal_backslash;
2793
2794                               handle_open:
2795                                 {
2796                                         regnum_t r;
2797                                         int shy = 0;
2798
2799                                         if (!(syntax & RE_NO_SHY_GROUPS)
2800                                             && p != pend && *p == '?') {
2801                                                 p++;
2802                                                 PATFETCH(c);
2803                                                 switch (c) {
2804                                                 case ':':
2805                                                         /* shy groups */
2806                                                         shy = 1;
2807                                                         break;
2808
2809                                                         /* All others are
2810                                                            reserved for future
2811                                                            constructs. */
2812                                                 default:
2813                                                         free_stack();
2814                                                         return REG_BADPAT;
2815                                                 }
2816                                         }
2817
2818                                         r = ++regnum;
2819                                         bufp->re_ngroups++;
2820                                         if (!shy) {
2821                                                 /* Record the translation from capturing group index to
2822                                                    register number, reallocating table as needed. */
2823                                                 bufp->re_nsub++;
2824                                                 while (bufp->
2825                                                        external_to_internal_register_size
2826                                                        <= bufp->re_nsub) {
2827                                                         int i;
2828                                                         int old_size =
2829                                                             bufp->
2830                                                             external_to_internal_register_size;
2831                                                         bufp->
2832                                                             external_to_internal_register_size
2833                                                             += 5;
2834                                                         RETALLOC(bufp->
2835                                                                  external_to_internal_register,
2836                                                                  bufp->
2837                                                                  external_to_internal_register_size,
2838                                                                  int);
2839                                                         /* debugging */
2840                                                         for (i = old_size;
2841                                                              i <
2842                                                              bufp->
2843                                                              external_to_internal_register_size;
2844                                                              i++)
2845                                                                 bufp->
2846                                                                     external_to_internal_register
2847                                                                     [i] =
2848                                                                     (int)
2849                                                                     0xDEADBEEF;
2850                                                 }
2851
2852                                                 bufp->
2853                                                     external_to_internal_register
2854                                                     [bufp->re_nsub] =
2855                                                     bufp->re_ngroups;
2856                                         }
2857
2858                                         if (COMPILE_STACK_FULL) {
2859                                                 RETALLOC(compile_stack.stack,
2860                                                          compile_stack.
2861                                                          size << 1,
2862                                                          compile_stack_elt_t);
2863                                                 if (compile_stack.stack == NULL)
2864                                                         return REG_ESPACE;
2865
2866                                                 compile_stack.size <<= 1;
2867                                         }
2868
2869                                         /* These are the values to restore when
2870                                            we hit end of this group.  They are
2871                                            all relative offsets, so that if the
2872                                            whole pattern moves because of
2873                                            realloc, they will still be
2874                                            valid.  */
2875                                         COMPILE_STACK_TOP.begalt_offset =
2876                                                 begalt - bufp->buffer;
2877                                         COMPILE_STACK_TOP.fixup_alt_jump =
2878                                                 fixup_alt_jump
2879                                                 ? fixup_alt_jump -
2880                                                 bufp->buffer + 1
2881                                                 : 0;
2882                                         COMPILE_STACK_TOP.laststart_offset =
2883                                                 buf_end - bufp->buffer;
2884                                         COMPILE_STACK_TOP.regnum = r;
2885
2886                                         /* We will eventually replace the 0 with
2887                                            the number of groups inner to this
2888                                            one.  But do not push a start_memory
2889                                            for groups beyond the last one we can
2890                                            represent in the compiled pattern.
2891                                            #### bad bad bad.  this will fail in
2892                                            lots of ways, if we ever have to
2893                                            backtrack for these groups.
2894                                          */
2895                                         if (r <= MAX_REGNUM) {
2896                                                 COMPILE_STACK_TOP.
2897                                                         inner_group_offset =
2898                                                         buf_end - bufp->buffer +
2899                                                         2;
2900                                                 BUF_PUSH_3(start_memory, r, 0);
2901                                         }
2902
2903                                         compile_stack.avail++;
2904
2905                                         fixup_alt_jump = 0;
2906                                         laststart = 0;
2907                                         begalt = buf_end;
2908                                         /* If we've reached MAX_REGNUM groups,
2909                                            then this open won't actually
2910                                            generate any code, so we'll have to
2911                                            clear pending_exact explicitly.  */
2912                                         pending_exact = 0;
2913                                 }
2914                                 break;
2915
2916                         case ')':
2917                                 if (syntax & RE_NO_BK_PARENS)
2918                                         goto normal_backslash;
2919
2920                                 if (COMPILE_STACK_EMPTY) {
2921                                         if (syntax &
2922                                             RE_UNMATCHED_RIGHT_PAREN_ORD) {
2923                                                 goto normal_backslash;
2924                                         } else {
2925                                                 free_stack();
2926                                                 return REG_ERPAREN;
2927                                         }
2928                                 }
2929
2930                               handle_close:
2931                                 if (fixup_alt_jump) {
2932                                         /* Push a dummy failure point at the end
2933                                            of the alternative for a possible
2934                                            future `pop_failure_jump' to pop.
2935                                            See comments at `push_dummy_failure'
2936                                            in `re_match_2'. */
2937                                         BUF_PUSH(push_dummy_failure);
2938
2939                                         /* We allocated space for this jump when
2940                                            we assigned to `fixup_alt_jump', in
2941                                            the `handle_alt' case below.  */
2942                                         STORE_JUMP(jump_past_alt,
2943                                                    fixup_alt_jump, buf_end - 1);
2944                                 }
2945
2946                                 /* See similar code for backslashed left paren
2947                                    above.  */
2948                                 if (COMPILE_STACK_EMPTY) {
2949                                         if (syntax &
2950                                             RE_UNMATCHED_RIGHT_PAREN_ORD) {
2951                                                 goto normal_char;
2952                                         } else {
2953                                                 free_stack();
2954                                                 return REG_EPAREN;
2955                                         }
2956                                 }
2957
2958                                 /* Since we just checked for an empty stack
2959                                    above, this ``can't happen''.  */
2960                                 assert(compile_stack.avail != 0);
2961                                 {
2962                                         /* We don't just want to restore into
2963                                            `regnum', because later groups should
2964                                            continue to be numbered higher, as in
2965                                            `(ab)c(de)' -- the second group is
2966                                            #2.  */
2967                                         regnum_t this_group_regnum;
2968
2969                                         compile_stack.avail--;
2970                                         begalt =
2971                                                 bufp->buffer +
2972                                                 COMPILE_STACK_TOP.begalt_offset;
2973                                         fixup_alt_jump =
2974                                                 COMPILE_STACK_TOP.
2975                                                 fixup_alt_jump
2976                                                 ? bufp->buffer +
2977                                                 COMPILE_STACK_TOP.
2978                                                 fixup_alt_jump - 1
2979                                                 : 0;
2980                                         laststart =
2981                                                 bufp->buffer +
2982                                                 COMPILE_STACK_TOP.
2983                                                 laststart_offset;
2984                                         this_group_regnum =
2985                                                 COMPILE_STACK_TOP.regnum;
2986                                         /* If we've reached MAX_REGNUM groups,
2987                                            then this open won't actually
2988                                            generate any code, so we'll have to
2989                                            clear pending_exact explicitly.  */
2990                                         pending_exact = 0;
2991
2992                                         /* We're at the end of the group, so now
2993                                            we know how many groups were inside
2994                                            this one.  */
2995                                         if (this_group_regnum <= MAX_REGNUM) {
2996                                                 unsigned char *inner_group_loc
2997                                                         =
2998                                                         bufp->buffer +
2999                                                         COMPILE_STACK_TOP.
3000                                                         inner_group_offset;
3001
3002                                                 *inner_group_loc =
3003                                                         regnum -
3004                                                         this_group_regnum;
3005                                                 BUF_PUSH_3(stop_memory,
3006                                                            this_group_regnum,
3007                                                            regnum -
3008                                                            this_group_regnum);
3009                                         }
3010                                 }
3011                                 break;
3012
3013                         case '|':       /* `\|'.  */
3014                                 if (syntax & RE_LIMITED_OPS
3015                                     || syntax & RE_NO_BK_VBAR) {
3016                                         goto normal_backslash;
3017                                 }
3018                         handle_alt:
3019                                 if (syntax & RE_LIMITED_OPS) {
3020                                         goto normal_char;
3021                                 }
3022
3023                                 /* Insert before the previous alternative a jump
3024                                    which jumps to this alternative if the former
3025                                    fails.  */
3026                                 GET_BUFFER_SPACE(3);
3027                                 INSERT_JUMP(on_failure_jump, begalt,
3028                                             buf_end + 6);
3029                                 pending_exact = 0;
3030                                 buf_end += 3;
3031
3032                                 /* The alternative before this one has a jump after it
3033                                    which gets executed if it gets matched.  Adjust that
3034                                    jump so it will jump to this alternative's analogous
3035                                    jump (put in below, which in turn will jump to the next
3036                                    (if any) alternative's such jump, etc.).  The last such
3037                                    jump jumps to the correct final destination.  A picture:
3038                                    _____ _____
3039                                    |   | |   |
3040                                    |   v |   v
3041                                    a | b   | c
3042
3043                                    If we are at `b', then fixup_alt_jump right now points to a
3044                                    three-byte space after `a'.  We'll put in the jump, set
3045                                    fixup_alt_jump to right after `b', and leave behind three
3046                                    bytes which we'll fill in when we get to after `c'.  */
3047
3048                                 if (fixup_alt_jump)
3049                                         STORE_JUMP(jump_past_alt,
3050                                                    fixup_alt_jump, buf_end);
3051
3052                                 /* Mark and leave space for a jump after this alternative,
3053                                    to be filled in later either by next alternative or
3054                                    when know we're at the end of a series of alternatives.  */
3055                                 fixup_alt_jump = buf_end;
3056                                 GET_BUFFER_SPACE(3);
3057                                 buf_end += 3;
3058
3059                                 laststart = 0;
3060                                 begalt = buf_end;
3061                                 break;
3062
3063                         case '{':
3064                                 /* If \{ is a literal.  */
3065                                 if (!(syntax & RE_INTERVALS)
3066                                     /* If we're at `\{' and it's not the open-interval
3067                                        operator.  */
3068                                     || ((syntax & RE_INTERVALS)
3069                                         && (syntax & RE_NO_BK_BRACES))
3070                                     || (p - 2 == pattern && p == pend))
3071                                         goto normal_backslash;
3072
3073                               handle_interval:
3074                                 {
3075                                         /* If got here, then the syntax allows
3076                                            intervals.  */
3077
3078                                         /* At least (most) this many matches
3079                                            must be made.  */
3080                                         int lower_bound = -1, upper_bound = -1;
3081
3082                                         beg_interval = p - 1;
3083
3084                                         if (p == pend) {
3085                                                 if (syntax & RE_NO_BK_BRACES) {
3086                                                         goto unfetch_interval;
3087                                                 } else {
3088                                                         free_stack();
3089                                                         return REG_EBRACE;
3090                                                 }
3091                                         }
3092
3093                                         GET_UNSIGNED_NUMBER(lower_bound);
3094
3095                                         if (c == ',') {
3096                                                 GET_UNSIGNED_NUMBER(
3097                                                         upper_bound);
3098                                                 if (upper_bound < 0) {
3099                                                         upper_bound =
3100                                                                 RE_DUP_MAX;
3101                                                 }
3102                                         } else {
3103                                                 /* Interval such as `{1}' =>
3104                                                    match exactly once. */
3105                                                 upper_bound = lower_bound;
3106                                         }
3107
3108                                         if (lower_bound < 0
3109                                             || upper_bound > RE_DUP_MAX
3110                                             || lower_bound > upper_bound) {
3111                                                 if (syntax & RE_NO_BK_BRACES) {
3112                                                         goto unfetch_interval;
3113                                                 } else {
3114                                                         free_stack();
3115                                                         return REG_BADBR;
3116                                                 }
3117                                         }
3118
3119                                         if (!(syntax & RE_NO_BK_BRACES)) {
3120                                                 if (c != '\\') {
3121                                                         free_stack();
3122                                                         return REG_EBRACE;
3123                                                 }
3124                                                 PATFETCH(c);
3125                                         }
3126
3127                                         if (c != '}') {
3128                                                 if (syntax & RE_NO_BK_BRACES) {
3129                                                         goto unfetch_interval;
3130                                                 } else {
3131                                                         free_stack();
3132                                                         return REG_BADBR;
3133                                                 }
3134                                         }
3135
3136                                         /* We just parsed a valid interval.  */
3137
3138                                         /* If it's invalid to have no preceding
3139                                            re.  */
3140                                         if (!laststart) {
3141                                                 if (syntax &
3142                                                     RE_CONTEXT_INVALID_OPS) {
3143                                                         free_stack();
3144                                                         return REG_BADRPT;
3145                                                 } else if (
3146                                                         syntax &
3147                                                         RE_CONTEXT_INDEP_OPS) {
3148                                                         laststart = buf_end;
3149                                                 } else {
3150                                                         goto unfetch_interval;
3151                                                 }
3152                                         }
3153
3154                                         /* If the upper bound is zero, don't
3155                                            want to succeed at all; jump from
3156                                            `laststart' to `b + 3', which will be
3157                                            the end of the buffer after we insert
3158                                            the jump.  */
3159                                         if (upper_bound == 0) {
3160                                                 GET_BUFFER_SPACE(3);
3161                                                 INSERT_JUMP(jump, laststart,
3162                                                             buf_end + 3);
3163                                                 buf_end += 3;
3164                                         }
3165
3166                                         /* Otherwise, we have a nontrivial interval.  When
3167                                            we're all done, the pattern will look like:
3168                                            set_number_at <jump count> <upper bound>
3169                                            set_number_at <succeed_n count> <lower bound>
3170                                            succeed_n <after jump addr> <succeed_n count>
3171                                            <body of loop>
3172                                            jump_n <succeed_n addr> <jump count>
3173                                            (The upper bound and `jump_n' are omitted if
3174                                            `upper_bound' is 1, though.)  */
3175                                         else {  /* If the upper bound is > 1, we need to insert
3176                                                    more at the end of the loop.  */
3177                                                 Memory_count nbytes =
3178                                                     10 + (upper_bound > 1) * 10;
3179
3180                                                 GET_BUFFER_SPACE(nbytes);
3181
3182                                                 /* Initialize lower bound of the `succeed_n', even
3183                                                    though it will be set during matching by its
3184                                                    attendant `set_number_at' (inserted next),
3185                                                    because `re_compile_fastmap' needs to know.
3186                                                    Jump to the `jump_n' we might insert below.  */
3187                                                 INSERT_JUMP2(succeed_n,
3188                                                              laststart,
3189                                                              buf_end + 5 +
3190                                                              (upper_bound >
3191                                                               1) * 5,
3192                                                              lower_bound);
3193                                                 buf_end += 5;
3194
3195                                                 /* Code to initialize the lower bound.  Insert
3196                                                    before the `succeed_n'.  The `5' is the last two
3197                                                    bytes of this `set_number_at', plus 3 bytes of
3198                                                    the following `succeed_n'.  */
3199                                                 insert_op2(set_number_at,
3200                                                            laststart, 5,
3201                                                            lower_bound,
3202                                                            buf_end);
3203                                                 buf_end += 5;
3204
3205                                                 if (upper_bound > 1) {  /* More than one repetition is allowed, so
3206                                                                            append a backward jump to the `succeed_n'
3207                                                                            that starts this interval.
3208
3209                                                                            When we've reached this during matching,
3210                                                                            we'll have matched the interval once, so
3211                                                                            jump back only `upper_bound - 1' times.  */
3212                                                         STORE_JUMP2(jump_n,
3213                                                                     buf_end,
3214                                                                     laststart +
3215                                                                     5,
3216                                                                     upper_bound
3217                                                                     - 1);
3218                                                         buf_end += 5;
3219
3220                                                         /* The location we want to set is the second
3221                                                            parameter of the `jump_n'; that is `b-2' as
3222                                                            an absolute address.  `laststart' will be
3223                                                            the `set_number_at' we're about to insert;
3224                                                            `laststart+3' the number to set, the source
3225                                                            for the relative address.  But we are
3226                                                            inserting into the middle of the pattern --
3227                                                            so everything is getting moved up by 5.
3228                                                            Conclusion: (b - 2) - (laststart + 3) + 5,
3229                                                            i.e., b - laststart.
3230
3231                                                            We insert this at the beginning of the loop
3232                                                            so that if we fail during matching, we'll
3233                                                            reinitialize the bounds.  */
3234                                                         insert_op2
3235                                                             (set_number_at,
3236                                                              laststart,
3237                                                              buf_end -
3238                                                              laststart,
3239                                                              upper_bound - 1,
3240                                                              buf_end);
3241                                                         buf_end += 5;
3242                                                 }
3243                                         }
3244                                         pending_exact = 0;
3245                                         beg_interval = NULL;
3246                                 }
3247                                 break;
3248
3249                               unfetch_interval:
3250                                 /* If an invalid interval, match the characters as literals.  */
3251                                 assert(beg_interval);
3252                                 p = beg_interval;
3253                                 beg_interval = NULL;
3254
3255                                 /* normal_char and normal_backslash need `c'.  */
3256                                 PATFETCH(c);
3257
3258                                 if (!(syntax & RE_NO_BK_BRACES)) {
3259                                         if (p > pattern && p[-1] == '\\')
3260                                                 goto normal_backslash;
3261                                 }
3262                                 goto normal_char;
3263
3264 #ifdef emacs
3265                                 /* There is no way to specify the before_dot and after_dot
3266                                    operators.  rms says this is ok.  --karl  */
3267                         case '=':
3268                                 BUF_PUSH(at_dot);
3269                                 break;
3270
3271                         case 's':
3272                                 laststart = buf_end;
3273                                 PATFETCH(c);
3274                                 /* XEmacs addition */
3275                                 if (c >= 0x80 || syntax_spec_code[c] == 0377) {
3276                                         free_stack();
3277                                         return REG_ESYNTAX;
3278                                 }
3279                                 BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
3280                                 break;
3281
3282                         case 'S':
3283                                 laststart = buf_end;
3284                                 PATFETCH(c);
3285                                 /* XEmacs addition */
3286                                 if (c >= 0x80 || syntax_spec_code[c] == 0377) {
3287                                         free_stack();
3288                                         return REG_ESYNTAX;
3289                                 }
3290                                 BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
3291                                 break;
3292
3293 #ifdef MULE
3294 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */
3295                         case 'c':
3296                                 laststart = buf_end;
3297                                 PATFETCH_RAW(c);
3298                                 if (c < 32 || c > 127) {
3299                                         free_stack();
3300                                         return REG_ECATEGORY;
3301                                 }
3302                                 BUF_PUSH_2(categoryspec, c);
3303                                 break;
3304
3305                         case 'C':
3306                                 laststart = buf_end;
3307                                 PATFETCH_RAW(c);
3308                                 if (c < 32 || c > 127) {
3309                                         free_stack();
3310                                         return REG_ECATEGORY;
3311                                 }
3312                                 BUF_PUSH_2(notcategoryspec, c);
3313                                 break;
3314 /* end of category patch */
3315 #endif                          /* MULE */
3316 #endif                          /* emacs */
3317
3318                         case 'w':
3319                                 laststart = buf_end;
3320                                 BUF_PUSH(wordchar);
3321                                 break;
3322
3323                         case 'W':
3324                                 laststart = buf_end;
3325                                 BUF_PUSH(notwordchar);
3326                                 break;
3327
3328                         case '<':
3329                                 BUF_PUSH(wordbeg);
3330                                 break;
3331
3332                         case '>':
3333                                 BUF_PUSH(wordend);
3334                                 break;
3335
3336                         case 'b':
3337                                 BUF_PUSH(wordbound);
3338                                 break;
3339
3340                         case 'B':
3341                                 BUF_PUSH(notwordbound);
3342                                 break;
3343
3344                         case '`':
3345                                 BUF_PUSH(begbuf);
3346                                 break;
3347
3348                         case '\'':
3349                                 BUF_PUSH(endbuf);
3350                                 break;
3351
3352                         case '1':
3353                         case '2':
3354                         case '3':
3355                         case '4':
3356                         case '5':
3357                         case '6':
3358                         case '7':
3359                         case '8':
3360                         case '9': {
3361                                 int reg;
3362
3363                                 if (syntax & RE_NO_BK_REFS) {
3364                                         goto normal_char;
3365                                 }
3366                                 /* External register indexing. */
3367                                 reg = c - '0';
3368
3369                                 if (reg > bufp->re_nsub) {
3370                                         free_stack();
3371                                         return REG_ESUBREG;
3372                                 }
3373
3374                                 /* Convert external to internal as soon
3375                                    as possible. */
3376                                 reg = bufp->external_to_internal_register[reg];
3377
3378                                 /* Can't back reference to a
3379                                    subexpression if inside it. */
3380                                 if (group_in_compile_stack(
3381                                             compile_stack, reg)) {
3382                                         goto normal_char;
3383                                 }
3384                                 laststart = buf_end;
3385                                 BUF_PUSH_2(duplicate, reg);
3386                         }
3387                                 break;
3388
3389                         case '+':
3390                         case '?':
3391                                 if (syntax & RE_BK_PLUS_QM) {
3392                                         goto handle_plus;
3393                                 } else {
3394                                         goto normal_backslash;
3395                                 }
3396                         default:
3397                         normal_backslash:
3398                                 /* You might think it would be useful for \ to
3399                                    mean not to translate; but if we don't
3400                                    translate it, it will never match
3401                                    anything.  */
3402                                 c = TRANSLATE(c);
3403                                 goto normal_char;
3404                         }
3405                         break;
3406
3407                         /* Expects the character in `c'.  */
3408                         /* `p' points to the location after where `c' came
3409                            from. */
3410                 normal_char:
3411                 default: {
3412                         /* XEmacs: modifications here for Mule. */
3413                         /* `q' points to the beginning of the next char. */
3414                         re_char *q = p;
3415
3416                         /* If no exactn currently being built.  */
3417                         if (!pending_exact
3418                             /* If last exactn not at current position.  */
3419                             || pending_exact + *pending_exact + 1 !=
3420                             buf_end
3421                             /* We have only one byte following the exactn for
3422                                the count. */
3423                             || ((unsigned int)(*pending_exact + (q - p)) >=
3424                                 ((unsigned int)(1 << BYTEWIDTH) - 1))
3425
3426                             /* If followed by a repetition operator.  */
3427                             || *q == '*' || *q == '^'
3428                             || ((syntax & RE_BK_PLUS_QM)
3429                                 ? *q == '\\' && (q[1] == '+'
3430                                                  || q[1] == '?')
3431                                 : (*q == '+' || *q == '?'))
3432                             || ((syntax & RE_INTERVALS)
3433                                 && ((syntax & RE_NO_BK_BRACES)
3434                                     ? *q == '{'
3435                                     : (q[0] == '\\' && q[1] == '{')))) {
3436                                 /* Start building a new exactn.  */
3437
3438                                 laststart = buf_end;
3439
3440                                 BUF_PUSH_2(exactn, 0);
3441                                 pending_exact = buf_end - 1;
3442                         }
3443 #ifndef MULE
3444                         BUF_PUSH(c);
3445                         (*pending_exact)++;
3446 #else
3447                         {
3448                                 Bytecount bt_count;
3449                                 Bufbyte tmp_buf[MAX_EMCHAR_LEN];
3450                                 int i;
3451
3452                                 bt_count =
3453                                         set_charptr_emchar(tmp_buf, c);
3454
3455                                 for (i = 0; i < bt_count; i++) {
3456                                         BUF_PUSH(tmp_buf[i]);
3457                                         (*pending_exact)++;
3458                                 }
3459                         }
3460 #endif
3461                         break;
3462                 }
3463                 }               /* switch (c) */
3464         }                       /* while p != pend */
3465
3466         /* Through the pattern now.  */
3467
3468         if (fixup_alt_jump) {
3469                 STORE_JUMP(jump_past_alt, fixup_alt_jump, buf_end);
3470         }
3471         if (!COMPILE_STACK_EMPTY) {
3472                 free_stack();
3473                 return REG_EPAREN;
3474         }
3475         /* If we don't want backtracking, force success
3476            the first time we reach the end of the compiled pattern.  */
3477         if (syntax & RE_NO_POSIX_BACKTRACKING) {
3478                 BUF_PUSH(succeed);
3479         }
3480         xfree(compile_stack.stack);
3481
3482         /* We have succeeded; set the length of the buffer.  */
3483         bufp->used = buf_end - bufp->buffer;
3484
3485 #ifdef REGEX_DEBUG_FLAG
3486         if (debug) {
3487                 DEBUG_PRINT1("\nCompiled pattern: \n");
3488                 print_compiled_pattern(bufp);
3489         }
3490 #endif                          /* DEBUG */
3491
3492 #ifndef MATCH_MAY_ALLOCATE
3493         /* Initialize the failure stack to the largest possible stack.  This
3494            isn't necessary unless we're trying to avoid calling alloca in
3495            the search and match routines.  */
3496         {
3497                 int num_regs = bufp->re_ngroups + 1;
3498
3499                 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
3500                    is strictly greater than re_max_failures, the largest possible stack
3501                    is 2 * re_max_failures failure points.  */
3502                 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) {
3503                         fail_stack.size =
3504                             (2 * re_max_failures * MAX_FAILURE_ITEMS);
3505
3506                         if (!fail_stack.stack) {
3507                                 fail_stack.stack =
3508                                         (fail_stack_elt_t *)
3509                                         xmalloc_atomic(
3510                                                 fail_stack.size *
3511                                                 sizeof(fail_stack_elt_t));
3512                         } else {
3513                                 fail_stack.stack =
3514                                         (fail_stack_elt_t *)
3515                                         xrealloc(fail_stack.stack,
3516                                                  (fail_stack.size *
3517                                                   sizeof(fail_stack_elt_t)));
3518                         }
3519                 }
3520
3521                 regex_grow_registers(num_regs);
3522         }
3523 #endif  /* not MATCH_MAY_ALLOCATE */
3524
3525         return REG_NOERROR;
3526 }
3527 /* regex_compile */
3528 \f
3529 /* Subroutines for `regex_compile'.  */
3530
3531 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
3532
3533 static void store_op1(re_opcode_t op, unsigned char *loc, int arg)
3534 {
3535         *loc = (unsigned char)op;
3536         STORE_NUMBER(loc + 1, arg);
3537 }
3538
3539 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3540
3541 static void store_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2)
3542 {
3543         *loc = (unsigned char)op;
3544         STORE_NUMBER(loc + 1, arg1);
3545         STORE_NUMBER(loc + 3, arg2);
3546 }
3547
3548 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3549    for OP followed by two-byte integer parameter ARG.  */
3550
3551 static void
3552 insert_op1(re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
3553 {
3554         REGISTER unsigned char *pfrom = end;
3555         REGISTER unsigned char *pto = end + 3;
3556
3557         while (pfrom != loc)
3558                 *--pto = *--pfrom;
3559
3560         store_op1(op, loc, arg);
3561 }
3562
3563 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3564
3565 static void
3566 insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2,
3567            unsigned char *end)
3568 {
3569         REGISTER unsigned char *pfrom = end;
3570         REGISTER unsigned char *pto = end + 5;
3571
3572         while (pfrom != loc)
3573                 *--pto = *--pfrom;
3574
3575         store_op2(op, loc, arg1, arg2);
3576 }
3577
3578 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3579    after an alternative or a begin-subexpression.  We assume there is at
3580    least one character before the ^.  */
3581
3582 static re_bool
3583 at_begline_loc_p(re_char *pattern, re_char *p, reg_syntax_t syntax)
3584 {
3585         re_char *prev = p - 2;
3586         re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3587
3588         return
3589             /* After a subexpression?  */
3590             (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3591             /* After an alternative?  */
3592             || (*prev == '|'
3593                 && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3594 }
3595
3596 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3597    at least one character after the $, i.e., `P < PEND'.  */
3598
3599 static re_bool
3600 at_endline_loc_p(re_char *p, re_char *pend, int syntax)
3601 {
3602         re_char *next = p;
3603         re_bool next_backslash = *next == '\\';
3604         re_char *next_next = p + 1 < pend ? p + 1 : 0;
3605
3606         return
3607             /* Before a subexpression?  */
3608             (syntax & RE_NO_BK_PARENS ? *next == ')'
3609              : next_backslash && next_next && *next_next == ')')
3610             /* Before an alternative?  */
3611             || (syntax & RE_NO_BK_VBAR ? *next == '|'
3612                 : next_backslash && next_next && *next_next == '|');
3613 }
3614
3615 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3616    false if it's not.  */
3617
3618 static re_bool
3619 group_in_compile_stack(compile_stack_type compile_stack, regnum_t regnum)
3620 {
3621         int this_element;
3622
3623         for (this_element = compile_stack.avail - 1;
3624              this_element >= 0; this_element--)
3625                 if (compile_stack.stack[this_element].regnum == regnum) {
3626                         return true;
3627                 }
3628         return false;
3629 }
3630
3631 /* Read the ending character of a range (in a bracket expression) from the
3632    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3633    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3634    Then we set the translation of all bits between the starting and
3635    ending characters (inclusive) in the compiled pattern B.
3636
3637    Return an error code.
3638
3639    We use these short variable names so we can use the same macros as
3640    `regex_compile' itself.  */
3641
3642 static reg_errcode_t
3643 compile_range(re_char ** p_ptr, re_char * pend, RE_TRANSLATE_TYPE translate,
3644               reg_syntax_t syntax, unsigned char *buf_end)
3645 {
3646         Element_count this_char;
3647
3648         re_char *p = *p_ptr;
3649         int range_start, range_end;
3650
3651         if (p == pend)
3652                 return REG_ERANGE;
3653
3654         /* Even though the pattern is a signed `char *', we need to fetch
3655            with unsigned char *'s; if the high bit of the pattern character
3656            is set, the range endpoints will be negative if we fetch using a
3657            signed char *.
3658
3659            We also want to fetch the endpoints without translating them; the
3660            appropriate translation is done in the bit-setting loop below.  */
3661         /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3662         range_start = ((const unsigned char *)p)[-2];
3663         range_end = ((const unsigned char *)p)[0];
3664
3665         /* Have to increment the pointer into the pattern string, so the
3666            caller isn't still at the ending character.  */
3667         (*p_ptr)++;
3668
3669         /* If the start is after the end, the range is empty.  */
3670         if (range_start > range_end)
3671                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3672
3673         /* Here we see why `this_char' has to be larger than an `unsigned
3674            char' -- the range is inclusive, so if `range_end' == 0xff
3675            (assuming 8-bit characters), we would otherwise go into an infinite
3676            loop, since all characters <= 0xff.  */
3677         for (this_char = range_start; this_char <= range_end; this_char++) {
3678                 SET_LIST_BIT(TRANSLATE(this_char));
3679         }
3680
3681         return REG_NOERROR;
3682 }
3683
3684 #ifdef MULE
3685
3686 static reg_errcode_t
3687 compile_extended_range(re_char ** p_ptr, re_char * pend,
3688                        RE_TRANSLATE_TYPE translate,
3689                        reg_syntax_t syntax, Lisp_Object rtab)
3690 {
3691         Emchar this_char, range_start, range_end;
3692         const Bufbyte *p;
3693
3694         if (*p_ptr == pend)
3695                 return REG_ERANGE;
3696
3697         p = (const Bufbyte *)*p_ptr;
3698         range_end = charptr_emchar(p);
3699         p--;                    /* back to '-' */
3700         DEC_CHARPTR(p);         /* back to start of range */
3701         /* We also want to fetch the endpoints without translating them; the
3702            appropriate translation is done in the bit-setting loop below.  */
3703         range_start = charptr_emchar(p);
3704         INC_CHARPTR(*p_ptr);
3705
3706         /* If the start is after the end, the range is empty.  */
3707         if (range_start > range_end)
3708                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3709
3710         /* Can't have ranges spanning different charsets, except maybe for
3711            ranges entirely within the first 256 chars. */
3712
3713         if ((range_start >= 0x100 || range_end >= 0x100)
3714             && CHAR_LEADING_BYTE(range_start) != CHAR_LEADING_BYTE(range_end))
3715                 return REG_ERANGESPAN;
3716
3717         /* As advertised, translations only work over the 0 - 0x7F range.
3718            Making this kind of stuff work generally is much harder.
3719            Iterating over the whole range like this would be way efficient
3720            if the range encompasses 10,000 chars or something.  You'd have
3721            to do something like this:
3722
3723            range_table a;
3724            range_table b;
3725            map over translation table in [range_start, range_end] of
3726            (put the mapped range in a;
3727            put the translation in b)
3728            invert the range in a and truncate to [range_start, range_end]
3729            compute the union of a, b
3730            union the result into rtab
3731          */
3732         for (this_char = range_start;
3733              this_char <= range_end && this_char < 0x80; this_char++) {
3734                 SET_RANGETAB_BIT(TRANSLATE(this_char));
3735         }
3736
3737         if (this_char <= range_end)
3738                 put_range_table(rtab, this_char, range_end, Qt);
3739
3740         return REG_NOERROR;
3741 }
3742
3743 #endif                          /* MULE */
3744 \f
3745 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3746    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3747    characters can start a string that matches the pattern.  This fastmap
3748    is used by re_search to skip quickly over impossible starting points.
3749
3750    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3751    area as BUFP->fastmap.
3752
3753    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3754    the pattern buffer.
3755
3756    Returns 0 if we succeed, -2 if an internal error.   */
3757
3758 int re_compile_fastmap(struct re_pattern_buffer *bufp)
3759 {
3760         int j, k;
3761 #ifdef MATCH_MAY_ALLOCATE
3762         fail_stack_type fail_stack;
3763 #endif
3764         DECLARE_DESTINATION;
3765         /* We don't push any register information onto the failure stack.  */
3766
3767         REGISTER char *fastmap = bufp->fastmap;
3768         unsigned char *pattern = bufp->buffer;
3769         unsigned long size = bufp->used;
3770         /* actually p supposed to carry the const qualifier, however, some
3771            silly mule code below CHANGES p and hence we cant go with the const
3772            qualifier here, leaving an `unfixable' warning on the way */
3773 #ifdef MULE
3774         unsigned char *p = pattern;
3775 #else
3776         re_char *p = pattern;
3777 #endif
3778         REGISTER unsigned char *pend = pattern + size;
3779
3780 #ifdef REL_ALLOC
3781         /* This holds the pointer to the failure stack, when
3782            it is allocated relocatably.  */
3783         fail_stack_elt_t *failure_stack_ptr;
3784 #endif
3785
3786         /* Assume that each path through the pattern can be null until
3787            proven otherwise.  We set this false at the bottom of switch
3788            statement, to which we get only if a particular path doesn't
3789            match the empty string.  */
3790         re_bool path_can_be_null = true;
3791
3792         /* We aren't doing a `succeed_n' to begin with.  */
3793         re_bool succeed_n_p = false;
3794
3795         assert(fastmap != NULL && p != NULL);
3796
3797         INIT_FAIL_STACK();
3798         memset(fastmap, 0, 1 << BYTEWIDTH);     /* Assume nothing's valid.  */
3799         bufp->fastmap_accurate = 1;     /* It will be when we're done.  */
3800         bufp->can_be_null = 0;
3801
3802         while (1) {
3803                 if (p == pend || *p == succeed) {
3804                         /* We have reached the (effective) end of pattern.  */
3805                         if (!FAIL_STACK_EMPTY()) {
3806                                 bufp->can_be_null |= path_can_be_null;
3807
3808                                 /* Reset for next path.  */
3809                                 path_can_be_null = true;
3810
3811                                 /* fuck, p isnt const thanks to that
3812                                  * unified range table function below */
3813 #ifdef MULE
3814 #pragma GCC diagnostic push
3815 #pragma GCC diagnostic ignored "-Wcast-qual"
3816                                 p = (unsigned char*)fail_stack.
3817                                     stack[--fail_stack.avail].pointer;
3818 #pragma GCC diagnostic pop
3819 #else
3820                                 p = fail_stack.stack[--fail_stack.avail]
3821                                         .pointer;
3822 #endif
3823
3824                                 continue;
3825                         } else {
3826                                 break;
3827                         }
3828                 }
3829
3830                 /* We should never be about to go beyond the end of the pattern.  */
3831                 assert(p < pend);
3832
3833                 switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
3834
3835                         /* I guess the idea here is to simply not bother with a
3836                            fastmap if a backreference is used, since it's too
3837                            hard to figure out the fastmap for the corresponding
3838                            group.  Setting `can_be_null' stops `re_search_2'
3839                            from using the fastmap, so that is all we do.  */
3840                 case duplicate:
3841                         bufp->can_be_null = 1;
3842                         goto done;
3843
3844                         /* Following are the cases which match a character.
3845                            These end with `break'.  */
3846
3847                 case exactn:
3848                         fastmap[p[1]] = 1;
3849                         break;
3850
3851                 case charset:
3852                         /* XEmacs: Under Mule, these bit vectors will
3853                            only contain values for characters below 0x80. */
3854                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3855                                 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3856                                         fastmap[j] = 1;
3857                         break;
3858
3859                 case charset_not:
3860                         /* Chars beyond end of map must be allowed.  */
3861 #ifdef MULE
3862                         for (j = *p * BYTEWIDTH; j < 0x80; j++)
3863                                 fastmap[j] = 1;
3864                         /* And all extended characters must be allowed, too. */
3865                         for (j = 0x80; j < 0xA0; j++)
3866                                 fastmap[j] = 1;
3867 #else                           /* not MULE */
3868                         for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3869                                 fastmap[j] = 1;
3870 #endif                          /* MULE */
3871
3872                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3873                                 if (!
3874                                     (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3875                                         fastmap[j] = 1;
3876                         break;
3877
3878 #ifdef MULE
3879                 case charset_mule: {
3880                         int nentries;
3881                         int i;
3882
3883                         nentries = unified_range_table_nentries(p);
3884                         for (i = 0; i < nentries; i++) {
3885                                 EMACS_INT first, last;
3886                                 Lisp_Object dummy_val;
3887                                 int jj;
3888                                 Bufbyte strr[MAX_EMCHAR_LEN];
3889
3890                                 unified_range_table_get_range(p, i,
3891                                                               &first,
3892                                                               &last,
3893                                                               &dummy_val);
3894                                 for (jj = first;
3895                                      jj <= last && jj < 0x80; jj++)
3896                                         fastmap[jj] = 1;
3897                                 /* Ranges below 0x100 can span charsets, but there
3898                                    are only two (Control-1 and Latin-1), and
3899                                    either first or last has to be in them. */
3900                                 set_charptr_emchar(strr, first);
3901                                 fastmap[*strr] = 1;
3902                                 if (last < 0x100) {
3903                                         set_charptr_emchar(strr, last);
3904                                         fastmap[*strr] = 1;
3905                                 }
3906                         }
3907                 }
3908                         break;
3909
3910                 case charset_mule_not: {
3911                         int nentries;
3912                         int i;
3913
3914                         nentries = unified_range_table_nentries(p);
3915                         for (i = 0; i < nentries; i++) {
3916                                 EMACS_INT first, last;
3917                                 Lisp_Object dummy_val;
3918                                 int jj;
3919                                 int smallest_prev = 0;
3920
3921                                 unified_range_table_get_range(p, i,
3922                                                               &first,
3923                                                               &last,
3924                                                               &dummy_val);
3925                                 for (jj = smallest_prev;
3926                                      jj < first && jj < 0x80; jj++)
3927                                         fastmap[jj] = 1;
3928                                 smallest_prev = last + 1;
3929                                 if (smallest_prev >= 0x80)
3930                                         break;
3931                         }
3932                         /* Calculating which leading bytes are actually allowed
3933                            here is rather difficult, so we just punt and allow
3934                            all of them. */
3935                         for (i = 0x80; i < 0xA0; i++)
3936                                 fastmap[i] = 1;
3937                 }
3938                         break;
3939 #endif                          /* MULE */
3940
3941                 case wordchar:
3942 #ifdef emacs
3943                         k = (int)Sword;
3944                         goto matchsyntax;
3945 #else
3946                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3947                                 if (SYNTAX_UNSAFE
3948                                     (XCHAR_TABLE
3949                                      (regex_emacs_buffer->mirror_syntax_table),
3950                                      j) == Sword)
3951                                         fastmap[j] = 1;
3952                         break;
3953 #endif
3954
3955                 case notwordchar:
3956 #ifdef emacs
3957                         k = (int)Sword;
3958                         goto matchnotsyntax;
3959 #else
3960                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3961                                 if (SYNTAX_UNSAFE
3962                                     (XCHAR_TABLE
3963                                      (regex_emacs_buffer->mirror_syntax_table),
3964                                      j) != Sword)
3965                                         fastmap[j] = 1;
3966                         break;
3967 #endif
3968
3969                 case anychar: {
3970                         int fastmap_newline = fastmap['\n'];
3971
3972                         /* `.' matches anything ...  */
3973 #ifdef MULE
3974                         /* "anything" only includes bytes that can be the
3975                            first byte of a character. */
3976                         for (j = 0; j < 0xA0; j++)
3977                                 fastmap[j] = 1;
3978 #else
3979                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3980                                 fastmap[j] = 1;
3981 #endif
3982
3983                         /* ... except perhaps newline.  */
3984                         if (!(bufp->syntax & RE_DOT_NEWLINE))
3985                                 fastmap['\n'] = fastmap_newline;
3986
3987                         /* Return if we have already set `can_be_null'; if we
3988                            have, then the fastmap is irrelevant.  Something's
3989                            wrong here.  */
3990                         else if (bufp->can_be_null)
3991                                 goto done;
3992
3993                         /* Otherwise, have to check alternative paths.  */
3994                         break;
3995                 }
3996
3997 #ifdef emacs
3998                 case wordbound:
3999                 case notwordbound:
4000                 case wordbeg:
4001                 case wordend:
4002                 case notsyntaxspec:
4003                 case syntaxspec:
4004                         /* This match depends on text properties.  These end with
4005                            aborting optimizations.  */
4006                         bufp->can_be_null = 1;
4007                         goto done;
4008
4009 #ifdef emacs
4010                 matchsyntax:
4011 #ifdef MULE
4012                         for (j = 0; j < 0x80; j++)
4013                                 if (SYNTAX_UNSAFE
4014                                     (XCHAR_TABLE
4015                                      (regex_emacs_buffer->mirror_syntax_table),
4016                                      j) == (enum syntaxcode)k)
4017                                         fastmap[j] = 1;
4018                         for (j = 0x80; j < 0xA0; j++) {
4019                                 if (LEADING_BYTE_PREFIX_P(j))
4020                                         /* too complicated to calculate this right */
4021                                         fastmap[j] = 1;
4022                                 else {
4023                                         int multi_p;
4024                                         Lisp_Object cset;
4025
4026                                         cset = CHARSET_BY_LEADING_BYTE(j);
4027                                         if (CHARSETP(cset)) {
4028                                                 if (charset_syntax
4029                                                     (regex_emacs_buffer, cset,
4030                                                      &multi_p)
4031                                                     == Sword || multi_p)
4032                                                         fastmap[j] = 1;
4033                                         }
4034                                 }
4035                         }
4036 #else                           /* not MULE */
4037                         for (j = 0; j < (1 << BYTEWIDTH); j++)
4038                                 if (SYNTAX_UNSAFE
4039                                     (XCHAR_TABLE
4040                                      (regex_emacs_buffer->mirror_syntax_table),
4041                                      j) == (enum syntaxcode)k)
4042                                         fastmap[j] = 1;
4043 #endif                          /* MULE */
4044                         break;
4045
4046                 matchnotsyntax:
4047 #ifdef MULE
4048                         for (j = 0; j < 0x80; j++)
4049                                 if (SYNTAX_UNSAFE
4050                                     (XCHAR_TABLE
4051                                      (regex_emacs_buffer->mirror_syntax_table),
4052                                      j) != (enum syntaxcode)k)
4053                                         fastmap[j] = 1;
4054                         for (j = 0x80; j < 0xA0; j++) {
4055                                 if (LEADING_BYTE_PREFIX_P(j))
4056                                         /* too complicated to calculate this right */
4057                                         fastmap[j] = 1;
4058                                 else {
4059                                         int multi_p;
4060                                         Lisp_Object cset;
4061
4062                                         cset = CHARSET_BY_LEADING_BYTE(j);
4063                                         if (CHARSETP(cset)) {
4064                                                 if (charset_syntax
4065                                                     (regex_emacs_buffer, cset,
4066                                                      &multi_p)
4067                                                     != Sword || multi_p)
4068                                                         fastmap[j] = 1;
4069                                         }
4070                                 }
4071                         }
4072 #else                           /* not MULE */
4073                         for (j = 0; j < (1 << BYTEWIDTH); j++)
4074                                 if (SYNTAX_UNSAFE
4075                                     (XCHAR_TABLE
4076                                      (regex_emacs_buffer->mirror_syntax_table),
4077                                      j) != (enum syntaxcode)k)
4078                                         fastmap[j] = 1;
4079 #endif                          /* MULE */
4080                         break;
4081 #endif                          /* emacs */
4082
4083 #ifdef MULE
4084 /* 97/2/17 jhod category patch */
4085                 case categoryspec:
4086                 case notcategoryspec:
4087                         bufp->can_be_null = 1;
4088                         return 0;
4089 /* end if category patch */
4090 #endif                          /* MULE */
4091
4092                         /* All cases after this match the empty string.  These
4093                            end with `continue'.  */
4094
4095                 case before_dot:
4096                 case at_dot:
4097                 case after_dot:
4098                         continue;
4099 #endif                          /* emacs */
4100
4101                 case no_op:
4102                 case begline:
4103                 case endline:
4104                 case begbuf:
4105                 case endbuf:
4106 #ifndef emacs
4107                 case wordbound:
4108                 case notwordbound:
4109                 case wordbeg:
4110                 case wordend:
4111 #endif
4112                 case push_dummy_failure:
4113                         continue;
4114
4115                 case jump_n:
4116                 case pop_failure_jump:
4117                 case maybe_pop_jump:
4118                 case jump:
4119                 case jump_past_alt:
4120                 case dummy_failure_jump:
4121                         EXTRACT_NUMBER_AND_INCR(j, p);
4122                         p += j;
4123                         if (j > 0)
4124                                 continue;
4125
4126                         /* Jump backward implies we just went through the body
4127                            of a loop and matched nothing.  Opcode jumped to
4128                            should be `on_failure_jump' or `succeed_n'.  Just
4129                            treat it like an ordinary jump.  For a * loop, it has
4130                            pushed its failure point already; if so, discard that
4131                            as redundant.  */
4132                         if ((re_opcode_t) * p != on_failure_jump
4133                             && (re_opcode_t) * p != succeed_n)
4134                                 continue;
4135
4136                         p++;
4137                         EXTRACT_NUMBER_AND_INCR(j, p);
4138                         p += j;
4139
4140                         /* If what's on the stack is where we are now, pop
4141                            it.  */
4142                         if (!FAIL_STACK_EMPTY()
4143                             && fail_stack.stack[fail_stack.avail - 1].pointer ==
4144                             p)
4145                                 fail_stack.avail--;
4146
4147                         continue;
4148
4149                 case on_failure_jump:
4150                 case on_failure_keep_string_jump:
4151                       handle_on_failure_jump:
4152                         EXTRACT_NUMBER_AND_INCR(j, p);
4153
4154                         /* For some patterns, e.g., `(a?)?', `p+j' here points
4155                            to the end of the pattern.  We don't want to push
4156                            such a point, since when we restore it above,
4157                            entering the switch will increment `p' past the end
4158                            of the pattern.  We don't need to push such a point
4159                            since we obviously won't find any more fastmap
4160                            entries beyond `pend'.  Such a pattern can match the
4161                            null string, though.  */
4162                         if (p + j < pend) {
4163                                 if (!PUSH_PATTERN_OP(p + j, fail_stack)) {
4164                                         RESET_FAIL_STACK();
4165                                         return -2;
4166                                 }
4167                         } else
4168                                 bufp->can_be_null = 1;
4169
4170                         if (succeed_n_p) {
4171                                 EXTRACT_NUMBER_AND_INCR(k, p);  /* Skip the n.  */
4172                                 succeed_n_p = false;
4173                         }
4174
4175                         continue;
4176
4177                 case succeed_n:
4178                         /* Get to the number of times to succeed.  */
4179                         p += 2;
4180
4181                         /* Increment p past the n for when k != 0.  */
4182                         EXTRACT_NUMBER_AND_INCR(k, p);
4183                         if (k == 0) {
4184                                 p -= 4;
4185                                 succeed_n_p = true;     /* Spaghetti code alert.  */
4186                                 goto handle_on_failure_jump;
4187                         }
4188                         continue;
4189
4190                 case set_number_at:
4191                         p += 4;
4192                         continue;
4193
4194                 case start_memory:
4195                 case stop_memory:
4196                         p += 2;
4197                         continue;
4198
4199                 case succeed:
4200                 default:
4201                         abort();        /* We have listed all the cases.  */
4202                 }               /* switch *p++ */
4203
4204                 /* Getting here means we have found the possible starting
4205                    characters for one path of the pattern -- and that the empty
4206                    string does not match.  We need not follow this path further.
4207                    Instead, look at the next alternative (remembered on the
4208                    stack), or quit if no more.  The test at the top of the loop
4209                    does these things.  */
4210                 path_can_be_null = false;
4211                 p = pend;
4212         }                       /* while p */
4213
4214         /* Set `can_be_null' for the last path (also the first path, if the
4215            pattern is empty).  */
4216         bufp->can_be_null |= path_can_be_null;
4217
4218 done:
4219         RESET_FAIL_STACK();
4220         return 0;
4221 }       /* re_compile_fastmap */
4222 \f
4223 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4224    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4225    this memory for recording register information.  STARTS and ENDS
4226    must be allocated using the malloc library routine, and must each
4227    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4228
4229    If NUM_REGS == 0, then subsequent matches should allocate their own
4230    register data.
4231
4232    Unless this function is called, the first search or match using
4233    PATTERN_BUFFER will allocate its own register data, without
4234    freeing the old data.  */
4235
4236 void
4237 re_set_registers(struct re_pattern_buffer *bufp, struct re_registers *regs,
4238                  unsigned num_regs, regoff_t * starts, regoff_t * ends)
4239 {
4240         if (num_regs) {
4241                 bufp->regs_allocated = REGS_REALLOCATE;
4242                 regs->num_regs = num_regs;
4243                 regs->start = starts;
4244                 regs->end = ends;
4245         } else {
4246                 bufp->regs_allocated = REGS_UNALLOCATED;
4247                 regs->num_regs = 0;
4248                 regs->start = regs->end = (regoff_t *) 0;
4249         }
4250 }
4251 \f
4252 /* Searching routines.  */
4253
4254 /* Like re_search_2, below, but only one string is specified, and
4255    doesn't let you say where to stop matching. */
4256
4257 int
4258 re_search(struct re_pattern_buffer *bufp, const char *string, int size,
4259           int startpos, int range, struct re_registers *regs)
4260 {
4261         return re_search_2(bufp, NULL, 0, string, size, startpos, range,
4262                            regs, size);
4263 }
4264
4265 #ifndef emacs
4266 /* Snarfed from src/lisp.h, needed for compiling [ce]tags. */
4267 # define bytecount_to_charcount(ptr, len) (len)
4268 # define charcount_to_bytecount(ptr, len) (len)
4269 typedef int Charcount;
4270 #endif
4271
4272 /* Using the compiled pattern in BUFP->buffer, first tries to match the
4273    virtual concatenation of STRING1 and STRING2, starting first at index
4274    STARTPOS, then at STARTPOS + 1, and so on.
4275
4276    With MULE, STARTPOS is a byte position, not a char position.  And the
4277    search will increment STARTPOS by the width of the current leading
4278    character.
4279
4280    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
4281
4282    RANGE is how far to scan while trying to match.  RANGE = 0 means try
4283    only at STARTPOS; in general, the last start tried is STARTPOS +
4284    RANGE.
4285
4286    With MULE, RANGE is a byte position, not a char position.  The last
4287    start tried is the character starting <= STARTPOS + RANGE.
4288
4289    In REGS, return the indices of the virtual concatenation of STRING1
4290    and STRING2 that matched the entire BUFP->buffer and its contained
4291    subexpressions.
4292
4293    Do not consider matching one past the index STOP in the virtual
4294    concatenation of STRING1 and STRING2.
4295
4296    We return either the position in the strings at which the match was
4297    found, -1 if no match, or -2 if error (such as failure
4298    stack overflow).  */
4299
4300 int
4301 re_search_2(struct re_pattern_buffer *bufp, const char *str1,
4302             int size1, const char *str2, int size2, int startpos,
4303             int range, struct re_registers *regs, int stop)
4304 {
4305         int val;
4306         re_char *string1 = (re_char *) str1;
4307         re_char *string2 = (re_char *) str2;
4308         REGISTER char *fastmap = bufp->fastmap;
4309         REGISTER RE_TRANSLATE_TYPE translate = bufp->translate;
4310         int total_size = size1 + size2;
4311         int endpos = startpos + range;
4312 #ifdef REGEX_BEGLINE_CHECK
4313         int anchored_at_begline = 0;
4314 #endif
4315         re_char *d;
4316         Charcount d_size;
4317
4318         /* Check for out-of-range STARTPOS.  */
4319         if (startpos < 0 || startpos > total_size)
4320                 return -1;
4321
4322         /* Fix up RANGE if it might eventually take us outside
4323            the virtual concatenation of STRING1 and STRING2.  */
4324         if (endpos < 0)
4325                 range = 0 - startpos;
4326         else if (endpos > total_size)
4327                 range = total_size - startpos;
4328
4329         /* If the search isn't to be a backwards one, don't waste time in a
4330            search for a pattern that must be anchored.  */
4331         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
4332             && range > 0) {
4333                 if (startpos > 0)
4334                         return -1;
4335                 else {
4336                         d = ((const unsigned char *)
4337                              (startpos >=
4338                               size1 ? string2 - size1 : string1) + startpos);
4339                         range = charcount_to_bytecount(d, 1);
4340                 }
4341         }
4342 #ifdef emacs
4343         /* In a forward search for something that starts with \=.
4344            don't keep searching past point.  */
4345         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot
4346             && range > 0) {
4347                 range =
4348                     BUF_PT(regex_emacs_buffer) - BUF_BEGV(regex_emacs_buffer)
4349                     - startpos;
4350                 if (range < 0)
4351                         return -1;
4352         }
4353 #endif                          /* emacs */
4354
4355         /* Update the fastmap now if not correct already.  */
4356         if (fastmap && !bufp->fastmap_accurate)
4357                 if (re_compile_fastmap(bufp) == -2)
4358                         return -2;
4359
4360 #ifdef REGEX_BEGLINE_CHECK
4361         {
4362                 unsigned long i = 0;
4363
4364                 while (i < bufp->used) {
4365                         if (bufp->buffer[i] == start_memory ||
4366                             bufp->buffer[i] == stop_memory)
4367                                 i += 2;
4368                         else
4369                                 break;
4370                 }
4371                 anchored_at_begline = i < bufp->used
4372                     && bufp->buffer[i] == begline;
4373         }
4374 #endif
4375
4376 #ifdef emacs
4377         SETUP_SYNTAX_CACHE_FOR_OBJECT(regex_match_object,
4378                                       regex_emacs_buffer,
4379                                       SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR
4380                                       (regex_match_object, regex_emacs_buffer,
4381                                        startpos), 1);
4382 #endif
4383
4384         /* Loop through the string, looking for a place to start matching.  */
4385         for (;;) {
4386 #ifdef REGEX_BEGLINE_CHECK
4387                 /* If the regex is anchored at the beginning of a line (i.e. with a ^),
4388                    then we can speed things up by skipping to the next beginning-of-
4389                    line. */
4390                 if (anchored_at_begline && startpos > 0 && startpos != size1 &&
4391                     range > 0) {
4392                         /* whose stupid idea was it anyway to make this
4393                            function take two strings to match?? */
4394                         int lim = 0;
4395                         int irange = range;
4396
4397                         if (startpos < size1 && startpos + range >= size1)
4398                                 lim = range - (size1 - startpos);
4399
4400                         d = ((const unsigned char *)
4401                              (startpos >=
4402                               size1 ? string2 - size1 : string1) + startpos);
4403                         DEC_CHARPTR(d); /* Ok, since startpos != size1. */
4404                         d_size = charcount_to_bytecount(d, 1);
4405
4406                         if (TRANSLATE_P(translate))
4407                                 while (range > lim && *d != '\n') {
4408                                         d += d_size;    /* Speedier INC_CHARPTR(d) */
4409                                         d_size = charcount_to_bytecount(d, 1);
4410                                         range -= d_size;
4411                         } else
4412                                 while (range > lim && *d != '\n') {
4413                                         d += d_size;    /* Speedier INC_CHARPTR(d) */
4414                                         d_size = charcount_to_bytecount(d, 1);
4415                                         range -= d_size;
4416                                 }
4417
4418                         startpos += irange - range;
4419                 }
4420 #endif                          /* REGEX_BEGLINE_CHECK */
4421
4422                 /* If a fastmap is supplied, skip quickly over characters that
4423                    cannot be the start of a match.  If the pattern can match the
4424                    null string, however, we don't need to skip characters; we want
4425                    the first null string.  */
4426                 if (fastmap && startpos < total_size && !bufp->can_be_null) {
4427                         if (range > 0) {        /* Searching forwards.  */
4428                                 int lim = 0;
4429                                 int irange = range;
4430
4431                                 if (startpos < size1
4432                                     && startpos + range >= size1)
4433                                         lim = range - (size1 - startpos);
4434
4435                                 d = ((const unsigned char *)
4436                                      (startpos >=
4437                                       size1 ? string2 - size1 : string1) +
4438                                      startpos);
4439
4440                                 /* Written out as an if-else to avoid testing `translate'
4441                                    inside the loop.  */
4442                                 if (TRANSLATE_P(translate))
4443                                         while (range > lim) {
4444 #ifdef MULE
4445                                                 Emchar buf_ch;
4446                                                 Bufbyte str[MAX_EMCHAR_LEN];
4447
4448                                                 buf_ch = charptr_emchar(d);
4449                                                 buf_ch = RE_TRANSLATE(buf_ch);
4450                                                 set_charptr_emchar(str, buf_ch);
4451                                                 if (buf_ch >= 0200
4452                                                     || fastmap[(unsigned char)
4453                                                                *str])
4454                                                         break;
4455 #else
4456                                                 if (fastmap
4457                                                     [(unsigned char)
4458                                                      RE_TRANSLATE(*d)])
4459                                                         break;
4460 #endif                          /* MULE */
4461                                                 d_size =
4462                                                     charcount_to_bytecount(d,
4463                                                                            1);
4464                                                 range -= d_size;
4465                                                 d += d_size;    /* Speedier INC_CHARPTR(d) */
4466                                 } else
4467                                         while (range > lim && !fastmap[*d]) {
4468                                                 d_size =
4469                                                     charcount_to_bytecount(d,
4470                                                                            1);
4471                                                 range -= d_size;
4472                                                 d += d_size;    /* Speedier INC_CHARPTR(d) */
4473                                         }
4474
4475                                 startpos += irange - range;
4476                         } else {        /* Searching backwards.  */
4477
4478                                 Emchar c = (size1 == 0 || startpos >= size1
4479                                             ? charptr_emchar(string2 +
4480                                                              startpos - size1)
4481                                             : charptr_emchar(string1 +
4482                                                              startpos));
4483                                 c = TRANSLATE(c);
4484 #ifdef MULE
4485                                 if (!(c >= 0200 || fastmap[(unsigned char)c]))
4486                                         goto advance;
4487 #else
4488                                 if (!fastmap[(unsigned char)c])
4489                                         goto advance;
4490 #endif
4491                         }
4492                 }
4493
4494                 /* If can't match the null string, and that's all we have left, fail.  */
4495                 if (range >= 0 && startpos == total_size && fastmap
4496                     && !bufp->can_be_null)
4497                         return -1;
4498
4499 #ifdef emacs                    /* XEmacs added, w/removal of immediate_quit */
4500                 if (!no_quit_in_re_search)
4501                         QUIT;
4502 #endif
4503                 val = re_match_2_internal(bufp, string1, size1, string2, size2,
4504                                           startpos, regs, stop);
4505 #ifndef REGEX_MALLOC
4506 #ifdef C_ALLOCA
4507                 alloca(0);
4508 #endif
4509 #endif
4510
4511                 if (val >= 0)
4512                         return startpos;
4513
4514                 if (val == -2)
4515                         return -2;
4516
4517               advance:
4518                 if (!range)
4519                         break;
4520                 else if (range > 0) {
4521                         d = ((const unsigned char *)
4522                              (startpos >=
4523                               size1 ? string2 - size1 : string1) + startpos);
4524                         d_size = charcount_to_bytecount(d, 1);
4525                         range -= d_size;
4526                         startpos += d_size;
4527                 } else {
4528                         /* Note startpos > size1 not >=.  If we are on the
4529                            string1/string2 boundary, we want to backup into string1. */
4530                         d = ((const unsigned char *)
4531                              (startpos >
4532                               size1 ? string2 - size1 : string1) + startpos);
4533                         DEC_CHARPTR(d);
4534                         d_size = charcount_to_bytecount(d, 1);
4535                         range += d_size;
4536                         startpos -= d_size;
4537                 }
4538         }
4539         return -1;
4540 }                               /* re_search_2 */
4541 \f
4542 /* Declarations and macros for re_match_2.  */
4543
4544 /* This converts PTR, a pointer into one of the search strings `string1'
4545    and `string2' into an offset from the beginning of that string.  */
4546 #define POINTER_TO_OFFSET(ptr)                  \
4547   (FIRST_STRING_P (ptr)                         \
4548    ? ((regoff_t) ((ptr) - string1))             \
4549    : ((regoff_t) ((ptr) - string2 + size1)))
4550
4551 /* Macros for dealing with the split strings in re_match_2.  */
4552
4553 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
4554
4555 /* Call before fetching a character with *d.  This switches over to
4556    string2 if necessary.  */
4557 #define REGEX_PREFETCH()                                                        \
4558   while (d == dend)                                                     \
4559     {                                                                   \
4560       /* End of string2 => fail.  */                                    \
4561       if (dend == end_match_2)                                          \
4562         goto fail;                                                      \
4563       /* End of string1 => advance to string2.  */                      \
4564       d = string2;                                                      \
4565       dend = end_match_2;                                               \
4566     }
4567
4568 /* Test if at very beginning or at very end of the virtual concatenation
4569    of `string1' and `string2'.  If only one string, it's `string2'.  */
4570 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4571 #define AT_STRINGS_END(d) ((d) == end2)
4572
4573 /* XEmacs change:
4574    If the given position straddles the string gap, return the equivalent
4575    position that is before or after the gap, respectively; otherwise,
4576    return the same position. */
4577 #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d))
4578 #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
4579
4580 /* Test if CH is a word-constituent character. (XEmacs change) */
4581 #define WORDCHAR_P_UNSAFE(ch)                                              \
4582   (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),   \
4583                                ch) == Sword)
4584
4585 /* Free everything we malloc.  */
4586 #ifdef MATCH_MAY_ALLOCATE
4587 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
4588 #define FREE_VARIABLES()                                                \
4589   do {                                                                  \
4590     REGEX_FREE_STACK (fail_stack.stack);                                \
4591     FREE_VAR (regstart);                                                \
4592     FREE_VAR (regend);                                                  \
4593     FREE_VAR (old_regstart);                                            \
4594     FREE_VAR (old_regend);                                              \
4595     FREE_VAR (best_regstart);                                           \
4596     FREE_VAR (best_regend);                                             \
4597     FREE_VAR (reg_info);                                                \
4598     FREE_VAR (reg_dummy);                                               \
4599     FREE_VAR (reg_info_dummy);                                          \
4600   } while (0)
4601 #else                           /* not MATCH_MAY_ALLOCATE */
4602 #define FREE_VARIABLES() ((void)0)      /* Do nothing!  But inhibit gcc warning.  */
4603 #endif                          /* MATCH_MAY_ALLOCATE */
4604
4605 /* These values must meet several constraints.  They must not be valid
4606    register values; since we have a limit of 255 registers (because
4607    we use only one byte in the pattern for the register number), we can
4608    use numbers larger than 255.  They must differ by 1, because of
4609    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
4610    be larger than the value for the highest register, so we do not try
4611    to actually save any registers when none are active.  */
4612 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4613 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4614 \f
4615 /* Matching routines.  */
4616
4617 #ifndef emacs                   /* Emacs never uses this.  */
4618 /* re_match is like re_match_2 except it takes only a single string.  */
4619
4620 int
4621 re_match(struct re_pattern_buffer *bufp, const char *string, int size,
4622          int pos, struct re_registers *regs)
4623 {
4624         int result =
4625             re_match_2_internal(bufp, NULL, 0, (re_char *) string, size,
4626                                 pos, regs, size);
4627         alloca(0);
4628         return result;
4629 }
4630 #endif                          /* not emacs */
4631
4632 /* re_match_2 matches the compiled pattern in BUFP against the
4633    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and
4634    SIZE2, respectively).  We start matching at POS, and stop matching
4635    at STOP.
4636
4637    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4638    store offsets for the substring each group matched in REGS.  See the
4639    documentation for exactly how many groups we fill.
4640
4641    We return -1 if no match, -2 if an internal error (such as the
4642    failure stack overflowing).  Otherwise, we return the length of the
4643    matched substring.  */
4644
4645 int
4646 re_match_2(struct re_pattern_buffer *bufp, const char *string1,
4647            int size1, const char *string2, int size2, int pos,
4648            struct re_registers *regs, int stop)
4649 {
4650         int result;
4651
4652 #ifdef emacs
4653         SETUP_SYNTAX_CACHE_FOR_OBJECT(regex_match_object,
4654                                       regex_emacs_buffer,
4655                                       SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR
4656                                       (regex_match_object, regex_emacs_buffer,
4657                                        pos), 1);
4658 #endif
4659
4660         result = re_match_2_internal(bufp, (re_char *) string1, size1,
4661                                      (re_char *) string2, size2,
4662                                      pos, regs, stop);
4663
4664         alloca(0);
4665         return result;
4666 }
4667
4668 /* This is a separate function so that we can force an alloca cleanup
4669    afterwards.  */
4670 static int
4671 re_match_2_internal(struct re_pattern_buffer *bufp, re_char * string1,
4672                     int size1, re_char * string2, int size2, int pos,
4673                     struct re_registers *regs, int stop)
4674 {
4675         /* General temporaries.  */
4676         int mcnt;
4677         unsigned char *p1;
4678         int should_succeed;     /* XEmacs change */
4679
4680         /* Just past the end of the corresponding string.  */
4681         re_char *end1, *end2;
4682
4683         /* Pointers into string1 and string2, just past the last characters in
4684            each to consider matching.  */
4685         re_char *end_match_1, *end_match_2;
4686
4687         /* Where we are in the data, and the end of the current string.  */
4688         const re_char *d, *dend;
4689
4690         /* Where we are in the pattern, and the end of the pattern.  */
4691         unsigned char *p = bufp->buffer;
4692         REGISTER unsigned char *pend = p + bufp->used;
4693
4694         /* Mark the opcode just after a start_memory, so we can test for an
4695            empty subpattern when we get to the stop_memory.  */
4696         re_char *just_past_start_mem = 0;
4697
4698         /* We use this to map every character in the string.  */
4699         RE_TRANSLATE_TYPE translate = bufp->translate;
4700
4701         /* Failure point stack.  Each place that can handle a failure further
4702            down the line pushes a failure point on this stack.  It consists of
4703            restart, regend, and reg_info for all registers corresponding to
4704            the subexpressions we're currently inside, plus the number of such
4705            registers, and, finally, two char *'s.  The first char * is where
4706            to resume scanning the pattern; the second one is where to resume
4707            scanning the strings.  If the latter is zero, the failure point is
4708            a ``dummy''; if a failure happens and the failure point is a dummy,
4709            it gets discarded and the next one is tried.  */
4710 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, this is global.  */
4711         fail_stack_type fail_stack;
4712 #endif
4713 #ifdef REGEX_DEBUG_FLAG
4714         static unsigned failure_id;
4715         int nfailure_points_pushed = 0, nfailure_points_popped = 0;
4716 #endif
4717
4718 #ifdef REL_ALLOC
4719         /* This holds the pointer to the failure stack, when
4720            it is allocated relocatably.  */
4721         fail_stack_elt_t *failure_stack_ptr;
4722 #endif
4723
4724         /* We fill all the registers internally, independent of what we
4725            return, for use in backreferences.  The number here includes
4726            an element for register zero.  */
4727         int num_regs = bufp->re_ngroups + 1;
4728
4729         /* The currently active registers.  */
4730         int lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4731         int highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4732
4733         /* Information on the contents of registers. These are pointers into
4734            the input strings; they record just what was matched (on this
4735            attempt) by a subexpression part of the pattern, that is, the
4736            regnum-th regstart pointer points to where in the pattern we began
4737            matching and the regnum-th regend points to right after where we
4738            stopped matching the regnum-th subexpression.  (The zeroth register
4739            keeps track of what the whole pattern matches.)  */
4740 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4741         re_char **regstart, **regend;
4742 #endif
4743
4744         /* If a group that's operated upon by a repetition operator fails to
4745            match anything, then the register for its start will need to be
4746            restored because it will have been set to wherever in the string we
4747            are when we last see its open-group operator.  Similarly for a
4748            register's end.  */
4749 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4750         re_char **old_regstart, **old_regend;
4751 #endif
4752
4753         /* The is_active field of reg_info helps us keep track of which (possibly
4754            nested) subexpressions we are currently in. The matched_something
4755            field of reg_info[reg_num] helps us tell whether or not we have
4756            matched any of the pattern so far this time through the reg_num-th
4757            subexpression.  These two fields get reset each time through any
4758            loop their register is in.  */
4759 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, this is global.  */
4760         register_info_type *reg_info;
4761 #endif
4762
4763         /* The following record the register info as found in the above
4764            variables when we find a match better than any we've seen before.
4765            This happens as we backtrack through the failure points, which in
4766            turn happens only if we have not yet matched the entire string. */
4767         unsigned best_regs_set = false;
4768 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4769         re_char **best_regstart, **best_regend;
4770 #endif
4771
4772         /* Logically, this is `best_regend[0]'.  But we don't want to have to
4773            allocate space for that if we're not allocating space for anything
4774            else (see below).  Also, we never need info about register 0 for
4775            any of the other register vectors, and it seems rather a kludge to
4776            treat `best_regend' differently than the rest.  So we keep track of
4777            the end of the best match so far in a separate variable.  We
4778            initialize this to NULL so that when we backtrack the first time
4779            and need to test it, it's not garbage.  */
4780         re_char *match_end = NULL;
4781
4782         /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
4783         int set_regs_matched_done = 0;
4784
4785         /* Used when we pop values we don't care about.  */
4786 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4787         re_char **reg_dummy;
4788         register_info_type *reg_info_dummy;
4789 #endif
4790
4791 #ifdef REGEX_DEBUG_FLAG
4792         /* Counts the total number of registers pushed.  */
4793         unsigned num_regs_pushed = 0;
4794 #endif
4795
4796         /* 1 if this match ends in the same string (string1 or string2)
4797            as the best previous match.  */
4798         re_bool same_str_p;
4799
4800         /* 1 if this match is the best seen so far.  */
4801         re_bool best_match_p;
4802
4803         DEBUG_PRINT1("\n\nEntering re_match_2.\n");
4804
4805         INIT_FAIL_STACK();
4806
4807 #ifdef MATCH_MAY_ALLOCATE
4808         /* Do not bother to initialize all the register variables if there are
4809            no groups in the pattern, as it takes a fair amount of time.  If
4810            there are groups, we include space for register 0 (the whole
4811            pattern), even though we never use it, since it simplifies the
4812            array indexing.  We should fix this.  */
4813         if (bufp->re_ngroups) {
4814                 regstart = REGEX_TALLOC(num_regs, re_char *);
4815                 regend = REGEX_TALLOC(num_regs, re_char *);
4816                 old_regstart = REGEX_TALLOC(num_regs, re_char *);
4817                 old_regend = REGEX_TALLOC(num_regs, re_char *);
4818                 best_regstart = REGEX_TALLOC(num_regs, re_char *);
4819                 best_regend = REGEX_TALLOC(num_regs, re_char *);
4820                 reg_info = REGEX_TALLOC(num_regs, register_info_type);
4821                 reg_dummy = REGEX_TALLOC(num_regs, re_char *);
4822                 reg_info_dummy = REGEX_TALLOC(num_regs, register_info_type);
4823
4824                 if (!
4825                     (regstart && regend && old_regstart && old_regend
4826                      && reg_info && best_regstart && best_regend && reg_dummy
4827                      && reg_info_dummy)) {
4828                         FREE_VARIABLES();
4829                         return -2;
4830                 }
4831         } else {
4832                 /* We must initialize all our variables to NULL, so that
4833                    `FREE_VARIABLES' doesn't try to free them.  */
4834                 regstart = regend = old_regstart = old_regend = best_regstart
4835                     = best_regend = reg_dummy = NULL;
4836                 reg_info = reg_info_dummy = (register_info_type *) NULL;
4837         }
4838 #endif                          /* MATCH_MAY_ALLOCATE */
4839
4840         /* The starting position is bogus.  */
4841         if (pos < 0 || pos > size1 + size2) {
4842                 FREE_VARIABLES();
4843                 return -1;
4844         }
4845
4846         /* Initialize subexpression text positions to -1 to mark ones that no
4847            start_memory/stop_memory has been seen for. Also initialize the
4848            register information struct.  */
4849         for (mcnt = 1; mcnt < num_regs; mcnt++) {
4850                 regstart[mcnt] = regend[mcnt]
4851                     = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4852
4853                 REG_MATCH_NULL_STRING_P(reg_info[mcnt]) =
4854                     MATCH_NULL_UNSET_VALUE;
4855                 IS_ACTIVE(reg_info[mcnt]) = 0;
4856                 MATCHED_SOMETHING(reg_info[mcnt]) = 0;
4857                 EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
4858         }
4859         /* We move `string1' into `string2' if the latter's empty -- but not if
4860            `string1' is null.  */
4861         if (size2 == 0 && string1 != NULL) {
4862                 string2 = string1;
4863                 size2 = size1;
4864                 string1 = 0;
4865                 size1 = 0;
4866         }
4867         end1 = string1 + size1;
4868         end2 = string2 + size2;
4869
4870         /* Compute where to stop matching, within the two strings.  */
4871         if (stop <= size1) {
4872                 end_match_1 = string1 + stop;
4873                 end_match_2 = string2;
4874         } else {
4875                 end_match_1 = end1;
4876                 end_match_2 = string2 + stop - size1;
4877         }
4878
4879         /* `p' scans through the pattern as `d' scans through the data.
4880            `dend' is the end of the input string that `d' points within.  `d'
4881            is advanced into the following input string whenever necessary, but
4882            this happens before fetching; therefore, at the beginning of the
4883            loop, `d' can be pointing at the end of a string, but it cannot
4884            equal `string2'.  */
4885         if (size1 > 0 && pos <= size1) {
4886                 d = string1 + pos;
4887                 dend = end_match_1;
4888         } else {
4889                 d = string2 + pos - size1;
4890                 dend = end_match_2;
4891         }
4892
4893         DEBUG_PRINT1("The compiled pattern is: \n");
4894         DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
4895         DEBUG_PRINT1("The string to match is: `");
4896         DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
4897         DEBUG_PRINT1("'\n");
4898
4899         /* This loops over pattern commands.  It exits by returning from the
4900            function if the match is complete, or it drops through if the match
4901            fails at this starting point in the input data.  */
4902         for (;;) {
4903                 DEBUG_PRINT2("\n0x%lx: ", (long)p);
4904 #ifdef emacs                    /* XEmacs added, w/removal of immediate_quit */
4905                 if (!no_quit_in_re_search)
4906                         QUIT;
4907 #endif
4908
4909                 if (p == pend) {
4910                         /* End of pattern means we might have succeeded.  */
4911                         DEBUG_PRINT1("end of pattern ... ");
4912
4913                         /* If we haven't matched the entire string, and we want
4914                            the longest match, try backtracking.  */
4915                         if (d != end_match_2) {
4916                                 same_str_p = (FIRST_STRING_P(match_end)
4917                                               == MATCHING_IN_FIRST_STRING);
4918
4919                                 /* AIX compiler got confused when this was
4920                                    combined with the previous declaration.  */
4921                                 if (same_str_p)
4922                                         best_match_p = d > match_end;
4923                                 else
4924                                         best_match_p =
4925                                             !MATCHING_IN_FIRST_STRING;
4926
4927                                 DEBUG_PRINT1("backtracking.\n");
4928
4929                                 if (!FAIL_STACK_EMPTY()) {
4930                                         /* More failure points to try.  */
4931
4932                                         /* If exceeds best match so far, save
4933                                            it.  */
4934                                         if (!best_regs_set || best_match_p) {
4935                                                 best_regs_set = true;
4936                                                 match_end = d;
4937
4938                                                 DEBUG_PRINT1
4939                                                     ("\nSAVING match as best so far.\n");
4940
4941                                                 for (mcnt = 1; mcnt < num_regs;
4942                                                      mcnt++) {
4943                                                         best_regstart[mcnt] =
4944                                                             regstart[mcnt];
4945                                                         best_regend[mcnt] =
4946                                                             regend[mcnt];
4947                                                 }
4948                                         }
4949                                         goto fail;
4950                                 }
4951
4952                                 /* If no failure points, don't restore garbage.
4953                                    And if last match is real best match, don't
4954                                    restore second best one. */
4955                                 else if (best_regs_set && !best_match_p) {
4956                                       restore_best_regs:
4957                                         /* Restore best match.  It may happen
4958                                            that `dend == end_match_1' while the
4959                                            restored d is in string2.  For
4960                                            example, the pattern `x.*y.*z'
4961                                            against the strings `x-' and `y-z-',
4962                                            if the two strings are not
4963                                            consecutive in memory.  */
4964                                         DEBUG_PRINT1
4965                                             ("Restoring best registers.\n");
4966
4967                                         d = match_end;
4968                                         dend = ((d >= string1 && d <= end1)
4969                                                 ? end_match_1 : end_match_2);
4970
4971                                         for (mcnt = 1; mcnt < num_regs; mcnt++) {
4972                                                 regstart[mcnt] =
4973                                                     best_regstart[mcnt];
4974                                                 regend[mcnt] =
4975                                                     best_regend[mcnt];
4976                                         }
4977                                 }
4978                         }
4979                         /* d != end_match_2 */
4980                 succeed_label:
4981                         DEBUG_PRINT1("Accepting match.\n");
4982                         {
4983                                 int num_nonshy_regs = bufp->re_nsub + 1;
4984                                 /* If caller wants register contents data back,
4985                                    fill REGS.  */
4986                                 if (regs && !bufp->no_sub) {
4987                                         /* Have the register data arrays been
4988                                            allocated?  */
4989                                         if (bufp->regs_allocated == REGS_UNALLOCATED) {
4990                                           /* No.  So allocate them with malloc.
4991                                              We need one extra element beyond
4992                                              `num_regs' for the `-1' marker GNU
4993                                              code uses.  */
4994                                                 regs->num_regs = MAX(RE_NREGS, num_nonshy_regs + 1);
4995                                                 regs->start = TALLOC(regs->num_regs, regoff_t);
4996                                                 regs->end = TALLOC(regs->num_regs, regoff_t);
4997                                                 if (regs->start == NULL || regs->end == NULL) {
4998                                                         FREE_VARIABLES ();
4999                                                         return -2;
5000                                                 }
5001                                                 bufp->regs_allocated = REGS_REALLOCATE;
5002                                         } else if (bufp->regs_allocated == REGS_REALLOCATE) {
5003                                           /* Yes.  If we need more elements than were already
5004                                              allocated, reallocate them.  If we need fewer, just
5005                                              leave it alone.  */
5006                                                 if (regs->num_regs < num_nonshy_regs + 1) {
5007                                                         regs->num_regs = num_nonshy_regs + 1;
5008                                                         RETALLOC(regs->start, regs->num_regs, regoff_t);
5009                                                         RETALLOC(regs->end, regs->num_regs, regoff_t);
5010                                                         if (regs->start == NULL || regs->end == NULL) {
5011                                                                 FREE_VARIABLES();
5012                                                                 return -2;
5013                                                         }
5014                                                 }
5015                                         } else {
5016                                                 /* The braces fend off a "empty body in an else-statement"
5017                                                    warning under GCC when assert expands to nothing.  */
5018                                                 assert (bufp->regs_allocated == REGS_FIXED);
5019                                         }
5020
5021                                         /* Convert the pointer data in `regstart' and `regend' to
5022                                            indices.  Register zero has to be set differently,
5023                                            since we haven't kept track of any info for it.  */
5024                                         if (regs->num_regs > 0) {
5025                                                 regs->start[0] = pos;
5026                                                 regs->end[0] = (MATCHING_IN_FIRST_STRING
5027                                                                 ? ((regoff_t) (d - string1))
5028                                                                 : ((regoff_t) (d - string2 + size1)));
5029                                         }
5030
5031                                         /* Map over the NUM_NONSHY_REGS non-shy internal registers.
5032                                            Copy each into the corresponding external register.
5033                                            N.B. MCNT indexes external registers. */
5034                                         for (mcnt = 1;
5035                                              mcnt < MIN (num_nonshy_regs, regs->num_regs);
5036                                              mcnt++) {
5037                                                 int ireg = bufp->external_to_internal_register[mcnt];
5038
5039                                                 if (REG_UNSET (regstart[ireg]) || REG_UNSET (regend[ireg]))
5040                                                         regs->start[mcnt] = regs->end[mcnt] = -1;
5041                                                 else {
5042                                                         regs->start[mcnt]
5043                                                                 = (regoff_t) POINTER_TO_OFFSET (regstart[ireg]);
5044                                                         regs->end[mcnt]
5045                                                                 = (regoff_t) POINTER_TO_OFFSET (regend[ireg]);
5046                                                 }
5047                                         }
5048                                 } /* regs && !bufp->no_sub */
5049
5050                                 /* If we have regs and the regs structure has
5051                                    more elements than were in the pattern, set
5052                                    the extra elements to -1.  If we
5053                                    (re)allocated the registers, this is the
5054                                    case, because we always allocate enough to
5055                                    have at least one -1 at the end.
5056
5057                                    We do this even when no_sub is set because
5058                                    some applications (XEmacs) reuse register
5059                                    structures which may contain stale
5060                                    information, and permit attempts to access
5061                                    those registers.
5062
5063                                    It would be possible to require the caller to
5064                                    do this, but we'd have to change the API for
5065                                    this function to reflect that, and audit all
5066                                    callers. */
5067                                 if (regs && regs->num_regs > 0)
5068                                         for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++)
5069                                                 regs->start[mcnt] = regs->end[mcnt] = -1;
5070                         }
5071
5072                         DEBUG_PRINT4("%u failure points pushed, %u popped (%u remain).\n",
5073                                       nfailure_points_pushed, nfailure_points_popped,
5074                                       nfailure_points_pushed - nfailure_points_popped);
5075                         DEBUG_PRINT2("%u registers pushed.\n", num_regs_pushed);
5076
5077                         mcnt = d - pos - (MATCHING_IN_FIRST_STRING
5078                                           ? string1
5079                                           : string2 - size1);
5080
5081                         DEBUG_PRINT2("Returning %d from re_match_2.\n", mcnt);
5082
5083                         FREE_VARIABLES();
5084                         return mcnt;
5085                 }
5086
5087                 /* Otherwise match next pattern command.  */
5088                 switch (SWITCH_ENUM_CAST((re_opcode_t) *p++)) {
5089                         /* Ignore these.  Used to ignore the n of succeed_n's
5090                            which currently have n == 0.  */
5091                 case no_op:
5092                         DEBUG_PRINT1("EXECUTING no_op.\n");
5093                         break;
5094
5095                 case succeed:
5096                         DEBUG_PRINT1("EXECUTING succeed.\n");
5097                         goto succeed_label;
5098
5099                         /* Match the next n pattern characters exactly.  The
5100                            following byte in the pattern defines n, and the n
5101                            bytes after that are the characters to match.  */
5102                 case exactn:
5103                         mcnt = *p++;
5104                         DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
5105
5106                         /* This is written out as an if-else so we don't waste
5107                            time testing `translate' inside the loop.  */
5108                         if (TRANSLATE_P(translate)) {
5109                                 do {
5110 #ifdef MULE
5111                                         Emchar pat_ch, buf_ch;
5112                                         Bytecount pat_len;
5113
5114                                         REGEX_PREFETCH();
5115                                         pat_ch = charptr_emchar(p);
5116                                         buf_ch = charptr_emchar(d);
5117                                         if (RE_TRANSLATE(buf_ch) != pat_ch)
5118                                                 goto fail;
5119
5120                                         pat_len = charcount_to_bytecount(p, 1);
5121                                         p += pat_len;
5122                                         INC_CHARPTR(d);
5123
5124                                         mcnt -= pat_len;
5125 #else                           /* not MULE */
5126                                         REGEX_PREFETCH();
5127                                         if ((unsigned char)RE_TRANSLATE(*d++) !=
5128                                             *p++)
5129                                                 goto fail;
5130                                         mcnt--;
5131 #endif
5132                                 }
5133                                 while (mcnt > 0);
5134                         } else {
5135                                 do {
5136                                         REGEX_PREFETCH();
5137                                         if (*d++ != *p++)
5138                                                 goto fail;
5139                                 }
5140                                 while (--mcnt);
5141                         }
5142                         SET_REGS_MATCHED();
5143                         break;
5144
5145                         /* Match any character except possibly a newline or a
5146                            null.  */
5147                 case anychar:
5148                         DEBUG_PRINT1("EXECUTING anychar.\n");
5149
5150                         REGEX_PREFETCH();
5151
5152                         if ((!(bufp->syntax & RE_DOT_NEWLINE)
5153                              && TRANSLATE(*d) == '\n')
5154                             || (bufp->syntax & RE_DOT_NOT_NULL
5155                                 && TRANSLATE(*d) == '\000'))
5156                                 goto fail;
5157
5158                         SET_REGS_MATCHED();
5159                         DEBUG_PRINT2("  Matched `%d'.\n", *d);
5160                         INC_CHARPTR(d); /* XEmacs change */
5161                         break;
5162
5163                 case charset:
5164                 case charset_not: {
5165                         REGISTER unsigned char c;
5166                         re_bool not_p =
5167                                 (re_opcode_t) * (p - 1) == charset_not;
5168
5169                         DEBUG_PRINT2("EXECUTING charset%s.\n",
5170                                      not_p ? "_not" : "");
5171
5172                         REGEX_PREFETCH();
5173                         /* The character to match.  */
5174                         c = TRANSLATE(*d);
5175
5176                         /* Cast to `unsigned' instead of `unsigned char'
5177                            in case the bit list is a full 32 bytes
5178                            long.  */
5179                         if (c < (unsigned)(*p * BYTEWIDTH)
5180                             && p[1 +
5181                                  c / BYTEWIDTH] & (1 << (c %
5182                                                          BYTEWIDTH)))
5183                                 not_p = !not_p;
5184
5185                         p += 1 + *p;
5186
5187                         if (!not_p)
5188                                 goto fail;
5189
5190                         SET_REGS_MATCHED();
5191                         INC_CHARPTR(d); /* XEmacs change */
5192                         break;
5193                 }
5194
5195 #ifdef MULE
5196                 case charset_mule:
5197                 case charset_mule_not: {
5198                         REGISTER Emchar c;
5199                         re_bool not_p =
5200                                 (re_opcode_t) * (p - 1) == charset_mule_not;
5201
5202                         DEBUG_PRINT2("EXECUTING charset_mule%s.\n",
5203                                      not_p ? "_not" : "");
5204
5205                         REGEX_PREFETCH();
5206                         c = charptr_emchar((const Bufbyte *)d);
5207                         /* The character to match.  */
5208                         c = TRANSLATE(c);
5209
5210                         if (EQ
5211                             (Qt,
5212                              unified_range_table_lookup(p, c, Qnil)))
5213                                 not_p = !not_p;
5214
5215                         p += unified_range_table_bytes_used(p);
5216
5217                         if (!not_p)
5218                                 goto fail;
5219
5220                         SET_REGS_MATCHED();
5221                         INC_CHARPTR(d);
5222                         break;
5223                 }
5224 #endif                          /* MULE */
5225
5226                         /* The beginning of a group is represented by
5227                            start_memory.  The arguments are the register number
5228                            in the next byte, and the number of groups inner to
5229                            this one in the next.  The text matched within the
5230                            group is recorded (in the internal registers data
5231                            structure) under the register number.  */
5232                 case start_memory:
5233                         DEBUG_PRINT3("EXECUTING start_memory %d (%d):\n", *p,
5234                                      p[1]);
5235
5236                         /* Find out if this group can match the empty string.  */
5237                         p1 = p; /* To send to group_match_null_string_p.  */
5238
5239                         if (REG_MATCH_NULL_STRING_P(reg_info[*p]) ==
5240                             MATCH_NULL_UNSET_VALUE)
5241                                 REG_MATCH_NULL_STRING_P(reg_info[*p])
5242                                     = group_match_null_string_p(&p1, pend,
5243                                                                 reg_info);
5244
5245                         /* Save the position in the string where we were the
5246                            last time we were at this open-group operator in case
5247                            the group is operated upon by a repetition operator,
5248                            e.g., with `(a*)*b' against `ab'; then we want to
5249                            ignore where we are now in the string in case this
5250                            attempt to match fails.  */
5251                         old_regstart[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
5252                             ? REG_UNSET(regstart[*p]) ? d : regstart[*p]
5253                             : regstart[*p];
5254                         DEBUG_PRINT2("  old_regstart: %d\n",
5255                                      POINTER_TO_OFFSET(old_regstart[*p]));
5256
5257                         regstart[*p] = d;
5258                         DEBUG_PRINT2("  regstart: %d\n",
5259                                      POINTER_TO_OFFSET(regstart[*p]));
5260
5261                         IS_ACTIVE(reg_info[*p]) = 1;
5262                         MATCHED_SOMETHING(reg_info[*p]) = 0;
5263
5264                         /* Clear this whenever we change the register activity
5265                            status.  */
5266                         set_regs_matched_done = 0;
5267
5268                         /* This is the new highest active register.  */
5269                         highest_active_reg = *p;
5270
5271                         /* If nothing was active before, this is the new lowest
5272                            active register.  */
5273                         if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5274                                 lowest_active_reg = *p;
5275
5276                         /* Move past the register number and inner group
5277                            count.  */
5278                         p += 2;
5279                         just_past_start_mem = p;
5280
5281                         break;
5282
5283                         /* The stop_memory opcode represents the end of a group.
5284                            Its arguments are the same as start_memory's: the
5285                            register number, and the number of inner groups.  */
5286                 case stop_memory:
5287                         DEBUG_PRINT3("EXECUTING stop_memory %d (%d):\n", *p,
5288                                      p[1]);
5289
5290                         /* We need to save the string position the last time we
5291                            were at this close-group operator in case the group
5292                            is operated upon by a repetition operator, e.g., with
5293                            `((a*)*(b*)*)*' against `aba'; then we want to ignore
5294                            where we are now in the string in case this attempt
5295                            to match fails.  */
5296                         old_regend[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
5297                             ? REG_UNSET(regend[*p]) ? d : regend[*p]
5298                             : regend[*p];
5299                         DEBUG_PRINT2("      old_regend: %d\n",
5300                                      POINTER_TO_OFFSET(old_regend[*p]));
5301
5302                         regend[*p] = d;
5303                         DEBUG_PRINT2("      regend: %d\n",
5304                                      POINTER_TO_OFFSET(regend[*p]));
5305
5306                         /* This register isn't active anymore.  */
5307                         IS_ACTIVE(reg_info[*p]) = 0;
5308
5309                         /* Clear this whenever we change the register activity
5310                            status.  */
5311                         set_regs_matched_done = 0;
5312
5313                         /* If this was the only register active, nothing is
5314                            active anymore.  */
5315                         if (lowest_active_reg == highest_active_reg) {
5316                                 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5317                                 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5318                         } else {        /* We must scan for the new highest
5319                                            active register, since it isn't
5320                                            necessarily one less than now:
5321                                            consider (a(b)c(d(e)f)g).  When group
5322                                            3 ends, after the f), the new highest
5323                                            active register is 1.  */
5324                                 unsigned char r = *p - 1;
5325                                 while (r > 0 && !IS_ACTIVE(reg_info[r]))
5326                                         r--;
5327
5328                                 /* If we end up at register zero, that means
5329                                    that we saved the registers as the result of
5330                                    an `on_failure_jump', not a `start_memory',
5331                                    and we jumped to past the innermost
5332                                    `stop_memory'.  For example, in ((.)*) we
5333                                    save registers 1 and 2 as a result of the *,
5334                                    but when we pop back to the second ), we are
5335                                    at the stop_memory 1.  Thus, nothing is
5336                                    active.  */
5337                                 if (r == 0) {
5338                                         lowest_active_reg =
5339                                             NO_LOWEST_ACTIVE_REG;
5340                                         highest_active_reg =
5341                                             NO_HIGHEST_ACTIVE_REG;
5342                                 } else {
5343                                         highest_active_reg = r;
5344
5345                                         /* 98/9/21 jhod: We've also gotta set
5346                                            lowest_active_reg, don't we? */
5347                                         r = 1;
5348                                         while (r < highest_active_reg
5349                                                && !IS_ACTIVE(reg_info[r]))
5350                                                 r++;
5351                                         lowest_active_reg = r;
5352                                 }
5353                         }
5354
5355                         /* If just failed to match something this time around
5356                            with a group that's operated on by a repetition
5357                            operator, try to force exit from the ``loop'', and
5358                            restore the register information for this group that
5359                            we had before trying this last match.  */
5360                         if ((!MATCHED_SOMETHING(reg_info[*p])
5361                              || just_past_start_mem == p - 1)
5362                             && (p + 2) < pend) {
5363                                 re_bool is_a_jump_n = false;
5364
5365                                 p1 = p + 2;
5366                                 mcnt = 0;
5367                                 switch ((unsigned int)(re_opcode_t)*p1++) {
5368                                 case jump_n:
5369                                         is_a_jump_n = true;
5370                                 case pop_failure_jump:
5371                                 case maybe_pop_jump:
5372                                 case jump:
5373                                 case dummy_failure_jump:
5374                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
5375                                         if (is_a_jump_n)
5376                                                 p1 += 2;
5377                                         break;
5378
5379                                 default:
5380                                         /* do nothing */ ;
5381                                 }
5382                                 p1 += mcnt;
5383
5384                                 /* If the next operation is a jump backwards in
5385                                    the pattern to an on_failure_jump right
5386                                    before the start_memory corresponding to this
5387                                    stop_memory, exit from the loop by forcing a
5388                                    failure after pushing on the stack the
5389                                    on_failure_jump's jump in the pattern, and
5390                                    d.  */
5391                                 if (mcnt < 0
5392                                     && (re_opcode_t) * p1 == on_failure_jump
5393                                     && (re_opcode_t) p1[3] == start_memory
5394                                     && p1[4] == *p) {
5395                                         /* If this group ever matched anything,
5396                                            then restore what its registers were
5397                                            before trying this last failed match,
5398                                            e.g., with `(a*)*b' against `ab' for
5399                                            regstart[1], and, e.g., with
5400                                            `((a*)*(b*)*)*' against `aba' for
5401                                            regend[3].
5402
5403                                            Also restore the registers for inner
5404                                            groups for, e.g., `((a*)(b*))*'
5405                                            against `aba' (register 3 would
5406                                            otherwise get trashed).  */
5407
5408                                         if (EVER_MATCHED_SOMETHING
5409                                             (reg_info[*p])) {
5410                                                 int r;
5411
5412                                                 EVER_MATCHED_SOMETHING(reg_info
5413                                                                        [*p]) =
5414                                                     0;
5415
5416                                                 /* Restore this and inner
5417                                                    groups' (if any)
5418                                                    registers.  */
5419                                                 for (r = *p; r < *p + *(p + 1);
5420                                                      r++) {
5421                                                         regstart[r] =
5422                                                             old_regstart[r];
5423
5424                                                         /* xx why this test?  */
5425                                                         if (old_regend[r] >=
5426                                                             regstart[r])
5427                                                                 regend[r] =
5428                                                                     old_regend
5429                                                                     [r];
5430                                                 }
5431                                         }
5432                                         p1++;
5433                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
5434                                         PUSH_FAILURE_POINT(p1 + mcnt, d, -2);
5435
5436                                         goto fail;
5437                                 }
5438                         }
5439
5440                         /* Move past the register number and the inner group
5441                            count.  */
5442                         p += 2;
5443                         break;
5444
5445                         /* \<digit> has been turned into a `duplicate' command
5446                            which is followed by the numeric value of <digit> as
5447                            the register number.  (Already passed through
5448                            external-to-internal-register mapping, so it refers
5449                            to the actual group number, not the non-shy-only
5450                            numbering used in the external world.) */
5451                 case duplicate:
5452                         {
5453                                 REGISTER re_char *d2, *dend2;
5454                                 /* Get which register to match against.  */
5455                                 int regno = *p++;
5456                                 DEBUG_PRINT2("EXECUTING duplicate %d.\n",
5457                                              regno);
5458
5459                                 /* Can't back reference a group which we've
5460                                    never matched.  */
5461                                 if (REG_UNSET(regstart[regno])
5462                                     || REG_UNSET(regend[regno]))
5463                                         goto fail;
5464
5465                                 /* Where in input to try to start matching.  */
5466                                 d2 = regstart[regno];
5467
5468                                 /* Where to stop matching; if both the place to
5469                                    start and the place to stop matching are in
5470                                    the same string, then set to the place to
5471                                    stop, otherwise, for now have to use the end
5472                                    of the first string.  */
5473
5474                                 dend2 = ((FIRST_STRING_P(regstart[regno])
5475                                           == FIRST_STRING_P(regend[regno]))
5476                                          ? regend[regno] : end_match_1);
5477                                 for (;;) {
5478                                         /* If necessary, advance to next segment
5479                                            in register contents.  */
5480                                         while (d2 == dend2) {
5481                                                 if (dend2 == end_match_2)
5482                                                         break;
5483                                                 if (dend2 == regend[regno])
5484                                                         break;
5485
5486                                                 /* End of string1 => advance to
5487                                                    string2. */
5488                                                 d2 = string2;
5489                                                 dend2 = regend[regno];
5490                                         }
5491                                         /* At end of register contents =>
5492                                            success */
5493                                         if (d2 == dend2)
5494                                                 break;
5495
5496                                         /* If necessary, advance to next segment
5497                                            in data.  */
5498                                         REGEX_PREFETCH();
5499
5500                                         /* How many characters left in this segment to match.  */
5501                                         mcnt = dend - d;
5502
5503                                         /* Want how many consecutive characters
5504                                            we can match in one shot, so, if
5505                                            necessary, adjust the count.  */
5506                                         if (mcnt > dend2 - d2)
5507                                                 mcnt = dend2 - d2;
5508
5509                                         /* Compare that many; failure if
5510                                            mismatch, else move past them.  */
5511                                         if (TRANSLATE_P(translate)
5512                                             ? bcmp_translate(
5513                                                     (const unsigned char*)d,
5514                                                     (const unsigned char*)d2,
5515                                                     mcnt, translate)
5516                                             : memcmp(d, d2, mcnt)) {
5517                                                 goto fail;
5518                                         }
5519                                         d += mcnt, d2 += mcnt;
5520
5521                                         /* Do this because we've match some
5522                                            characters.  */
5523                                         SET_REGS_MATCHED();
5524                                 }
5525                         }
5526                         break;
5527
5528                         /* begline matches the empty string at the beginning of
5529                            the string (unless `not_bol' is set in `bufp'), and,
5530                            if `newline_anchor' is set, after newlines.  */
5531                 case begline:
5532                         DEBUG_PRINT1("EXECUTING begline.\n");
5533
5534                         if (AT_STRINGS_BEG(d)) {
5535                                 if (!bufp->not_bol)
5536                                         break;
5537                         } else if (d[-1] == '\n' && bufp->newline_anchor) {
5538                                 break;
5539                         }
5540                         /* In all other cases, we fail.  */
5541                         goto fail;
5542
5543                         /* endline is the dual of begline.  */
5544                 case endline:
5545                         DEBUG_PRINT1("EXECUTING endline.\n");
5546
5547                         if (AT_STRINGS_END(d)) {
5548                                 if (!bufp->not_eol)
5549                                         break;
5550                         }
5551
5552                         /* We have to ``prefetch'' the next character.  */
5553                         else if ((d == end1 ? *string2 : *d) == '\n'
5554                                  && bufp->newline_anchor) {
5555                                 break;
5556                         }
5557                         goto fail;
5558
5559                         /* Match at the very beginning of the data.  */
5560                 case begbuf:
5561                         DEBUG_PRINT1("EXECUTING begbuf.\n");
5562                         if (AT_STRINGS_BEG(d))
5563                                 break;
5564                         goto fail;
5565
5566                         /* Match at the very end of the data.  */
5567                 case endbuf:
5568                         DEBUG_PRINT1("EXECUTING endbuf.\n");
5569                         if (AT_STRINGS_END(d))
5570                                 break;
5571                         goto fail;
5572
5573                         /* on_failure_keep_string_jump is used to optimize
5574                            `.*\n'.  It pushes NULL as the value for the string
5575                            on the stack.  Then `pop_failure_point' will keep the
5576                            current value for the string, instead of restoring
5577                            it.  To see why, consider matching `foo\nbar' against
5578                            `.*\n'.  The .* matches the foo; then the . fails
5579                            against the \n.  But the next thing we want to do is
5580                            match the \n against the \n; if we restored the
5581                            string value, we would be back at the foo.
5582
5583                            Because this is used only in specific cases, we don't
5584                            need to check all the things that `on_failure_jump'
5585                            does, to make sure the right things get saved on the
5586                            stack.  Hence we don't share its code.  The only
5587                            reason to push anything on the stack at all is that
5588                            otherwise we would have to change `anychar's code to
5589                            do something besides goto fail in this case; that
5590                            seems worse than this.  */
5591                 case on_failure_keep_string_jump:
5592                         DEBUG_PRINT1("EXECUTING on_failure_keep_string_jump");
5593
5594                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5595                         DEBUG_PRINT3(" %d (to 0x%lx):\n", mcnt,
5596                                      (long)(p + mcnt));
5597
5598                         PUSH_FAILURE_POINT(p + mcnt, (unsigned char *)0, -2);
5599                         break;
5600
5601                         /* Uses of on_failure_jump:
5602
5603                            Each alternative starts with an on_failure_jump that
5604                            points to the beginning of the next alternative.
5605                            Each alternative except the last ends with a jump
5606                            that in effect jumps past the rest of the
5607                            alternatives.  (They really jump to the ending jump
5608                            of the following alternative, because tensioning
5609                            these jumps is a hassle.)
5610
5611                            Repeats start with an on_failure_jump that points
5612                            past both the repetition text and either the
5613                            following jump or pop_failure_jump back to this
5614                            on_failure_jump.  */
5615                 case on_failure_jump:
5616                       on_failure:
5617                         DEBUG_PRINT1("EXECUTING on_failure_jump");
5618
5619                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5620                         DEBUG_PRINT3(" %d (to 0x%lx)", mcnt, (long)(p + mcnt));
5621
5622                         /* If this on_failure_jump comes right before a group
5623                            (i.e., the original * applied to a group), save the
5624                            information for that group and all inner ones, so
5625                            that if we fail back to this point, the group's
5626                            information will be correct.  For example, in
5627                            \(a*\)*\1, we need the preceding group, and in
5628                            \(\(a*\)b*\)\2, we need the inner group.  */
5629
5630                         /* We can't use `p' to check ahead because we push
5631                            a failure point to `p + mcnt' after we do this.  */
5632                         p1 = p;
5633
5634                         /* We need to skip no_op's before we look for the
5635                            start_memory in case this on_failure_jump is
5636                            happening as the result of a completed succeed_n, as
5637                            in \(a\)\{1,3\}b\1 against aba.  */
5638                         while (p1 < pend && (re_opcode_t) * p1 == no_op)
5639                                 p1++;
5640
5641                         if (p1 < pend && (re_opcode_t) * p1 == start_memory) {
5642                                 /* We have a new highest active register now.
5643                                    This will get reset at the start_memory we
5644                                    are about to get to, but we will have saved
5645                                    all the registers relevant to this repetition
5646                                    op, as described above.  */
5647                                 highest_active_reg = *(p1 + 1) + *(p1 + 2);
5648                                 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5649                                         lowest_active_reg = *(p1 + 1);
5650                         }
5651
5652                         DEBUG_PRINT1(":\n");
5653                         PUSH_FAILURE_POINT(p + mcnt, d, -2);
5654                         break;
5655
5656                         /* A smart repeat ends with `maybe_pop_jump'.
5657                            We change it to either `pop_failure_jump' or `jump'.  */
5658                 case maybe_pop_jump:
5659                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5660                         DEBUG_PRINT2("EXECUTING maybe_pop_jump %d.\n", mcnt);
5661                         {
5662                                 REGISTER const unsigned char *p2 = p;
5663
5664                                 /* Compare the beginning of the repeat with what
5665                                    in the pattern follows its end. If we can
5666                                    establish that there is nothing that they
5667                                    would both match, i.e., that we would have to
5668                                    backtrack because of (as in, e.g., `a*a')
5669                                    then we can change to pop_failure_jump,
5670                                    because we'll never have to backtrack.
5671
5672                                    This is not true in the case of alternatives:
5673                                    in `(a|ab)*' we do need to backtrack to the
5674                                    `ab' alternative (e.g., if the string was
5675                                    `ab').  But instead of trying to detect that
5676                                    here, the alternative has put on a dummy
5677                                    failure point which is what we will end up
5678                                    popping.  */
5679
5680                                 /* Skip over open/close-group commands.  If what
5681                                    follows this loop is a ...+ construct, look
5682                                    at what begins its body, since we will have
5683                                    to match at least one of that.  */
5684                                 while (1) {
5685                                         if (p2 + 2 < pend
5686                                             && ((re_opcode_t) * p2 ==
5687                                                 stop_memory
5688                                                 || (re_opcode_t) * p2 ==
5689                                                 start_memory))
5690                                                 p2 += 3;
5691                                         else if (p2 + 6 < pend
5692                                                  && (re_opcode_t) * p2 ==
5693                                                  dummy_failure_jump)
5694                                                 p2 += 6;
5695                                         else
5696                                                 break;
5697                                 }
5698
5699                                 p1 = p + mcnt;
5700                                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5701                                    to the `maybe_finalize_jump' of this case.  Examine what
5702                                    follows.  */
5703
5704                                 /* If we're at the end of the pattern, we can change.  */
5705                                 if (p2 == pend) {
5706                                         /* Consider what happens when matching ":\(.*\)"
5707                                            against ":/".  I don't really understand this code
5708                                            yet.  */
5709                                         p[-3] = (unsigned char)pop_failure_jump;
5710                                         DEBUG_PRINT1
5711                                             ("  End of pattern: change to `pop_failure_jump'.\n");
5712                                 }
5713
5714                                 else if ((re_opcode_t) * p2 == exactn
5715                                          || (bufp->newline_anchor
5716                                              && (re_opcode_t) * p2 ==
5717                                              endline)) {
5718                                         REGISTER unsigned char c =
5719                                             *p2 ==
5720                                             (unsigned char)endline ? '\n' :
5721                                             p2[2];
5722
5723                                         if ((re_opcode_t) p1[3] == exactn
5724                                             && p1[5] != c) {
5725                                                 p[-3] =
5726                                                     (unsigned char)
5727                                                     pop_failure_jump;
5728                                                 DEBUG_PRINT3
5729                                                     ("  %c != %c => pop_failure_jump.\n",
5730                                                      c, p1[5]);
5731                                         }
5732
5733                                         else if ((re_opcode_t) p1[3] == charset
5734                                                  || (re_opcode_t) p1[3] ==
5735                                                  charset_not) {
5736                                                 int not_p =
5737                                                     (re_opcode_t) p1[3] ==
5738                                                     charset_not;
5739
5740                                                 if (c <
5741                                                     (unsigned char)(p1[4] *
5742                                                                     BYTEWIDTH)
5743                                                     && p1[5 +
5744                                                           c /
5745                                                           BYTEWIDTH] & (1 << (c
5746                                                                               %
5747                                                                               BYTEWIDTH)))
5748                                                         not_p = !not_p;
5749
5750                                                 /* `not_p' is equal to 1 if c would match, which means
5751                                                    that we can't change to pop_failure_jump.  */
5752                                                 if (!not_p) {
5753                                                         p[-3] =
5754                                                             (unsigned char)
5755                                                             pop_failure_jump;
5756                                                         DEBUG_PRINT1
5757                                                             ("  No match => pop_failure_jump.\n");
5758                                                 }
5759                                         }
5760                                 } else if ((re_opcode_t) * p2 == charset) {
5761 #ifdef REGEX_DEBUG_FLAG
5762                                         REGISTER unsigned char c
5763                                             =
5764                                             *p2 ==
5765                                             (unsigned char)endline ? '\n' :
5766                                             p2[2];
5767 #endif
5768
5769                                         if ((re_opcode_t) p1[3] == exactn
5770                                             && !((int)p2[1] * BYTEWIDTH >
5771                                                  (int)p1[5]
5772                                                  && (p2[2 + p1[5] / BYTEWIDTH]
5773                                                      & (1 <<
5774                                                         (p1[5] %
5775                                                          BYTEWIDTH))))) {
5776                                                 p[-3] =
5777                                                     (unsigned char)
5778                                                     pop_failure_jump;
5779                                                 DEBUG_PRINT3
5780                                                     ("  %c != %c => pop_failure_jump.\n",
5781                                                      c, p1[5]);
5782                                         }
5783
5784                                         else if ((re_opcode_t) p1[3] ==
5785                                                  charset_not) {
5786                                                 int idx;
5787                                                 /* We win if the charset_not inside the loop
5788                                                    lists every character listed in the charset after.  */
5789                                                 for (idx = 0; idx < (int)p2[1];
5790                                                      idx++)
5791                                                         if (!
5792                                                             (p2[2 + idx] == 0
5793                                                              || (idx <
5794                                                                  (int)p1[4]
5795                                                                  &&
5796                                                                  ((p2[2 + idx] &
5797                                                                    ~p1[5 +
5798                                                                        idx]) ==
5799                                                                   0))))
5800                                                                 break;
5801
5802                                                 if (idx == p2[1]) {
5803                                                         p[-3] =
5804                                                             (unsigned char)
5805                                                             pop_failure_jump;
5806                                                         DEBUG_PRINT1
5807                                                             ("  No match => pop_failure_jump.\n");
5808                                                 }
5809                                         } else if ((re_opcode_t) p1[3] ==
5810                                                    charset) {
5811                                                 int idx;
5812                                                 /* We win if the charset inside the loop
5813                                                    has no overlap with the one after the loop.  */
5814                                                 for (idx = 0;
5815                                                      idx < (int)p2[1]
5816                                                      && idx < (int)p1[4]; idx++)
5817                                                         if ((p2[2 + idx] &
5818                                                              p1[5 + idx]) != 0)
5819                                                                 break;
5820
5821                                                 if (idx == p2[1]
5822                                                     || idx == p1[4]) {
5823                                                         p[-3] =
5824                                                             (unsigned char)
5825                                                             pop_failure_jump;
5826                                                         DEBUG_PRINT1
5827                                                             ("  No match => pop_failure_jump.\n");
5828                                                 }
5829                                         }
5830                                 }
5831                         }
5832                         p -= 2; /* Point at relative address again.  */
5833                         if ((re_opcode_t) p[-1] != pop_failure_jump) {
5834                                 p[-1] = (unsigned char)jump;
5835                                 DEBUG_PRINT1("  Match => jump.\n");
5836                                 goto unconditional_jump;
5837                         }
5838                         /* Note fall through.  */
5839
5840                         /* The end of a simple repeat has a pop_failure_jump
5841                            back to its matching on_failure_jump, where the
5842                            latter will push a failure point.  The
5843                            pop_failure_jump takes off failure points put on by
5844                            this pop_failure_jump's matching on_failure_jump; we
5845                            got through the pattern to here from the matching
5846                            on_failure_jump, so didn't fail.  */
5847                 case pop_failure_jump: {
5848                         /* We need to pass separate storage for the
5849                            lowest and highest registers, even though we
5850                            don't care about the actual values.
5851                            Otherwise, we will restore only one register
5852                            from the stack, since lowest will == highest
5853                            in `pop_failure_point'.  */
5854                         int dummy_low_reg, dummy_high_reg;
5855                         const unsigned char *pdummy = NULL;
5856                         const unsigned char *sdummy = NULL;
5857
5858                         DEBUG_PRINT1("EXECUTING pop_failure_jump.\n");
5859                         POP_FAILURE_POINT(sdummy, pdummy,
5860                                           dummy_low_reg, dummy_high_reg,
5861                                           reg_dummy, reg_dummy,
5862                                           reg_info_dummy);
5863                         SXE_SET_UNUSED(pdummy), SXE_SET_UNUSED(sdummy);
5864                 }
5865                         /* Note fall through.  */
5866
5867                         /* Unconditionally jump (without popping any failure
5868                            points).  */
5869                 case jump:
5870                 unconditional_jump:
5871                         /* Get the amount to jump. */
5872                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5873                         DEBUG_PRINT2("EXECUTING jump %d ", mcnt);
5874                         p += mcnt;      /* Do the jump.  */
5875                         DEBUG_PRINT2("(to 0x%lx).\n", (long)p);
5876                         break;
5877
5878                         /* We need this opcode so we can detect where alternatives end
5879                            in `group_match_null_string_p' et al.  */
5880                 case jump_past_alt:
5881                         DEBUG_PRINT1("EXECUTING jump_past_alt.\n");
5882                         goto unconditional_jump;
5883
5884                         /* Normally, the on_failure_jump pushes a failure point, which
5885                            then gets popped at pop_failure_jump.  We will end up at
5886                            pop_failure_jump, also, and with a pattern of, say, `a+', we
5887                            are skipping over the on_failure_jump, so we have to push
5888                            something meaningless for pop_failure_jump to pop.  */
5889                 case dummy_failure_jump:
5890                         DEBUG_PRINT1("EXECUTING dummy_failure_jump.\n");
5891                         /* It doesn't matter what we push for the string here.  What
5892                            the code at `fail' tests is the value for the pattern.  */
5893                         PUSH_FAILURE_POINT(NULL, NULL, -2);
5894                         goto unconditional_jump;
5895
5896                         /* At the end of an alternative, we need to push a dummy failure
5897                            point in case we are followed by a `pop_failure_jump', because
5898                            we don't want the failure point for the alternative to be
5899                            popped.  For example, matching `(a|ab)*' against `aab'
5900                            requires that we match the `ab' alternative.  */
5901                 case push_dummy_failure:
5902                         DEBUG_PRINT1("EXECUTING push_dummy_failure.\n");
5903                         /* See comments just above at `dummy_failure_jump' about the
5904                            two zeroes.  */
5905                         PUSH_FAILURE_POINT(NULL, NULL, -2);
5906                         break;
5907
5908                         /* Have to succeed matching what follows at least n times.
5909                            After that, handle like `on_failure_jump'.  */
5910                 case succeed_n:
5911                         EXTRACT_NUMBER(mcnt, p + 2);
5912                         DEBUG_PRINT2("EXECUTING succeed_n %d.\n", mcnt);
5913
5914                         assert(mcnt >= 0);
5915                         /* Originally, this is how many times we HAVE to succeed.  */
5916                         if (mcnt > 0) {
5917                                 mcnt--;
5918                                 p += 2;
5919                                 STORE_NUMBER_AND_INCR(p, mcnt);
5920                                 DEBUG_PRINT3("  Setting 0x%lx to %d.\n",
5921                                              (long)p, mcnt);
5922                         } else if (mcnt == 0) {
5923                                 DEBUG_PRINT2
5924                                     ("  Setting two bytes from 0x%lx to no_op.\n",
5925                                      (long)(p + 2));
5926                                 p[2] = (unsigned char)no_op;
5927                                 p[3] = (unsigned char)no_op;
5928                                 goto on_failure;
5929                         }
5930                         break;
5931
5932                 case jump_n:
5933                         EXTRACT_NUMBER(mcnt, p + 2);
5934                         DEBUG_PRINT2("EXECUTING jump_n %d.\n", mcnt);
5935
5936                         /* Originally, this is how many times we CAN jump.  */
5937                         if (mcnt) {
5938                                 mcnt--;
5939                                 STORE_NUMBER(p + 2, mcnt);
5940                                 goto unconditional_jump;
5941                         }
5942                         /* If don't have to jump any more, skip over the rest of command.  */
5943                         else
5944                                 p += 4;
5945                         break;
5946
5947                 case set_number_at:
5948                         {
5949                                 DEBUG_PRINT1("EXECUTING set_number_at.\n");
5950
5951                                 EXTRACT_NUMBER_AND_INCR(mcnt, p);
5952                                 p1 = p + mcnt;
5953                                 EXTRACT_NUMBER_AND_INCR(mcnt, p);
5954                                 DEBUG_PRINT3("  Setting 0x%lx to %d.\n",
5955                                              (long)p1, mcnt);
5956                                 STORE_NUMBER(p1, mcnt);
5957                                 break;
5958                         }
5959
5960                 case wordbound:
5961                         DEBUG_PRINT1("EXECUTING wordbound.\n");
5962                         should_succeed = 1;
5963                       matchwordbound:
5964                         {
5965                                 /* XEmacs change */
5966                                 /* Straightforward and (I hope) correct implementation.
5967                                    Probably should be optimized by arranging to compute
5968                                    pos only once. */
5969                                 /* emch1 is the character before d, syn1 is the syntax of
5970                                    emch1, emch2 is the character at d, and syn2 is the
5971                                    syntax of emch2. */
5972                                 Emchar emch1, emch2;
5973                                 /* GCC isn't smart enough to see these are initialized if used. */
5974                                 int syn1 = 0, syn2 = 0;
5975                                 re_char *d_before, *d_after;
5976                                 int result,
5977                                     at_beg = AT_STRINGS_BEG(d),
5978                                     at_end = AT_STRINGS_END(d);
5979 #ifdef emacs
5980                                 int xpos;
5981 #endif
5982
5983                                 if (at_beg && at_end) {
5984                                         result = 0;
5985                                 } else {
5986                                         if (!at_beg) {
5987                                                 d_before =
5988                                                     POS_BEFORE_GAP_UNSAFE(d);
5989                                                 DEC_CHARPTR(d_before);
5990                                                 emch1 =
5991                                                     charptr_emchar(d_before);
5992 #ifdef emacs
5993                                                 xpos =
5994                                                     SYNTAX_CACHE_BYTE_TO_CHAR
5995                                                     (PTR_TO_OFFSET(d)) - 1;
5996                                                 UPDATE_SYNTAX_CACHE(xpos);
5997 #endif
5998                                                 syn1 = SYNTAX_FROM_CACHE
5999                                                     (XCHAR_TABLE
6000                                                      (regex_emacs_buffer->
6001                                                       mirror_syntax_table),
6002                                                      emch1);
6003                                         }
6004                                         if (!at_end) {
6005                                                 d_after =
6006                                                     POS_AFTER_GAP_UNSAFE(d);
6007                                                 emch2 = charptr_emchar(d_after);
6008 #ifdef emacs
6009                                                 xpos =
6010                                                     SYNTAX_CACHE_BYTE_TO_CHAR
6011                                                     (PTR_TO_OFFSET(d));
6012                                                 UPDATE_SYNTAX_CACHE_FORWARD(xpos
6013                                                                             +
6014                                                                             1);
6015 #endif
6016                                                 syn2 = SYNTAX_FROM_CACHE
6017                                                     (XCHAR_TABLE
6018                                                      (regex_emacs_buffer->
6019                                                       mirror_syntax_table),
6020                                                      emch2);
6021                                         }
6022
6023                                         if (at_beg)
6024                                                 result = (syn2 == Sword);
6025                                         else if (at_end)
6026                                                 result = (syn1 == Sword);
6027                                         else
6028                                                 result =
6029                                                     ((syn1 == Sword) !=
6030                                                      (syn2 == Sword));
6031                                 }
6032
6033                                 if (result == should_succeed)
6034                                         break;
6035                                 goto fail;
6036                         }
6037
6038                 case notwordbound:
6039                         DEBUG_PRINT1("EXECUTING notwordbound.\n");
6040                         should_succeed = 0;
6041                         goto matchwordbound;
6042
6043                 case wordbeg:
6044                         DEBUG_PRINT1("EXECUTING wordbeg.\n");
6045                         if (AT_STRINGS_END(d))
6046                                 goto fail;
6047                         {
6048                                 /* XEmacs: this originally read:
6049
6050                                    if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
6051                                    break;
6052
6053                                  */
6054                                 re_char *dtmp = POS_AFTER_GAP_UNSAFE(d);
6055                                 Emchar emch = charptr_emchar(dtmp);
6056 #ifdef emacs
6057                                 int charpos =
6058                                     SYNTAX_CACHE_BYTE_TO_CHAR(PTR_TO_OFFSET(d));
6059                                 UPDATE_SYNTAX_CACHE(charpos);
6060 #endif
6061                                 if (SYNTAX_FROM_CACHE
6062                                     (XCHAR_TABLE
6063                                      (regex_emacs_buffer->mirror_syntax_table),
6064                                      emch) != Sword)
6065                                         goto fail;
6066                                 if (AT_STRINGS_BEG(d))
6067                                         break;
6068                                 dtmp = POS_BEFORE_GAP_UNSAFE(d);
6069                                 DEC_CHARPTR(dtmp);
6070                                 emch = charptr_emchar(dtmp);
6071 #ifdef emacs
6072                                 UPDATE_SYNTAX_CACHE_BACKWARD(charpos - 1);
6073 #endif
6074                                 if (SYNTAX_FROM_CACHE
6075                                     (XCHAR_TABLE
6076                                      (regex_emacs_buffer->mirror_syntax_table),
6077                                      emch) != Sword)
6078                                         break;
6079                                 goto fail;
6080                         }
6081
6082                 case wordend:
6083                         DEBUG_PRINT1("EXECUTING wordend.\n");
6084                         if (AT_STRINGS_BEG(d))
6085                                 goto fail;
6086                         {
6087                                 /* XEmacs: this originally read:
6088
6089                                    if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
6090                                    && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
6091                                    break;
6092
6093                                    The or condition is incorrect (reversed).
6094                                  */
6095                                 re_char *dtmp;
6096                                 Emchar emch;
6097 #ifdef emacs
6098                                 int charpos =
6099                                     SYNTAX_CACHE_BYTE_TO_CHAR(PTR_TO_OFFSET(d))
6100                                     - 1;
6101                                 UPDATE_SYNTAX_CACHE(charpos);
6102 #endif
6103                                 dtmp = POS_BEFORE_GAP_UNSAFE(d);
6104                                 DEC_CHARPTR(dtmp);
6105                                 emch = charptr_emchar(dtmp);
6106                                 if (SYNTAX_FROM_CACHE
6107                                     (XCHAR_TABLE
6108                                      (regex_emacs_buffer->mirror_syntax_table),
6109                                      emch) != Sword)
6110                                         goto fail;
6111                                 if (AT_STRINGS_END(d))
6112                                         break;
6113                                 dtmp = POS_AFTER_GAP_UNSAFE(d);
6114                                 emch = charptr_emchar(dtmp);
6115 #ifdef emacs
6116                                 UPDATE_SYNTAX_CACHE_FORWARD(charpos + 1);
6117 #endif
6118                                 if (SYNTAX_FROM_CACHE
6119                                     (XCHAR_TABLE
6120                                      (regex_emacs_buffer->mirror_syntax_table),
6121                                      emch) != Sword)
6122                                         break;
6123                                 goto fail;
6124                         }
6125
6126 #ifdef emacs
6127                 case before_dot:
6128                         DEBUG_PRINT1("EXECUTING before_dot.\n");
6129                         if (!
6130                             (NILP(regex_match_object)
6131                              || BUFFERP(regex_match_object))
6132                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6133                                 >= BUF_PT(regex_emacs_buffer)))
6134                                 goto fail;
6135                         break;
6136
6137                 case at_dot:
6138                         DEBUG_PRINT1("EXECUTING at_dot.\n");
6139                         if (!
6140                             (NILP(regex_match_object)
6141                              || BUFFERP(regex_match_object))
6142                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6143                                 != BUF_PT(regex_emacs_buffer)))
6144                                 goto fail;
6145                         break;
6146
6147                 case after_dot:
6148                         DEBUG_PRINT1("EXECUTING after_dot.\n");
6149                         if (!
6150                             (NILP(regex_match_object)
6151                              || BUFFERP(regex_match_object))
6152                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6153                                 <= BUF_PT(regex_emacs_buffer)))
6154                                 goto fail;
6155                         break;
6156 #if 0                           /* not emacs19 */
6157                 case at_dot:
6158                         DEBUG_PRINT1("EXECUTING at_dot.\n");
6159                         if (BUF_PTR_BYTE_POS
6160                             (regex_emacs_buffer,
6161                              (unsigned char *)d) + 1 !=
6162                             BUF_PT(regex_emacs_buffer))
6163                                 goto fail;
6164                         break;
6165 #endif                          /* not emacs19 */
6166
6167                 case syntaxspec:
6168                         DEBUG_PRINT2("EXECUTING syntaxspec %d.\n", mcnt);
6169                         mcnt = *p++;
6170                         goto matchsyntax;
6171
6172                 case wordchar:
6173                         DEBUG_PRINT1("EXECUTING Emacs wordchar.\n");
6174                         mcnt = (int)Sword;
6175                       matchsyntax:
6176                         should_succeed = 1;
6177                       matchornotsyntax:
6178                         {
6179                                 int matches;
6180                                 Emchar emch;
6181
6182                                 REGEX_PREFETCH();
6183 #ifdef emacs
6184                                 {
6185                                         int charpos =
6186                                             SYNTAX_CACHE_BYTE_TO_CHAR
6187                                             (PTR_TO_OFFSET(d));
6188                                         UPDATE_SYNTAX_CACHE(charpos);
6189                                 }
6190 #endif
6191
6192                                 emch = charptr_emchar((const Bufbyte *)d);
6193                                 matches =
6194                                     (SYNTAX_FROM_CACHE
6195                                      (XCHAR_TABLE
6196                                       (regex_emacs_buffer->mirror_syntax_table),
6197                                       emch) == (enum syntaxcode)mcnt);
6198                                 INC_CHARPTR(d);
6199                                 if (matches != should_succeed)
6200                                         goto fail;
6201                                 SET_REGS_MATCHED();
6202                         }
6203                         break;
6204
6205                 case notsyntaxspec:
6206                         DEBUG_PRINT2("EXECUTING notsyntaxspec %d.\n", mcnt);
6207                         mcnt = *p++;
6208                         goto matchnotsyntax;
6209
6210                 case notwordchar:
6211                         DEBUG_PRINT1("EXECUTING Emacs notwordchar.\n");
6212                         mcnt = (int)Sword;
6213                       matchnotsyntax:
6214                         should_succeed = 0;
6215                         goto matchornotsyntax;
6216
6217 #ifdef MULE
6218 /* 97/2/17 jhod Mule category code patch */
6219                 case categoryspec:
6220                         should_succeed = 1;
6221                       matchornotcategory:
6222                         {
6223                                 Emchar emch;
6224
6225                                 mcnt = *p++;
6226                                 REGEX_PREFETCH();
6227                                 emch = charptr_emchar((const Bufbyte *)d);
6228                                 INC_CHARPTR(d);
6229                                 if (check_category_char
6230                                     (emch, regex_emacs_buffer->category_table,
6231                                      mcnt, should_succeed))
6232                                         goto fail;
6233                                 SET_REGS_MATCHED();
6234                         }
6235                         break;
6236
6237                 case notcategoryspec:
6238                         should_succeed = 0;
6239                         goto matchornotcategory;
6240 /* end of category patch */
6241 #endif                          /* MULE */
6242 #else                           /* not emacs */
6243                 case wordchar:
6244                         DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n");
6245                         REGEX_PREFETCH();
6246                         if (!WORDCHAR_P_UNSAFE((int)(*d)))
6247                                 goto fail;
6248                         SET_REGS_MATCHED();
6249                         d++;
6250                         break;
6251
6252                 case notwordchar:
6253                         DEBUG_PRINT1("EXECUTING non-Emacs notwordchar.\n");
6254                         REGEX_PREFETCH();
6255                         if (!WORDCHAR_P_UNSAFE((int)(*d)))
6256                                 goto fail;
6257                         SET_REGS_MATCHED();
6258                         d++;
6259                         break;
6260 #endif                          /* emacs */
6261
6262                 default:
6263                         abort();
6264                 }
6265                 /* Successfully executed one pattern command; keep going.  */
6266                 continue;
6267
6268                 /* We goto here if a matching operation fails. */
6269         fail:
6270                 if (!FAIL_STACK_EMPTY()) {
6271                         /* A restart point is known.  Restore to that state.  */
6272                         DEBUG_PRINT1("\nFAIL:\n");
6273                         
6274                         const unsigned char* cpat = p;
6275                         
6276                         POP_FAILURE_POINT(d, cpat, lowest_active_reg,
6277                                           highest_active_reg, regstart, regend,
6278                                           reg_info);
6279 #pragma GCC diagnostic push
6280 #pragma GCC diagnostic ignored "-Wcast-qual"
6281                         p = (unsigned char*)cpat;
6282 #pragma GCC diagnostic pop
6283
6284                         /* If this failure point is a dummy, try the next one.  */
6285                         if (!p)
6286                                 goto fail;
6287
6288                         /* If we failed to the end of the pattern, don't examine
6289                            *p.  */
6290                         assert(p <= pend);
6291                         if (p < pend) {
6292                                 re_bool is_a_jump_n = false;
6293
6294                                 /* If failed to a backwards jump that's part of
6295                                    a repetition loop, need to pop this failure
6296                                    point and use the next one.  */
6297                                 switch ((unsigned int)(re_opcode_t)*p) {
6298                                 case jump_n:
6299                                         is_a_jump_n = true;
6300                                 case maybe_pop_jump:
6301                                 case pop_failure_jump:
6302                                 case jump:
6303                                         p1 = p + 1;
6304                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6305                                         p1 += mcnt;
6306
6307                                         if ((is_a_jump_n
6308                                              && (re_opcode_t) * p1 == succeed_n)
6309                                             || (!is_a_jump_n
6310                                                 && (re_opcode_t) * p1 ==
6311                                                 on_failure_jump))
6312                                                 goto fail;
6313                                         break;
6314                                 default:
6315                                         /* do nothing */ ;
6316                                 }
6317                         }
6318
6319                         if (d >= string1 && d <= end1)
6320                                 dend = end_match_1;
6321                 } else
6322                         break;  /* Matching at this starting point really fails.  */
6323         }                       /* for (;;) */
6324
6325         if (best_regs_set)
6326                 goto restore_best_regs;
6327
6328         FREE_VARIABLES();
6329
6330         return -1;              /* Failure to match.  */
6331 }                               /* re_match_2 */
6332
6333 \f
6334 /* Subroutine definitions for re_match_2.  */
6335
6336 /* We are passed P pointing to a register number after a start_memory.
6337
6338    Return true if the pattern up to the corresponding stop_memory can
6339    match the empty string, and false otherwise.
6340
6341    If we find the matching stop_memory, sets P to point to one past its number.
6342    Otherwise, sets P to an undefined byte less than or equal to END.
6343
6344    We don't handle duplicates properly (yet).  */
6345
6346 static re_bool
6347 group_match_null_string_p(unsigned char **p, unsigned char *end,
6348                           register_info_type * register_info)
6349 {
6350         int mcnt;
6351         /* Point to after the args to the start_memory.  */
6352         unsigned char *p1 = *p + 2;
6353
6354         while (p1 < end) {
6355                 /* Skip over opcodes that can match nothing, and return true or
6356                    false, as appropriate, when we get to one that can't, or to the
6357                    matching stop_memory.  */
6358
6359                 switch ((unsigned int)(re_opcode_t)*p1) {
6360                         /* Could be either a loop or a series of alternatives.  */
6361                 case on_failure_jump:
6362                         p1++;
6363                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6364
6365                         /* If the next operation is not a jump backwards in the
6366                            pattern.  */
6367
6368                         if (mcnt >= 0) {
6369                                 /* Go through the on_failure_jumps of the alternatives,
6370                                    seeing if any of the alternatives cannot match nothing.
6371                                    The last alternative starts with only a jump,
6372                                    whereas the rest start with on_failure_jump and end
6373                                    with a jump, e.g., here is the pattern for `a|b|c':
6374
6375                                    /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
6376                                    /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
6377                                    /exactn/1/c
6378
6379                                    So, we have to first go through the first (n-1)
6380                                    alternatives and then deal with the last one separately.  */
6381
6382                                 /* Deal with the first (n-1) alternatives, which start
6383                                    with an on_failure_jump (see above) that jumps to right
6384                                    past a jump_past_alt.  */
6385
6386                                 while ((re_opcode_t) p1[mcnt - 3] ==
6387                                        jump_past_alt) {
6388                                         /* `mcnt' holds how many bytes long the alternative
6389                                            is, including the ending `jump_past_alt' and
6390                                            its number.  */
6391
6392                                         if (!alt_match_null_string_p
6393                                             (p1, p1 + mcnt - 3, register_info))
6394                                                 return false;
6395
6396                                         /* Move to right after this alternative, including the
6397                                            jump_past_alt.  */
6398                                         p1 += mcnt;
6399
6400                                         /* Break if it's the beginning of an n-th alternative
6401                                            that doesn't begin with an on_failure_jump.  */
6402                                         if ((re_opcode_t) * p1 !=
6403                                             on_failure_jump)
6404                                                 break;
6405
6406                                         /* Still have to check that it's not an n-th
6407                                            alternative that starts with an on_failure_jump.  */
6408                                         p1++;
6409                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6410                                         if ((re_opcode_t) p1[mcnt - 3] !=
6411                                             jump_past_alt) {
6412                                                 /* Get to the beginning of the n-th alternative.  */
6413                                                 p1 -= 3;
6414                                                 break;
6415                                         }
6416                                 }
6417
6418                                 /* Deal with the last alternative: go back and get number
6419                                    of the `jump_past_alt' just before it.  `mcnt' contains
6420                                    the length of the alternative.  */
6421                                 EXTRACT_NUMBER(mcnt, p1 - 2);
6422
6423                                 if (!alt_match_null_string_p
6424                                     (p1, p1 + mcnt, register_info))
6425                                         return false;
6426
6427                                 p1 += mcnt;     /* Get past the n-th alternative.  */
6428                         }       /* if mcnt > 0 */
6429                         break;
6430
6431                 case stop_memory:
6432                         assert(p1[1] == **p);
6433                         *p = p1 + 2;
6434                         return true;
6435
6436                 default:
6437                         if (!common_op_match_null_string_p(&p1, end, register_info))
6438                                 return false;
6439                 }
6440         }                       /* while p1 < end */
6441
6442         return false;
6443 }                               /* group_match_null_string_p */
6444
6445 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
6446    It expects P to be the first byte of a single alternative and END one
6447    byte past the last. The alternative can contain groups.  */
6448
6449 static re_bool
6450 alt_match_null_string_p(unsigned char *p, unsigned char *end,
6451                         register_info_type * register_info)
6452 {
6453         int mcnt;
6454         unsigned char *p1 = p;
6455
6456         while (p1 < end) {
6457                 /* Skip over opcodes that can match nothing, and break when we get
6458                    to one that can't.  */
6459
6460                 switch ((unsigned int)(re_opcode_t)*p1) {
6461                         /* It's a loop.  */
6462                 case on_failure_jump:
6463                         p1++;
6464                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6465                         p1 += mcnt;
6466                         break;
6467
6468                 default:
6469                         if (!common_op_match_null_string_p(&p1, end, register_info))
6470                                 return false;
6471                 }
6472         }                       /* while p1 < end */
6473
6474         return true;
6475 }                               /* alt_match_null_string_p */
6476
6477 /* Deals with the ops common to group_match_null_string_p and
6478    alt_match_null_string_p.
6479
6480    Sets P to one after the op and its arguments, if any.  */
6481
6482 static re_bool
6483 common_op_match_null_string_p(unsigned char **p, unsigned char *end,
6484                               register_info_type * register_info)
6485 {
6486         int mcnt;
6487         re_bool ret;
6488         int reg_no;
6489         unsigned char *p1 = *p;
6490
6491         switch ((unsigned int)(re_opcode_t)*p1++) {
6492         case no_op:
6493         case begline:
6494         case endline:
6495         case begbuf:
6496         case endbuf:
6497         case wordbeg:
6498         case wordend:
6499         case wordbound:
6500         case notwordbound:
6501 #ifdef emacs
6502         case before_dot:
6503         case at_dot:
6504         case after_dot:
6505 #endif
6506                 break;
6507
6508         case start_memory:
6509                 reg_no = *p1;
6510                 assert(reg_no > 0 && reg_no <= MAX_REGNUM);
6511                 ret = group_match_null_string_p(&p1, end, register_info);
6512
6513                 /* Have to set this here in case we're checking a group which
6514                    contains a group and a back reference to it.  */
6515
6516                 if (REG_MATCH_NULL_STRING_P(register_info[reg_no]) ==
6517                     MATCH_NULL_UNSET_VALUE)
6518                         REG_MATCH_NULL_STRING_P(register_info[reg_no]) = ret;
6519
6520                 if (!ret)
6521                         return false;
6522                 break;
6523
6524                 /* If this is an optimized succeed_n for zero times, make the jump.  */
6525         case jump:
6526                 EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6527                 if (mcnt >= 0)
6528                         p1 += mcnt;
6529                 else
6530                         return false;
6531                 break;
6532
6533         case succeed_n:
6534                 /* Get to the number of times to succeed.  */
6535                 p1 += 2;
6536                 EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6537
6538                 if (mcnt == 0) {
6539                         p1 -= 4;
6540                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6541                         p1 += mcnt;
6542                 } else
6543                         return false;
6544                 break;
6545
6546         case duplicate:
6547                 if (!REG_MATCH_NULL_STRING_P(register_info[*p1]))
6548                         return false;
6549                 break;
6550
6551         case set_number_at:
6552                 p1 += 4;
6553                 break;
6554
6555         default:
6556                 /* All other opcodes mean we cannot match the empty string.  */
6557                 return false;
6558         }
6559
6560         *p = p1;
6561         return true;
6562 }                               /* common_op_match_null_string_p */
6563
6564 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
6565    bytes; nonzero otherwise.  */
6566
6567 static int
6568 bcmp_translate(re_char *s1, re_char *s2,
6569                REGISTER int len, RE_TRANSLATE_TYPE translate)
6570 {
6571         REGISTER const unsigned char *p1 = s1, *p2 = s2;
6572 #ifdef MULE
6573         const unsigned char *p1_end = s1 + len;
6574         const unsigned char *p2_end = s2 + len;
6575
6576         while (p1 != p1_end && p2 != p2_end) {
6577                 Emchar p1_ch, p2_ch;
6578
6579                 p1_ch = charptr_emchar(p1);
6580                 p2_ch = charptr_emchar(p2);
6581
6582                 if (RE_TRANSLATE(p1_ch)
6583                     != RE_TRANSLATE(p2_ch))
6584                         return 1;
6585                 INC_CHARPTR(p1);
6586                 INC_CHARPTR(p2);
6587         }
6588 #else                           /* not MULE */
6589         while (len) {
6590                 if (RE_TRANSLATE(*p1++) != RE_TRANSLATE(*p2++))
6591                         return 1;
6592                 len--;
6593         }
6594 #endif                          /* MULE */
6595         return 0;
6596 }
6597 \f
6598 /* Entry points for GNU code.  */
6599
6600 /* re_compile_pattern is the GNU regular expression compiler: it
6601    compiles PATTERN (of length SIZE) and puts the result in BUFP.
6602    Returns 0 if the pattern was valid, otherwise an error string.
6603
6604    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6605    are set in BUFP on entry.
6606
6607    We call regex_compile to do the actual compilation.  */
6608
6609 const char *re_compile_pattern(const char *pattern, int length,
6610                                struct re_pattern_buffer *bufp)
6611 {
6612         reg_errcode_t ret;
6613
6614         /* GNU code is written to assume at least RE_NREGS registers will be set
6615            (and at least one extra will be -1).  */
6616         bufp->regs_allocated = REGS_UNALLOCATED;
6617
6618         /* And GNU code determines whether or not to get register information
6619            by passing null for the REGS argument to re_match, etc., not by
6620            setting no_sub.  */
6621         bufp->no_sub = 0;
6622
6623         /* Match anchors at newline.  */
6624         bufp->newline_anchor = 1;
6625
6626         ret = regex_compile((const unsigned char*)pattern,
6627                             length, re_syntax_options, bufp);
6628
6629         if (!ret)
6630                 return NULL;
6631         return gettext(re_error_msgid[(int)ret]);
6632 }
6633 \f
6634 /* Entry points compatible with 4.2 BSD regex library.  We don't define
6635    them unless specifically requested.  */
6636
6637 #ifdef _REGEX_RE_COMP
6638
6639 /* BSD has one and only one pattern buffer.  */
6640 static struct re_pattern_buffer re_comp_buf;
6641
6642 char *re_comp(const char *s)
6643 {
6644         reg_errcode_t ret;
6645
6646         if (!s) {
6647                 if (!re_comp_buf.buffer)
6648                         return gettext("No previous regular expression");
6649                 return 0;
6650         }
6651
6652         if (!re_comp_buf.buffer) {
6653                 re_comp_buf.buffer = (unsigned char *)xmalloc_atomic(200);
6654                 if (re_comp_buf.buffer == NULL)
6655                         return gettext(re_error_msgid[(int)REG_ESPACE]);
6656                 re_comp_buf.allocated = 200;
6657
6658                 re_comp_buf.fastmap = (char *)xmalloc_atomic(1 << BYTEWIDTH);
6659                 if (re_comp_buf.fastmap == NULL)
6660                         return gettext(re_error_msgid[(int)REG_ESPACE]);
6661         }
6662
6663         /* Since `re_exec' always passes NULL for the `regs' argument, we
6664            don't need to initialize the pattern buffer fields which affect it.  */
6665
6666         /* Match anchors at newlines.  */
6667         re_comp_buf.newline_anchor = 1;
6668
6669         ret =
6670             regex_compile((unsigned char *)s, strlen(s), re_syntax_options,
6671                           &re_comp_buf);
6672
6673         if (!ret)
6674                 return NULL;
6675
6676         /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6677         return (char *)gettext(re_error_msgid[(int)ret]);
6678 }
6679
6680 int re_exec(const char *s)
6681 {
6682         const int len = strlen(s);
6683         return
6684             0 <= re_search(&re_comp_buf, s, len, 0, len,
6685                            (struct re_registers *)0);
6686 }
6687 #endif                          /* _REGEX_RE_COMP */
6688 \f
6689 /* POSIX.2 functions.  Don't define these for Emacs.  */
6690
6691 #ifndef emacs
6692
6693 /* regcomp takes a regular expression as a string and compiles it.
6694
6695    PREG is a regex_t *.  We do not expect any fields to be initialized,
6696    since POSIX says we shouldn't.  Thus, we set
6697
6698      `buffer' to the compiled pattern;
6699      `used' to the length of the compiled pattern;
6700      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6701        REG_EXTENDED bit in CFLAGS is set; otherwise, to
6702        RE_SYNTAX_POSIX_BASIC;
6703      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6704      `fastmap' and `fastmap_accurate' to zero;
6705      `re_nsub' to the number of subexpressions in PATTERN.
6706      (non-shy of course.  POSIX probably doesn't know about
6707      shy ones, and in any case they should be invisible.)
6708
6709    PATTERN is the address of the pattern string.
6710
6711    CFLAGS is a series of bits which affect compilation.
6712
6713      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6714      use POSIX basic syntax.
6715
6716      If REG_NEWLINE is set, then . and [^...] don't match newline.
6717      Also, regexec will try a match beginning after every newline.
6718
6719      If REG_ICASE is set, then we considers upper- and lowercase
6720      versions of letters to be equivalent when matching.
6721
6722      If REG_NOSUB is set, then when PREG is passed to regexec, that
6723      routine will report only success or failure, and nothing about the
6724      registers.
6725
6726    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6727    the return codes and their meanings.)  */
6728
6729 int regcomp(regex_t * preg, const char *pattern, int cflags)
6730 {
6731         reg_errcode_t ret;
6732         unsigned syntax
6733             = (cflags & REG_EXTENDED) ?
6734             RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6735
6736         /* regex_compile will allocate the space for the compiled pattern.  */
6737         preg->buffer = 0;
6738         preg->allocated = 0;
6739         preg->used = 0;
6740
6741         /* Don't bother to use a fastmap when searching.  This simplifies the
6742            REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6743            characters after newlines into the fastmap.  This way, we just try
6744            every character.  */
6745         preg->fastmap = 0;
6746
6747         if (cflags & REG_ICASE) {
6748                 int i;
6749
6750                 preg->translate = (char *)xmalloc_atomic(CHAR_SET_SIZE);
6751                 if (preg->translate == NULL)
6752                         return (int)REG_ESPACE;
6753
6754                 /* Map uppercase characters to corresponding lowercase ones.  */
6755                 for (i = 0; i < CHAR_SET_SIZE; i++)
6756                         preg->translate[i] = ISUPPER(i) ? tolower(i) : i;
6757         } else
6758                 preg->translate = NULL;
6759
6760         /* If REG_NEWLINE is set, newlines are treated differently.  */
6761         if (cflags & REG_NEWLINE) {
6762                 /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6763                 syntax &= ~RE_DOT_NEWLINE;
6764                 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6765                 /* It also changes the matching behavior.  */
6766                 preg->newline_anchor = 1;
6767         } else {
6768                 preg->newline_anchor = 0;
6769         }
6770         preg->no_sub = !!(cflags & REG_NOSUB);
6771
6772         /* POSIX says a null character in the pattern terminates it, so we
6773            can use strlen here in compiling the pattern.  */
6774         ret = regex_compile((const unsigned char*)pattern,
6775                             strlen(pattern), syntax, preg);
6776
6777         /* POSIX doesn't distinguish between an unmatched open-group and an
6778            unmatched close-group: both are REG_EPAREN.  */
6779         if (ret == REG_ERPAREN)
6780                 ret = REG_EPAREN;
6781
6782         return (int)ret;
6783 }
6784
6785 /* regexec searches for a given pattern, specified by PREG, in the
6786    string STRING.
6787
6788    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6789    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6790    least NMATCH elements, and we set them to the offsets of the
6791    corresponding matched substrings.
6792
6793    EFLAGS specifies `execution flags' which affect matching: if
6794    REG_NOTBOL is set, then ^ does not match at the beginning of the
6795    string; if REG_NOTEOL is set, then $ does not match at the end.
6796
6797    We return 0 if we find a match and REG_NOMATCH if not.  */
6798
6799 int
6800 regexec(const regex_t * preg, const char *string, Element_count nmatch,
6801         regmatch_t pmatch[], int eflags)
6802 {
6803         int ret;
6804         struct re_registers regs;
6805         regex_t private_preg;
6806         int len = strlen(string);
6807         re_bool want_reg_info = !preg->no_sub && nmatch > 0;
6808
6809         private_preg = *preg;
6810
6811         private_preg.not_bol = !!(eflags & REG_NOTBOL);
6812         private_preg.not_eol = !!(eflags & REG_NOTEOL);
6813
6814         /* The user has told us exactly how many registers to return
6815            information about, via `nmatch'.  We have to pass that on to the
6816            matching routines.  */
6817         private_preg.regs_allocated = REGS_FIXED;
6818
6819         if (want_reg_info) {
6820                 regs.num_regs = nmatch;
6821                 regs.start = TALLOC(nmatch, regoff_t);
6822                 regs.end = TALLOC(nmatch, regoff_t);
6823                 if (regs.start == NULL || regs.end == NULL)
6824                         return (int)REG_NOMATCH;
6825         }
6826
6827         /* Perform the searching operation.  */
6828         ret = re_search(&private_preg, string, len,
6829                         /* start: */ 0, /* range: */ len,
6830                         want_reg_info ? &regs : (struct re_registers *)0);
6831
6832         /* Copy the register information to the POSIX structure.  */
6833         if (want_reg_info) {
6834                 if (ret >= 0) {
6835                         Element_count r;
6836
6837                         for (r = 0; r < nmatch; r++) {
6838                                 pmatch[r].rm_so = regs.start[r];
6839                                 pmatch[r].rm_eo = regs.end[r];
6840                         }
6841                 }
6842
6843                 /* If we needed the temporary register info, free the space now.  */
6844                 xfree(regs.start);
6845                 xfree(regs.end);
6846         }
6847
6848         /* We want zero return to mean success, unlike `re_search'.  */
6849         return ret >= 0 ? (int)REG_NOERROR : (int)REG_NOMATCH;
6850 }
6851
6852 /* Returns a message corresponding to an error code, ERRCODE, returned
6853    from either regcomp or regexec.   We don't use PREG here.  */
6854
6855 Memory_count
6856 regerror(int errcode, const regex_t * preg, char *errbuf,
6857          Memory_count errbuf_size)
6858 {
6859         const char *msg;
6860         Memory_count msg_size;
6861
6862         if (errcode < 0 || (size_t) errcode >= (sizeof(re_error_msgid)
6863                                                 / sizeof(re_error_msgid[0])))
6864                 /* Only error codes returned by the rest of the code should be passed
6865                    to this routine.  If we are given anything else, or if other regex
6866                    code generates an invalid error code, then the program has a bug.
6867                    Dump core so we can fix it.  */
6868                 abort();
6869
6870         msg = gettext(re_error_msgid[errcode]);
6871
6872         msg_size = strlen(msg) + 1;     /* Includes the null.  */
6873
6874         if (errbuf_size != 0) {
6875                 strncpy(errbuf, msg, errbuf_size - 1);
6876                 errbuf[errbuf_size-1]='\0';
6877         }
6878
6879         return msg_size;
6880 }
6881
6882 /* Free dynamically allocated space used by PREG.  */
6883
6884 void regfree(regex_t * preg)
6885 {
6886         if (preg->buffer != NULL) {
6887                 xfree(preg->buffer);
6888         }
6889         preg->buffer = NULL;
6890
6891         preg->allocated = 0;
6892         preg->used = 0;
6893
6894         if (preg->fastmap != NULL) {
6895                 xfree(preg->fastmap);
6896         }
6897         preg->fastmap = NULL;
6898         preg->fastmap_accurate = 0;
6899
6900         if (preg->translate != NULL) {
6901                 xfree(preg->translate);
6902         }
6903         preg->translate = NULL;
6904 }
6905
6906 #endif                          /* not emacs  */
6907 \f
6908 /*
6909 Local variables:
6910 make-backup-files: t
6911 version-control: t
6912 trim-versions-without-asking: nil
6913 End:
6914 */