Initial git import
[sxemacs] / src / regex.c
1 /* Extended regular expression matching and search library,
2    version 0.12, extended for XEmacs.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5
6    Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
7    Copyright (C) 1995 Sun Microsystems, Inc.
8    Copyright (C) 1995 Ben Wing.
9
10 This file is part of SXEmacs
11
12 SXEmacs is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
16
17 SXEmacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
24
25
26 /* Synched up with: FSF 19.29. */
27
28 /* Changes made for XEmacs:
29
30    (1) the REGEX_BEGLINE_CHECK code from the XEmacs v18 regex routines
31        was added.  This causes a huge speedup in font-locking.
32    (2) Rel-alloc is disabled when the MMAP version of rel-alloc is
33        being used, because it's too slow -- all those calls to mmap()
34        add humongous overhead.
35    (3) Lots and lots of changes for Mule.  They are bracketed by
36        `#ifdef MULE' or with comments that have `XEmacs' in them.
37  */
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #ifndef REGISTER                /* Rigidly enforced as of 20.3 */
44 #define REGISTER
45 #endif
46
47 #ifndef _GNU_SOURCE
48 #define _GNU_SOURCE 1
49 #endif
50
51 #ifdef emacs
52 /* Converts the pointer to the char to BEG-based offset from the start.  */
53 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING                      \
54                           ? (d) - string1 : (d) - (string2 - size1))
55 #else
56 #define PTR_TO_OFFSET(d) 0
57 #endif
58
59 /* We assume non-Mule if emacs isn't defined. */
60 #ifndef emacs
61 #undef MULE
62 #endif
63
64 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
65 #include <sys/types.h>
66
67 /* This is for other GNU distributions with internationalized messages.  */
68 #if defined (I18N3) && (defined (HAVE_LIBINTL_H) || defined (_LIBC))
69 # include <libintl.h>
70 #else
71 # define gettext(msgid) (msgid)
72 #endif
73
74 /* XEmacs: define this to add in a speedup for patterns anchored at
75    the beginning of a line.  Keep the ifdefs so that it's easier to
76    tell where/why this code has diverged from v19. */
77 #define REGEX_BEGLINE_CHECK
78
79 /* XEmacs: the current mmap-based ralloc handles small blocks very
80    poorly, so we disable it here. */
81
82 #if (defined (REL_ALLOC) && defined (HAVE_MMAP)) || defined(DOUG_LEA_MALLOC)
83 # undef REL_ALLOC
84 #endif
85
86 /* The `emacs' switch turns on certain matching commands
87    that make sense only in Emacs. */
88 #ifdef emacs
89
90 #include "lisp.h"
91 #include "buffer.h"
92 #include "syntax.h"
93
94 #ifdef MULE
95
96 Lisp_Object Vthe_lisp_rangetab;
97
98 void complex_vars_of_regex(void)
99 {
100         Vthe_lisp_rangetab = Fmake_range_table();
101         staticpro(&Vthe_lisp_rangetab);
102 }
103
104 #else  /* not MULE */
105
106 void complex_vars_of_regex(void)
107 {
108 }
109
110 #endif  /* MULE */
111
112 #define RE_TRANSLATE(ch) TRT_TABLE_OF (translate, (Emchar) ch)
113 #define TRANSLATE_P(tr) (!NILP (tr) && CHAR_TABLEP(tr))
114
115 /* just massage that xfree thing */
116 #undef xfree
117 #if defined HAVE_BDWGC && defined EF_USE_BDWGC
118 # define xfree          zfree
119 #else  /* !BDWGC */
120 # define xfree          yfree
121 #endif  /* BDWGC */
122
123 #else  /* !emacs */
124
125 /* If we are not linking with Emacs proper,
126    we can't use the relocating allocator
127    even if config.h says that we can.  */
128 #undef REL_ALLOC
129
130 #if defined (STDC_HEADERS) || defined (_LIBC)
131 # include <stdlib.h>
132 # include <stdbool.h>
133 #else
134 char *malloc();
135 char *realloc();
136 #endif
137
138 /* Types normally included via lisp.h */
139 #include <stddef.h>             /* for ptrdiff_t */
140
141 #ifdef REGEX_MALLOC
142 #ifndef DECLARE_NOTHING
143 #define DECLARE_NOTHING struct nosuchstruct
144 #endif
145 #endif
146
147 typedef int Emchar;
148
149 #define charptr_emchar(str)             ((Emchar) (str)[0])
150
151 #define INC_CHARPTR(p) ((p)++)
152 #define DEC_CHARPTR(p) ((p)--)
153
154 #include <string.h>
155
156 /* Define the syntax stuff for \<, \>, etc.  */
157
158 /* This must be nonzero for the wordchar and notwordchar pattern
159    commands in re_match_2.  */
160 #ifndef Sword
161 #define Sword 1
162 #endif
163
164 #ifdef SYNTAX_TABLE
165
166 extern char *re_syntax_table;
167
168 #else                           /* not SYNTAX_TABLE */
169
170 /* How many characters in the character set.  */
171 #define CHAR_SET_SIZE 256
172
173 static char re_syntax_table[CHAR_SET_SIZE];
174
175 static void init_syntax_once(void)
176 {
177         static int done = 0;
178
179         if (!done) {
180                 const char *word_syntax_chars =
181                     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
182
183                 memset(re_syntax_table, 0, sizeof(re_syntax_table));
184
185                 while (*word_syntax_chars)
186                         re_syntax_table[(unsigned int)(*word_syntax_chars++)] =
187                             Sword;
188
189                 done = 1;
190         }
191 }
192
193 #endif                          /* SYNTAX_TABLE */
194
195 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
196 #undef SYNTAX_FROM_CACHE
197 #define SYNTAX_FROM_CACHE SYNTAX_UNSAFE
198
199 #define RE_TRANSLATE(c) translate[(unsigned char) (c)]
200 #define TRANSLATE_P(tr) tr
201
202 #if defined HAVE_BDWGC && defined EF_USE_BDWGC
203 # if defined HAVE_GC_GC_H
204 #  include "gc/gc.h"
205 # elif defined HAVE_GC_H
206 #  include "gc.h"
207 # endif
208 # define xmalloc        GC_MALLOC
209 # define xmalloc_atomic GC_MALLOC_ATOMIC
210 # define xrealloc       GC_REALLOC
211 # define xfree          GC_FREE
212 # define xstrdup        GC_STRDUP
213 #else  /* !HAVE_BDWGC */
214 # define xmalloc        malloc
215 # define xmalloc_atomic malloc
216 # define xrealloc       realloc
217 # define xfree          free
218 # define xstrdup        strdup
219 #endif  /* HAVE_BDWGC */
220
221 #endif  /* emacs */
222
223
224 /* Under XEmacs, this is needed because we don't define it elsewhere. */
225 #ifdef SWITCH_ENUM_BUG
226 #define SWITCH_ENUM_CAST(x) ((int)(x))
227 #else
228 #define SWITCH_ENUM_CAST(x) (x)
229 #endif
230 \f
231 /* Get the interface, including the syntax bits.  */
232 #include "regex.h"
233
234 /* isalpha etc. are used for the character classes.  */
235 #include <ctype.h>
236
237 /* Jim Meyering writes:
238
239    "... Some ctype macros are valid only for character codes that
240    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
241    using /bin/cc or gcc but without giving an ansi option).  So, all
242    ctype uses should be through macros like ISPRINT...  If
243    STDC_HEADERS is defined, then autoconf has verified that the ctype
244    macros don't need to be guarded with references to isascii. ...
245    Defining isascii to 1 should let any compiler worth its salt
246    eliminate the && through constant folding."  */
247
248 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
249 #define ISASCII_1(c) 1
250 #else
251 #define ISASCII_1(c) isascii(c)
252 #endif
253
254 #ifdef MULE
255 /* The IS*() macros can be passed any character, including an extended
256    one.  We need to make sure there are no crashes, which would occur
257    otherwise due to out-of-bounds array references. */
258 #define ISASCII(c) (((EMACS_UINT) (c)) < 0x100 && ISASCII_1 (c))
259 #else
260 #define ISASCII(c) ISASCII_1 (c)
261 #endif                          /* MULE */
262
263 #ifdef isblank
264 #define ISBLANK(c) (ISASCII (c) && isblank (c))
265 #else
266 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
267 #endif
268 #ifdef isgraph
269 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
270 #else
271 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
272 #endif
273
274 /* stolen from FSF/src/charset.h */
275 #define SINGLE_BYTE_CHAR_P(c)   (((unsigned int)(c) & 0xFF) == (c))
276 /* 1 if C is an ASCII character.  */
277 #define IS_REAL_ASCII(c) ((c) < 128)
278 /* 1 if C is a unibyte character.  */
279 #define ISUNIBYTE(c)    (SINGLE_BYTE_CHAR_P(c))
280
281 #define ISPRINT(c) (ISASCII (c) && isprint (c))
282 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
283 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
284 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
285 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
286 #define ISLOWER(c) (ISASCII (c) && islower (c))
287 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
288 #define ISSPACE(c) (ISASCII (c) && isspace (c))
289 #define ISUPPER(c) (ISASCII (c) && isupper (c))
290 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
291
292 #if defined emacs
293 static inline bool
294 ISWORD(int c)
295         __attribute__((always_inline));
296 static inline bool
297 ISWORD(int c)
298 {
299         Lisp_Object ct = regex_emacs_buffer->mirror_syntax_table;
300
301         return (SYNTAX(XCHAR_TABLE(ct), c) == Sword);
302 }
303 #else
304 # define ISWORD(c)      (ISALPHA(c))
305 #endif  /* emacs */
306
307 #ifndef NULL
308 #define NULL (void *)0
309 #endif
310
311 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
312    since ours (we hope) works properly with all combinations of
313    machines, compilers, `char' and `unsigned char' argument types.
314    (Per Bothner suggested the basic approach.)  */
315 #undef SIGN_EXTEND_CHAR
316 #if __STDC__
317 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
318 #else                           /* not __STDC__ */
319 /* As in Harbison and Steele.  */
320 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
321 #endif
322 \f
323 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
324    use `alloca' instead of `malloc'.  This is because using malloc in
325    re_search* or re_match* could cause memory leaks when C-g is used in
326    Emacs; also, malloc is slower and causes storage fragmentation.  On
327    the other hand, malloc is more portable, and easier to debug.
328
329    Because we sometimes use alloca, some routines have to be macros,
330    not functions -- `alloca'-allocated space disappears at the end of the
331    function it is called in.  */
332
333 #ifdef REGEX_MALLOC
334
335 #define REGEX_ALLOCATE  xmalloc
336 #define REGEX_REALLOCATE(source, osize, nsize) xrealloc(source, nsize)
337 #define REGEX_FREE      xfree
338
339 #else  /* !REGEX_MALLOC */
340
341 /* Emacs already defines alloca, sometimes.  */
342 #ifndef alloca
343
344 /* Make alloca work the best possible way.  */
345 #ifdef __GNUC__
346 #define alloca __builtin_alloca
347 #else                           /* not __GNUC__ */
348 #if HAVE_ALLOCA_H
349 #include <alloca.h>
350 #else                           /* not __GNUC__ or HAVE_ALLOCA_H */
351 #ifndef _AIX                    /* Already did AIX, up at the top.  */
352 void *alloca();
353 #endif                          /* not _AIX */
354 #endif                          /* HAVE_ALLOCA_H */
355 #endif                          /* __GNUC__ */
356
357 #endif                          /* not alloca */
358
359 #define REGEX_ALLOCATE alloca
360
361 /* Assumes a `char *destination' variable.  */
362 #define REGEX_REALLOCATE(source, osize, nsize)                          \
363   (destination = (char *) alloca (nsize),                               \
364    memmove (destination, source, osize),                                \
365    destination)
366
367 /* No need to do anything to free, after alloca.  */
368 #define REGEX_FREE(arg) ((void)0)       /* Do nothing!  But inhibit gcc warning.  */
369
370 #endif  /* REGEX_MALLOC */
371
372 /* Define how to allocate the failure stack.  */
373
374 #ifdef REL_ALLOC
375 #error not again
376 #define REGEX_ALLOCATE_STACK(size)                              \
377         r_alloc ((char **) &failure_stack_ptr, (size))
378 #define REGEX_REALLOCATE_STACK(source, osize, nsize)            \
379         r_re_alloc ((char **) &failure_stack_ptr, (nsize))
380 #define REGEX_FREE_STACK(ptr)                                   \
381         r_alloc_free ((void **) &failure_stack_ptr)
382
383 #else  /* !REL_ALLOC */
384
385 #ifdef REGEX_MALLOC
386
387 #define REGEX_ALLOCATE_STACK    xmalloc
388 #define REGEX_REALLOCATE_STACK(source, osize, nsize) xrealloc(source, nsize)
389 #define REGEX_FREE_STACK        xfree
390
391 #else  /* !REGEX_MALLOC */
392
393 #define REGEX_ALLOCATE_STACK alloca
394
395 #define REGEX_REALLOCATE_STACK(source, osize, nsize)                    \
396    REGEX_REALLOCATE (source, osize, nsize)
397 /* No need to explicitly free anything.  */
398 #define REGEX_FREE_STACK(arg)
399
400 #endif  /* REGEX_MALLOC */
401 #endif  /* REL_ALLOC */
402
403 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
404    `string1' or just past its end.  This works if PTR is NULL, which is
405    a good thing.  */
406 #define FIRST_STRING_P(ptr)                                     \
407   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
408
409 /* (Re)Allocate N items of type T using malloc, or fail.  */
410 #define TALLOC(n, t)                            \
411         ((t *)xmalloc((n) * sizeof (t)))
412 #define TALLOC_ATOMIC(n, t)                     \
413         ((t *)xmalloc_atomic((n) * sizeof (t)))
414 #define RETALLOC(addr, n, t)                                    \
415         ((addr) = (t *)xrealloc(addr, (n) * sizeof (t)))
416 #define RETALLOC_IF(addr, n, t)                                 \
417         if (addr) {                                             \
418                 RETALLOC((addr), (n), t);                       \
419         } else                                                  \
420                 (addr) = TALLOC ((n), t)
421 #define REGEX_TALLOC(n, t)                              \
422         ((t *)REGEX_ALLOCATE((n) * sizeof (t)))
423
424 #define BYTEWIDTH 8             /* In bits.  */
425
426 #define STREQ(s1, s2) (strcmp (s1, s2) == 0)
427
428 #undef MAX
429 #undef MIN
430 #define MAX(a, b) ((a) > (b) ? (a) : (b))
431 #define MIN(a, b) ((a) < (b) ? (a) : (b))
432
433 /* Type of source-pattern and string chars.  */
434 typedef const unsigned char re_char;
435
436 typedef char re_bool;
437 #define false 0
438 #define true 1
439 \f
440 /* These are the command codes that appear in compiled regular
441    expressions.  Some opcodes are followed by argument bytes.  A
442    command code can specify any interpretation whatsoever for its
443    arguments.  Zero bytes may appear in the compiled regular expression.  */
444
445 typedef enum {
446         no_op = 0,
447
448         /* Succeed right away--no more backtracking.  */
449         succeed,
450
451         /* Followed by one byte giving n, then by n literal bytes.  */
452         exactn,
453
454         /* Matches any (more or less) character.  */
455         anychar,
456
457         /* Matches any one char belonging to specified set.  First
458            following byte is number of bitmap bytes.  Then come bytes
459            for a bitmap saying which chars are in.  Bits in each byte
460            are ordered low-bit-first.  A character is in the set if its
461            bit is 1.  A character too large to have a bit in the map is
462            automatically not in the set.  */
463         charset,
464
465         /* Same parameters as charset, but match any character that is
466            not one of those specified.  */
467         charset_not,
468
469         /* Start remembering the text that is matched, for storing in a
470            register.  Followed by one byte with the register number, in
471            the range 1 to the pattern buffer's re_ngroups
472            field.  Then followed by one byte with the number of groups
473            inner to this one.  (This last has to be part of the
474            start_memory only because we need it in the on_failure_jump
475            of re_match_2.)  */
476         start_memory,
477
478         /* Stop remembering the text that is matched and store it in a
479            memory register.  Followed by one byte with the register
480            number, in the range 1 to `re_ngroups' in the
481            pattern buffer, and one byte with the number of inner groups,
482            just like `start_memory'.  (We need the number of inner
483            groups here because we don't have any easy way of finding the
484            corresponding start_memory when we're at a stop_memory.)  */
485         stop_memory,
486
487         /* Match a duplicate of something remembered. Followed by one
488            byte containing the register number.  */
489         duplicate,
490
491         /* Fail unless at beginning of line.  */
492         begline,
493
494         /* Fail unless at end of line.  */
495         endline,
496
497         /* Succeeds if at beginning of buffer (if emacs) or at beginning
498            of string to be matched (if not).  */
499         begbuf,
500
501         /* Analogously, for end of buffer/string.  */
502         endbuf,
503
504         /* Followed by two byte relative address to which to jump.  */
505         jump,
506
507         /* Same as jump, but marks the end of an alternative.  */
508         jump_past_alt,
509
510         /* Followed by two-byte relative address of place to resume at
511            in case of failure.  */
512         on_failure_jump,
513
514         /* Like on_failure_jump, but pushes a placeholder instead of the
515            current string position when executed.  */
516         on_failure_keep_string_jump,
517
518         /* Throw away latest failure point and then jump to following
519            two-byte relative address.  */
520         pop_failure_jump,
521
522         /* Change to pop_failure_jump if know won't have to backtrack to
523            match; otherwise change to jump.  This is used to jump
524            back to the beginning of a repeat.  If what follows this jump
525            clearly won't match what the repeat does, such that we can be
526            sure that there is no use backtracking out of repetitions
527            already matched, then we change it to a pop_failure_jump.
528            Followed by two-byte address.  */
529         maybe_pop_jump,
530
531         /* Jump to following two-byte address, and push a dummy failure
532            point. This failure point will be thrown away if an attempt
533            is made to use it for a failure.  A `+' construct makes this
534            before the first repeat.  Also used as an intermediary kind
535            of jump when compiling an alternative.  */
536         dummy_failure_jump,
537
538         /* Push a dummy failure point and continue.  Used at the end of
539            alternatives.  */
540         push_dummy_failure,
541
542         /* Followed by two-byte relative address and two-byte number n.
543            After matching N times, jump to the address upon failure.  */
544         succeed_n,
545
546         /* Followed by two-byte relative address, and two-byte number n.
547            Jump to the address N times, then fail.  */
548         jump_n,
549
550         /* Set the following two-byte relative address to the
551            subsequent two-byte number.  The address *includes* the two
552            bytes of number.  */
553         set_number_at,
554
555         wordchar,               /* Matches any word-constituent character.  */
556         notwordchar,            /* Matches any char that is not a word-constituent.  */
557
558         wordbeg,                /* Succeeds if at word beginning.  */
559         wordend,                /* Succeeds if at word end.  */
560
561         wordbound,              /* Succeeds if at a word boundary.  */
562         notwordbound            /* Succeeds if not at a word boundary.  */
563 #ifdef emacs
564             , before_dot,       /* Succeeds if before point.  */
565         at_dot,                 /* Succeeds if at point.  */
566         after_dot,              /* Succeeds if after point.  */
567
568         /* Matches any character whose syntax is specified.  Followed by
569            a byte which contains a syntax code, e.g., Sword.  */
570         syntaxspec,
571
572         /* Matches any character whose syntax is not that specified.  */
573         notsyntaxspec
574 #endif                          /* emacs */
575 #ifdef MULE
576             /* need extra stuff to be able to properly work with XEmacs/Mule
577                characters (which may take up more than one byte) */
578             , charset_mule,     /* Matches any character belonging to specified set.
579                                    The set is stored in "unified range-table
580                                    format"; see rangetab.c.  Unlike the `charset'
581                                    opcode, this can handle arbitrary characters. */
582
583         charset_mule_not        /* Same parameters as charset_mule, but match any
584                                    character that is not one of those specified.  */
585             /* 97/2/17 jhod: The following two were merged back in from the Mule
586                2.3 code to enable some language specific processing */
587             , categoryspec,     /* Matches entries in the character category tables */
588         notcategoryspec         /* The opposite of the above */
589 #endif                          /* MULE */
590 } re_opcode_t;
591 \f
592 /* Common operations on the compiled pattern.  */
593
594 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
595
596 #define STORE_NUMBER(destination, number)                               \
597   do {                                                                  \
598     (destination)[0] = (number) & 0377;                                 \
599     (destination)[1] = (number) >> 8;                                   \
600   } while (0)
601
602 /* Same as STORE_NUMBER, except increment DESTINATION to
603    the byte after where the number is stored.  Therefore, DESTINATION
604    must be an lvalue.  */
605
606 #define STORE_NUMBER_AND_INCR(destination, number)                      \
607   do {                                                                  \
608     STORE_NUMBER (destination, number);                                 \
609     (destination) += 2;                                                 \
610   } while (0)
611
612 /* Put into DESTINATION a number stored in two contiguous bytes starting
613    at SOURCE.  */
614
615 #define EXTRACT_NUMBER(destination, source)                             \
616   do {                                                                  \
617     (destination) = *(source) & 0377;                                   \
618     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
619   } while (0)
620
621 #ifdef REGEX_DEBUG_FLAG
622 static void extract_number(int *dest, re_char * source)
623 {
624         int temp = SIGN_EXTEND_CHAR(*(source + 1));
625         *dest = *source & 0377;
626         *dest += temp << 8;
627 }
628
629 #ifndef EXTRACT_MACROS          /* To debug the macros.  */
630 #undef EXTRACT_NUMBER
631 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
632 #endif                          /* not EXTRACT_MACROS */
633
634 #endif                          /* REGEX_DEBUG_FLAG */
635
636 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
637    SOURCE must be an lvalue.  */
638
639 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
640         do {                                                            \
641                 EXTRACT_NUMBER (destination, source);                   \
642                 (source) += 2;                                          \
643         } while (0)
644
645 #ifdef REGEX_DEBUG_FLAG
646 static void
647 extract_number_and_incr(int *destination, unsigned char **source)
648 {
649         extract_number(destination, *source);
650         *source += 2;
651 }
652
653 #ifndef EXTRACT_MACROS
654 #undef EXTRACT_NUMBER_AND_INCR
655 #define EXTRACT_NUMBER_AND_INCR(dest, src)      \
656         extract_number_and_incr (&dest, &src)
657 #endif                          /* not EXTRACT_MACROS */
658
659 #endif                          /* REGEX_DEBUG_FLAG */
660 \f
661 /* If REGEX_DEBUG_FLAG is defined, Regex prints many voluminous messages about
662    what it is doing (if the variable `debug' is nonzero).  If linked with the
663    main program in `iregex.c', you can enter patterns and strings interactively.
664    And if linked with the main program in `main.c' and the other test files, you
665    can run the already-written tests.  */
666
667 #if defined (REGEX_DEBUG_FLAG)
668
669 /* We use standard I/O for debugging.  */
670 #include <stdio.h>
671
672 #ifndef emacs
673 /* XEmacs provides its own version of assert() */
674 /* It is useful to test things that ``must'' be true when debugging.  */
675 #include <assert.h>
676 #endif
677
678 #if 0
679 /* no point in having this one, since we do not offer any accessor to this */
680 static int debug = 0;
681 #endif
682
683 #define DEBUG_STATEMENT(e)              e
684 #define DEBUG_PRINT1(x)                 printf(x)
685 #define DEBUG_PRINT2(x1, x2)            printf(x1, x2)
686 #define DEBUG_PRINT3(x1, x2, x3)        printf(x1, x2, x3)
687 #define DEBUG_PRINT4(x1, x2, x3, x4)    printf(x1, x2, x3, x4)
688 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
689         print_partial_compiled_pattern(s, e)
690 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
691         print_double_string (w, s1, sz1, s2, sz2)
692
693 /* Print the fastmap in human-readable form.  */
694
695 static void print_fastmap(char *fastmap)
696 {
697         unsigned was_a_range = 0;
698         unsigned i = 0;
699
700         while (i < (1 << BYTEWIDTH)) {
701                 if (fastmap[i++]) {
702                         was_a_range = 0;
703                         putchar(i - 1);
704                         while (i < (1 << BYTEWIDTH) && fastmap[i]) {
705                                 was_a_range = 1;
706                                 i++;
707                         }
708                         if (was_a_range) {
709                                 putchar('-');
710                                 putchar(i - 1);
711                         }
712                 }
713         }
714         putchar('\n');
715 }
716
717 /* Print a compiled pattern string in human-readable form, starting at
718    the START pointer into it and ending just before the pointer END.  */
719
720 /* can't help it ... the ugly mule code changes `start' upon alignment,
721  * so this can't have the const qualifier ... bugger */
722 static void
723 print_partial_compiled_pattern(unsigned char *start, re_char *end)
724 {
725         int mcnt, mcnt2;
726         unsigned char *p = start;
727         re_char *pend = end;
728
729         if (start == NULL) {
730                 puts("(null)");
731                 return;
732         }
733
734         /* Loop over pattern commands.  */
735         while (p < pend) {
736                 printf("%ld:\t", (long)(p - start));
737
738                 switch ((const re_opcode_t)*p++) {
739                 case no_op:
740                         printf("/no_op");
741                         break;
742
743                 case exactn:
744                         mcnt = *p++;
745                         printf("/exactn/%d", mcnt);
746                         do {
747                                 putchar('/');
748                                 putchar(*p++);
749                         }
750                         while (--mcnt);
751                         break;
752
753                 case start_memory:
754                         mcnt = *p++;
755                         printf("/start_memory/%d/%d", mcnt, *p++);
756                         break;
757
758                 case stop_memory:
759                         mcnt = *p++;
760                         printf("/stop_memory/%d/%d", mcnt, *p++);
761                         break;
762
763                 case duplicate:
764                         printf("/duplicate/%d", *p++);
765                         break;
766
767                 case anychar:
768                         printf("/anychar");
769                         break;
770
771                 case charset:
772                 case charset_not: {
773                         REGISTER int c, last = -100;
774                         REGISTER int in_range = 0;
775
776                         printf("/charset [%s",
777                                (re_opcode_t) * (p - 1) == charset_not
778                                ? "^" : "");
779
780                         assert(p + *p < pend);
781
782                         for (c = 0; c < 256; c++)
783                                 if (((unsigned char)(c / 8) < *p)
784                                     && (p[1 + (c / 8)] &
785                                         (1 << (c % 8)))) {
786                                         /* Are we starting a range?  */
787                                         if (last + 1 == c && !in_range) {
788                                                 putchar('-');
789                                                 in_range = 1;
790                                         }
791                                         /* Have we broken a range?  */
792                                         else if (last + 1 != c
793                                                  && in_range) {
794                                                 putchar(last);
795                                                 in_range = 0;
796                                         }
797
798                                         if (!in_range)
799                                                 putchar(c);
800
801                                         last = c;
802                                 }
803
804                         if (in_range)
805                                 putchar(last);
806
807                         putchar(']');
808
809                         p += 1 + *p;
810                         break;
811                 }
812
813 #ifdef MULE
814                 case charset_mule:
815                 case charset_mule_not: {
816                         int nentries, i;
817
818                         printf("/charset_mule [%s",
819                                (re_opcode_t) * (p - 1) == charset_mule_not
820                                ? "^" : "");
821                         /* u_r_t_nentries() CHANGES its arg, why on earth
822                          * is this marked const here then? :O -hrop */
823                         nentries = unified_range_table_nentries(p);
824                         for (i = 0; i < nentries; i++) {
825                                 EMACS_INT first, last;
826                                 Lisp_Object dummy_val;
827
828                                 /* u_r_t_get_range CHANGES its arg ...
829                                  * p can't be const thence */
830                                 unified_range_table_get_range(p, i,
831                                                               &first,
832                                                               &last,
833                                                               &dummy_val);
834                                 if (first < 0x100)
835                                         putchar(first);
836                                 else
837                                         printf("(0x%lx)", (long)first);
838                                 if (first != last) {
839                                         putchar('-');
840                                         if (last < 0x100)
841                                                 putchar(last);
842                                         else
843                                                 printf("(0x%lx)",
844                                                        (long)last);
845                                 }
846                         }
847                         putchar(']');
848                         p += unified_range_table_bytes_used(p);
849                 }
850                         break;
851 #endif
852
853                 case begline:
854                         printf("/begline");
855                         break;
856
857                 case endline:
858                         printf("/endline");
859                         break;
860
861                 case on_failure_jump:
862                         extract_number_and_incr(&mcnt, &p);
863                         printf("/on_failure_jump to %ld",
864                                (long)(p + mcnt - start));
865                         break;
866
867                 case on_failure_keep_string_jump:
868                         extract_number_and_incr(&mcnt, &p);
869                         printf("/on_failure_keep_string_jump to %ld",
870                                (long)(p + mcnt - start));
871                         break;
872
873                 case dummy_failure_jump:
874                         extract_number_and_incr(&mcnt, &p);
875                         printf("/dummy_failure_jump to %ld",
876                                (long)(p + mcnt - start));
877                         break;
878
879                 case push_dummy_failure:
880                         printf("/push_dummy_failure");
881                         break;
882
883                 case maybe_pop_jump:
884                         extract_number_and_incr(&mcnt, &p);
885                         printf("/maybe_pop_jump to %ld",
886                                (long)(p + mcnt - start));
887                         break;
888
889                 case pop_failure_jump:
890                         extract_number_and_incr(&mcnt, &p);
891                         printf("/pop_failure_jump to %ld",
892                                (long)(p + mcnt - start));
893                         break;
894
895                 case jump_past_alt:
896                         extract_number_and_incr(&mcnt, &p);
897                         printf("/jump_past_alt to %ld",
898                                (long)(p + mcnt - start));
899                         break;
900
901                 case jump:
902                         extract_number_and_incr(&mcnt, &p);
903                         printf("/jump to %ld", (long)(p + mcnt - start));
904                         break;
905
906                 case succeed_n:
907                         extract_number_and_incr(&mcnt, &p);
908                         extract_number_and_incr(&mcnt2, &p);
909                         printf("/succeed_n to %ld, %d times",
910                                (long)(p + mcnt - start), mcnt2);
911                         break;
912
913                 case jump_n:
914                         extract_number_and_incr(&mcnt, &p);
915                         extract_number_and_incr(&mcnt2, &p);
916                         printf("/jump_n to %ld, %d times",
917                                (long)(p + mcnt - start), mcnt2);
918                         break;
919
920                 case set_number_at:
921                         extract_number_and_incr(&mcnt, &p);
922                         extract_number_and_incr(&mcnt2, &p);
923                         printf("/set_number_at location %ld to %d",
924                                (long)(p + mcnt - start), mcnt2);
925                         break;
926
927                 case wordbound:
928                         printf("/wordbound");
929                         break;
930
931                 case notwordbound:
932                         printf("/notwordbound");
933                         break;
934
935                 case wordbeg:
936                         printf("/wordbeg");
937                         break;
938
939                 case wordend:
940                         printf("/wordend");
941
942 #ifdef emacs
943                 case before_dot:
944                         printf("/before_dot");
945                         break;
946
947                 case at_dot:
948                         printf("/at_dot");
949                         break;
950
951                 case after_dot:
952                         printf("/after_dot");
953                         break;
954
955                 case syntaxspec:
956                         printf("/syntaxspec");
957                         mcnt = *p++;
958                         printf("/%d", mcnt);
959                         break;
960
961                 case notsyntaxspec:
962                         printf("/notsyntaxspec");
963                         mcnt = *p++;
964                         printf("/%d", mcnt);
965                         break;
966
967 #ifdef MULE
968 /* 97/2/17 jhod Mule category patch */
969                 case categoryspec:
970                         printf("/categoryspec");
971                         mcnt = *p++;
972                         printf("/%d", mcnt);
973                         break;
974
975                 case notcategoryspec:
976                         printf("/notcategoryspec");
977                         mcnt = *p++;
978                         printf("/%d", mcnt);
979                         break;
980 /* end of category patch */
981 #endif                          /* MULE */
982 #endif                          /* emacs */
983
984                 case wordchar:
985                         printf("/wordchar");
986                         break;
987
988                 case notwordchar:
989                         printf("/notwordchar");
990                         break;
991
992                 case begbuf:
993                         printf("/begbuf");
994                         break;
995
996                 case endbuf:
997                         printf("/endbuf");
998                         break;
999
1000                 case succeed:
1001                 default:
1002                         printf("?%d", *(p - 1));
1003                 }
1004
1005                 putchar('\n');
1006         }
1007
1008         printf("%ld:\tend of pattern.\n", (long)(p - start));
1009 }
1010
1011 static void
1012 print_compiled_pattern(struct re_pattern_buffer *bufp)
1013 {
1014 #if 0
1015         /* cant be const */
1016         re_char *buffer = bufp->buffer;
1017 #else
1018         unsigned char *buffer = bufp->buffer;
1019 #endif
1020
1021         print_partial_compiled_pattern(buffer, buffer + bufp->used);
1022         printf("%ld bytes used/%ld bytes allocated.\n", bufp->used,
1023                bufp->allocated);
1024
1025         if (bufp->fastmap_accurate && bufp->fastmap) {
1026                 printf("fastmap: ");
1027                 print_fastmap(bufp->fastmap);
1028         }
1029
1030         printf("re_nsub: %ld\t", (long)bufp->re_nsub);
1031         printf("re_ngroups: %ld\t", (long)bufp->re_ngroups);
1032         printf("regs_alloc: %d\t", bufp->regs_allocated);
1033         printf("can_be_null: %d\t", bufp->can_be_null);
1034         printf("newline_anchor: %d\n", bufp->newline_anchor);
1035         printf("no_sub: %d\t", bufp->no_sub);
1036         printf("not_bol: %d\t", bufp->not_bol);
1037         printf("not_eol: %d\t", bufp->not_eol);
1038         printf("syntax: %d\n", bufp->syntax);
1039         /* Perhaps we should print the translate table?  */
1040         /* and maybe the category table? */
1041
1042         if (bufp->external_to_internal_register) {
1043                 int i;
1044
1045                 printf("external_to_internal_register:\n");
1046                 for (i = 0; i <= bufp->re_nsub; i++) {
1047                         if (i > 0)
1048                                 printf(", ");
1049                         printf("%d -> %d", i,
1050                                bufp->external_to_internal_register[i]);
1051                 }
1052                 printf("\n");
1053         }
1054 }
1055
1056 static void
1057 print_double_string(re_char * where, re_char * string1, int size1,
1058                     re_char * string2, int size2)
1059 {
1060         if (where == NULL)
1061                 printf("(null)");
1062         else {
1063                 Element_count this_char;
1064
1065                 if (FIRST_STRING_P(where)) {
1066                         for (this_char = where - string1; this_char < size1;
1067                              this_char++)
1068                                 putchar(string1[this_char]);
1069
1070                         where = string2;
1071                 }
1072
1073                 for (this_char = where - string2; this_char < size2;
1074                      this_char++)
1075                         putchar(string2[this_char]);
1076         }
1077 }
1078
1079 #else                           /* not REGEX_DEBUG_FLAG */
1080
1081 #undef assert
1082 #define assert(e)
1083
1084 #define DEBUG_STATEMENT(e)
1085 #define DEBUG_PRINT1(x)
1086 #define DEBUG_PRINT2(x1, x2)
1087 #define DEBUG_PRINT3(x1, x2, x3)
1088 #define DEBUG_PRINT4(x1, x2, x3, x4)
1089 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1090 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1091
1092 #endif                          /* REGEX_DEBUG_FLAG */
1093 \f
1094 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1095    also be assigned to arbitrarily: each pattern buffer stores its own
1096    syntax, so it can be changed between regex compilations.  */
1097 /* This has no initializer because initialized variables in Emacs
1098    become read-only after dumping.  */
1099 reg_syntax_t re_syntax_options;
1100
1101 /* Specify the precise syntax of regexps for compilation.  This provides
1102    for compatibility for various utilities which historically have
1103    different, incompatible syntaxes.
1104
1105    The argument SYNTAX is a bit mask comprised of the various bits
1106    defined in regex.h.  We return the old syntax.  */
1107
1108 reg_syntax_t re_set_syntax(reg_syntax_t syntax)
1109 {
1110         reg_syntax_t ret = re_syntax_options;
1111
1112         re_syntax_options = syntax;
1113         return ret;
1114 }
1115 \f
1116 /* This table gives an error message for each of the error codes listed
1117    in regex.h.  Obviously the order here has to be same as there.
1118    POSIX doesn't require that we do anything for REG_NOERROR,
1119    but why not be nice?  */
1120
1121 static const char *re_error_msgid[] = {
1122         "Success",              /* REG_NOERROR */
1123         "No match",             /* REG_NOMATCH */
1124         "Invalid regular expression",   /* REG_BADPAT */
1125         "Invalid collation character",  /* REG_ECOLLATE */
1126         "Invalid character class name", /* REG_ECTYPE */
1127         "Trailing backslash",   /* REG_EESCAPE */
1128         "Invalid back reference",       /* REG_ESUBREG */
1129         "Unmatched [ or [^",    /* REG_EBRACK */
1130         "Unmatched ( or \\(",   /* REG_EPAREN */
1131         "Unmatched \\{",        /* REG_EBRACE */
1132         "Invalid content of \\{\\}",    /* REG_BADBR */
1133         "Invalid range end",    /* REG_ERANGE */
1134         "Memory exhausted",     /* REG_ESPACE */
1135         "Invalid preceding regular expression", /* REG_BADRPT */
1136         "Premature end of regular expression",  /* REG_EEND */
1137         "Regular expression too big",   /* REG_ESIZE */
1138         "Unmatched ) or \\)",   /* REG_ERPAREN */
1139 #ifdef emacs
1140         "Invalid syntax designator",    /* REG_ESYNTAX */
1141 #endif
1142 #ifdef MULE
1143         "Ranges may not span charsets", /* REG_ERANGESPAN */
1144         "Invalid category designator",  /* REG_ECATEGORY */
1145 #endif
1146 };
1147 \f
1148 /* Avoiding alloca during matching, to placate r_alloc.  */
1149
1150 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1151    searching and matching functions should not call alloca.  On some
1152    systems, alloca is implemented in terms of malloc, and if we're
1153    using the relocating allocator routines, then malloc could cause a
1154    relocation, which might (if the strings being searched are in the
1155    ralloc heap) shift the data out from underneath the regexp
1156    routines.
1157
1158    Here's another reason to avoid allocation: Emacs
1159    processes input from X in a signal handler; processing X input may
1160    call malloc; if input arrives while a matching routine is calling
1161    malloc, then we're scrod.  But Emacs can't just block input while
1162    calling matching routines; then we don't notice interrupts when
1163    they come in.  So, Emacs blocks input around all regexp calls
1164    except the matching calls, which it leaves unprotected, in the
1165    faith that they will not malloc.  */
1166
1167 /* Normally, this is fine.  */
1168 #define MATCH_MAY_ALLOCATE
1169
1170 /* When using GNU C, we are not REALLY using the C alloca, no matter
1171    what config.h may say.  So don't take precautions for it.  */
1172 #ifdef __GNUC__
1173 #undef C_ALLOCA
1174 #endif
1175
1176 /* The match routines may not allocate if (1) they would do it with malloc
1177    and (2) it's not safe for them to use malloc.
1178    Note that if REL_ALLOC is defined, matching would not use malloc for the
1179    failure stack, but we would still use it for the register vectors;
1180    so REL_ALLOC should not affect this.  */
1181 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1182 #undef MATCH_MAY_ALLOCATE
1183 #endif
1184 \f
1185 /* Failure stack declarations and macros; both re_compile_fastmap and
1186    re_match_2 use a failure stack.  These have to be macros because of
1187    REGEX_ALLOCATE_STACK.  */
1188
1189 /* Number of failure points for which to initially allocate space
1190    when matching.  If this number is exceeded, we allocate more
1191    space, so it is not a hard limit.  */
1192 #ifndef INIT_FAILURE_ALLOC
1193 #define INIT_FAILURE_ALLOC 20
1194 #endif
1195
1196 /* Roughly the maximum number of failure points on the stack.  Would be
1197    exactly that if always used MAX_FAILURE_SPACE each time we failed.
1198    This is a variable only so users of regex can assign to it; we never
1199    change it ourselves.  */
1200 #if defined (MATCH_MAY_ALLOCATE) || defined (REGEX_MALLOC)
1201 /* 4400 was enough to cause a crash on Alpha OSF/1,
1202    whose default stack limit is 2mb.  */
1203 int re_max_failures = 50000;
1204 #else
1205 int re_max_failures = 2000;
1206 #endif
1207
1208 union fail_stack_elt {
1209         const unsigned char *pointer;
1210         int integer;
1211 };
1212
1213 typedef union fail_stack_elt fail_stack_elt_t;
1214
1215 typedef struct {
1216         fail_stack_elt_t *stack;
1217         Element_count size;
1218         Element_count avail;    /* Offset of next open position.  */
1219 } fail_stack_type;
1220
1221 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1222 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1223 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1224
1225 /* Define macros to initialize and free the failure stack.
1226    Do `return -2' if the alloc fails.  */
1227
1228 #ifdef MATCH_MAY_ALLOCATE
1229 #define INIT_FAIL_STACK()                                               \
1230         do {                                                            \
1231                 fail_stack.stack = (fail_stack_elt_t *)                 \
1232                         REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC *      \
1233                                               sizeof (fail_stack_elt_t)); \
1234                                                                         \
1235                 if (fail_stack.stack == NULL)                           \
1236                         return -2;                                      \
1237                                                                         \
1238                 fail_stack.size = INIT_FAILURE_ALLOC;                   \
1239                 fail_stack.avail = 0;                                   \
1240         } while (0)
1241
1242 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1243 #else
1244 #define INIT_FAIL_STACK()                       \
1245         do {                                    \
1246                 fail_stack.avail = 0;           \
1247         } while (0)
1248
1249 #define RESET_FAIL_STACK()
1250 #endif
1251
1252 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1253
1254    Return 1 if succeeds, and 0 if either ran out of memory
1255    allocating space for it or it was already too large.
1256
1257    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1258
1259 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
1260   ((int) (fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS        \
1261    ? 0                                                                  \
1262    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
1263         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
1264           (fail_stack).size * sizeof (fail_stack_elt_t),                \
1265           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
1266                                                                         \
1267       (fail_stack).stack == NULL                                        \
1268       ? 0                                                               \
1269       : ((fail_stack).size <<= 1,                                       \
1270          1)))
1271
1272 /* Push pointer POINTER on FAIL_STACK.
1273    Return 1 if was able to do so and 0 if ran out of memory allocating
1274    space to do so.  */
1275 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
1276   ((FAIL_STACK_FULL ()                                                  \
1277     && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
1278    ? 0                                                                  \
1279    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
1280       1))
1281
1282 /* Push a pointer value onto the failure stack.
1283    Assumes the variable `fail_stack'.  Probably should only
1284    be called from within `PUSH_FAILURE_POINT'.  */
1285 #define PUSH_FAILURE_POINTER(item)                                      \
1286         fail_stack.stack[fail_stack.avail++].pointer =                  \
1287                 (const unsigned char*)(item)
1288
1289 /* This pushes an integer-valued item onto the failure stack.
1290    Assumes the variable `fail_stack'.  Probably should only
1291    be called from within `PUSH_FAILURE_POINT'.  */
1292 #define PUSH_FAILURE_INT(item)                                  \
1293         fail_stack.stack[fail_stack.avail++].integer = (item)
1294
1295 /* Push a fail_stack_elt_t value onto the failure stack.
1296    Assumes the variable `fail_stack'.  Probably should only
1297    be called from within `PUSH_FAILURE_POINT'.  */
1298 #define PUSH_FAILURE_ELT(item)                                  \
1299         fail_stack.stack[fail_stack.avail++] =  (item)
1300
1301 /* These three POP... operations complement the three PUSH... operations.
1302    All assume that `fail_stack' is nonempty.  */
1303 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1304 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1305 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1306
1307 /* Used to omit pushing failure point id's when we're not debugging.  */
1308 #ifdef REGEX_DEBUG_FLAG
1309 #define DEBUG_PUSH PUSH_FAILURE_INT
1310 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1311 #else
1312 #define DEBUG_PUSH(item)
1313 #define DEBUG_POP(item_addr)
1314 #endif
1315
1316 /* Push the information about the state we will need
1317    if we ever fail back to it.
1318
1319    Requires variables fail_stack, regstart, regend, reg_info, and
1320    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1321    declared.
1322
1323    Does `return FAILURE_CODE' if runs out of memory.  */
1324
1325 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
1326 #define DECLARE_DESTINATION char *destination
1327 #else
1328 #define DECLARE_DESTINATION DECLARE_NOTHING
1329 #endif
1330
1331 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1332         do {                                                            \
1333                 DECLARE_DESTINATION;                                    \
1334                 /* Must be int, so when we don't save any registers, the\
1335                    arithmetic \ of 0 + -1 isn't done as unsigned.  */   \
1336                 int this_reg;                                           \
1337                                                                         \
1338                 DEBUG_STATEMENT (failure_id++);                         \
1339                 DEBUG_STATEMENT (nfailure_points_pushed++);             \
1340                 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1341                 DEBUG_PRINT2 ("  Before push, next avail: %lu\n",       \
1342                               (long unsigned int)(fail_stack).avail);   \
1343                 DEBUG_PRINT2 ("                     size: %lu\n",       \
1344                               (long unsigned int)(fail_stack).size);    \
1345                                                                         \
1346                 DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS); \
1347                 DEBUG_PRINT2 ("     available: %ld\n",                  \
1348                               (long) REMAINING_AVAIL_SLOTS);            \
1349                                                                         \
1350                 /* Ensure we have enough space allocated for what       \
1351                  * we will push.  */                                    \
1352                 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) {     \
1353                         if (!DOUBLE_FAIL_STACK (fail_stack))            \
1354                                 return failure_code;                    \
1355                                                                         \
1356                         DEBUG_PRINT2 ("\n  Doubled stack; size now: %lu\n", \
1357                                       (unsigned long) (fail_stack).size); \
1358                         DEBUG_PRINT2 ("  slots available: %ld\n",       \
1359                                       (long) REMAINING_AVAIL_SLOTS);    \
1360                 }                                                       \
1361                                                                         \
1362                 /* Push the info, starting with the registers.  */      \
1363                 DEBUG_PRINT1 ("\n");                                    \
1364                                                                         \
1365                 for (this_reg = lowest_active_reg;                      \
1366                      this_reg <= highest_active_reg;                    \
1367                      this_reg++) {                                      \
1368                         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg); \
1369                         DEBUG_STATEMENT (num_regs_pushed++);            \
1370                                                                         \
1371                         DEBUG_PRINT2("    start: %p\n",                 \
1372                                      regstart[this_reg]);               \
1373                         PUSH_FAILURE_POINTER(regstart[this_reg]);       \
1374                                                                         \
1375                         DEBUG_PRINT2 ("    end: %p\n",                  \
1376                                       regend[this_reg]);                \
1377                         PUSH_FAILURE_POINTER(regend[this_reg]); \
1378                                                                         \
1379                         DEBUG_PRINT2 ("    info: 0x%p\n      ",         \
1380                                       &reg_info[this_reg]);             \
1381                         DEBUG_PRINT2 (" match_null=%d",                 \
1382                                       REG_MATCH_NULL_STRING_P           \
1383                                       (reg_info[this_reg]));            \
1384                         DEBUG_PRINT2 (" active=%d",                     \
1385                                       IS_ACTIVE (reg_info[this_reg]));  \
1386                         DEBUG_PRINT2 (" matched_something=%d",          \
1387                                       MATCHED_SOMETHING                 \
1388                                       (reg_info[this_reg]));            \
1389                         DEBUG_PRINT2 (" ever_matched_something=%d",     \
1390                                       EVER_MATCHED_SOMETHING            \
1391                                       (reg_info[this_reg]));            \
1392                         DEBUG_PRINT1 ("\n");                            \
1393                         PUSH_FAILURE_ELT(reg_info[this_reg].word);      \
1394                 }                                                       \
1395                                                                         \
1396                 DEBUG_PRINT2 ("  Pushing  low active reg: %d\n",        \
1397                               lowest_active_reg);                       \
1398                 PUSH_FAILURE_INT (lowest_active_reg);                   \
1399                                                                         \
1400                 DEBUG_PRINT2 ("  Pushing high active reg: %d\n",        \
1401                               highest_active_reg);                      \
1402                 PUSH_FAILURE_INT (highest_active_reg);                  \
1403                                                                         \
1404                 DEBUG_PRINT2 ("  Pushing pattern 0x%lx: \n",            \
1405                               (long) pattern_place);                    \
1406                 DEBUG_PRINT_COMPILED_PATTERN(bufp, pattern_place, pend); \
1407                 PUSH_FAILURE_POINTER (pattern_place);                   \
1408                                                                         \
1409                 DEBUG_PRINT2("  Pushing string %p: `",                  \
1410                              string_place);                             \
1411                 DEBUG_PRINT_DOUBLE_STRING(string_place,                 \
1412                                           string1, size1,               \
1413                                           string2, size2);              \
1414                 DEBUG_PRINT1("'\n");                                    \
1415                 PUSH_FAILURE_POINTER (string_place);                    \
1416                                                                         \
1417                 DEBUG_PRINT2("  Pushing failure id: %u\n", failure_id); \
1418                 DEBUG_PUSH(failure_id);                                 \
1419         } while (0)
1420
1421 /* This is the number of items that are pushed and popped on the stack
1422    for each register.  */
1423 #define NUM_REG_ITEMS  3
1424
1425 /* Individual items aside from the registers.  */
1426 #ifdef DEBUG
1427 #define NUM_NONREG_ITEMS 5      /* Includes failure point id.  */
1428 #else
1429 #define NUM_NONREG_ITEMS 4
1430 #endif
1431
1432 /* We push at most this many items on the stack.  */
1433 /* We used to use (num_regs - 1), which is the number of registers
1434    this regexp will save; but that was changed to 5
1435    to avoid stack overflow for a regexp with lots of parens.  */
1436 #define MAX_FAILURE_ITEMS (5 * (NUM_REG_ITEMS + NUM_NONREG_ITEMS))
1437
1438 /* We actually push this many items.  */
1439 #define NUM_FAILURE_ITEMS                                               \
1440   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS         \
1441     + NUM_NONREG_ITEMS)
1442
1443 /* How many items can still be added to the stack without overflowing it.  */
1444 #define REMAINING_AVAIL_SLOTS ((int) ((fail_stack).size - (fail_stack).avail))
1445
1446 /* Pops what PUSH_FAIL_STACK pushes.
1447
1448    We restore into the parameters, all of which should be lvalues:
1449      STR -- the saved data position.
1450      PAT -- the saved pattern position.
1451      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1452      REGSTART, REGEND -- arrays of string positions.
1453      REG_INFO -- array of information about each subexpression.
1454
1455    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1456    `pend', `string1', `size1', `string2', and `size2'.  */
1457
1458 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg,                  \
1459                           regstart, regend, reg_info)                   \
1460         do {                                                            \
1461                 DEBUG_STATEMENT (fail_stack_elt_t ffailure_id;)         \
1462                         int this_reg;                                   \
1463                 const unsigned char *string_temp;                       \
1464                                                                         \
1465                 assert (!FAIL_STACK_EMPTY ());                          \
1466                                                                         \
1467                 /* Remove failure points and point to                   \
1468                  * how many regs pushed.  */                            \
1469                 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                  \
1470                 DEBUG_PRINT2 ("  Before pop, next avail: %lu\n",        \
1471                               (unsigned long) fail_stack.avail);        \
1472                 DEBUG_PRINT2 ("                    size: %lu\n",        \
1473                               (unsigned long) fail_stack.size);         \
1474                                                                         \
1475                 assert (fail_stack.avail >= NUM_NONREG_ITEMS);          \
1476                                                                         \
1477                 DEBUG_POP (&ffailure_id.integer);                       \
1478                 DEBUG_PRINT2 ("  Popping failure id: %u\n",             \
1479                               * (unsigned int *) &ffailure_id);         \
1480                                                                         \
1481                 /* If the saved string location is NULL, it             \
1482                    came from an on_failure_keep_string_jump opcode,     \
1483                    and we want to throw away the saved NULL, thus       \
1484                    retaining our current position in the string.  */    \
1485                 string_temp = POP_FAILURE_POINTER ();                   \
1486                 if (string_temp != NULL)                                \
1487                         str = string_temp;                              \
1488                                                                         \
1489                 DEBUG_PRINT2("  Popping string %p: `", str);            \
1490                 DEBUG_PRINT_DOUBLE_STRING(str,                          \
1491                                           string1, size1,               \
1492                                           string2, size2);              \
1493                 DEBUG_PRINT1("'\n");                                    \
1494                                                                         \
1495                 pat = (unsigned char *) POP_FAILURE_POINTER();          \
1496                 DEBUG_PRINT2 ("  Popping pattern %p: ", pat);           \
1497                 DEBUG_PRINT_COMPILED_PATTERN(bufp, pat, pend);          \
1498                                                                         \
1499                 /* Restore register info.  */                           \
1500                 high_reg = (unsigned) POP_FAILURE_INT ();               \
1501                 DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg); \
1502                                                                         \
1503                 low_reg = (unsigned) POP_FAILURE_INT ();                \
1504                 DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg); \
1505                                                                         \
1506                 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) { \
1507                         DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg); \
1508                                                                         \
1509                         reg_info[this_reg].word = POP_FAILURE_ELT ();   \
1510                         DEBUG_PRINT2 ("      info: %p\n",               \
1511                                       &reg_info[this_reg]);             \
1512                                                                         \
1513                         regend[this_reg] = POP_FAILURE_POINTER ();      \
1514                         DEBUG_PRINT2 ("      end: %p\n",                \
1515                                       regend[this_reg]);                \
1516                                                                         \
1517                         regstart[this_reg] = POP_FAILURE_POINTER ();    \
1518                         DEBUG_PRINT2 ("      start: %p\n",              \
1519                                       regstart[this_reg]);              \
1520                 }                                                       \
1521                                                                         \
1522                 set_regs_matched_done = 0;                              \
1523                 DEBUG_STATEMENT (nfailure_points_popped++);             \
1524         } while (0)                     /* POP_FAILURE_POINT */
1525 \f
1526 /* Structure for per-register (a.k.a. per-group) information.
1527    Other register information, such as the
1528    starting and ending positions (which are addresses), and the list of
1529    inner groups (which is a bits list) are maintained in separate
1530    variables.
1531
1532    We are making a (strictly speaking) nonportable assumption here: that
1533    the compiler will pack our bit fields into something that fits into
1534    the type of `word', i.e., is something that fits into one item on the
1535    failure stack.  */
1536
1537 typedef union {
1538         fail_stack_elt_t word;
1539         struct {
1540                 /* This field is one if this group can match the empty string,
1541                    zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1542 #define MATCH_NULL_UNSET_VALUE 3
1543                 unsigned match_null_string_p:2;
1544                 unsigned is_active:1;
1545                 unsigned matched_something:1;
1546                 unsigned ever_matched_something:1;
1547         } bits;
1548 } register_info_type;
1549
1550 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1551 #define IS_ACTIVE(R)  ((R).bits.is_active)
1552 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1553 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1554
1555 /* Call this when have matched a real character; it sets `matched' flags
1556    for the subexpressions which we are currently inside.  Also records
1557    that those subexprs have matched.  */
1558 #define SET_REGS_MATCHED()                                              \
1559   do                                                                    \
1560     {                                                                   \
1561       if (!set_regs_matched_done)                                       \
1562         {                                                               \
1563           Element_count r;                                              \
1564           set_regs_matched_done = 1;                                    \
1565           for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
1566             {                                                           \
1567               MATCHED_SOMETHING (reg_info[r])                           \
1568                 = EVER_MATCHED_SOMETHING (reg_info[r])                  \
1569                 = 1;                                                    \
1570             }                                                           \
1571         }                                                               \
1572     }                                                                   \
1573   while (0)
1574
1575 /* Registers are set to a sentinel when they haven't yet matched.  */
1576 static unsigned char reg_unset_dummy;
1577 #define REG_UNSET_VALUE (&reg_unset_dummy)
1578 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1579 \f
1580 /* Subroutine declarations and macros for regex_compile.  */
1581
1582 /* Fetch the next character in the uncompiled pattern---translating it
1583    if necessary.  Also cast from a signed character in the constant
1584    string passed to us by the user to an unsigned char that we can use
1585    as an array index (in, e.g., `translate').  */
1586 #define PATFETCH(c)                                                     \
1587   do {                                                                  \
1588     PATFETCH_RAW (c);                                                   \
1589     c = TRANSLATE (c);                                                  \
1590   } while (0)
1591
1592 /* Fetch the next character in the uncompiled pattern, with no
1593    translation.  */
1594 #define PATFETCH_RAW(c)                                                 \
1595   do {if (p == pend) return REG_EEND;                                   \
1596     assert (p < pend);                                                  \
1597     c = charptr_emchar (p);                                             \
1598     INC_CHARPTR (p);                                                    \
1599   } while (0)
1600
1601 /* Go backwards one character in the pattern.  */
1602 #define PATUNFETCH DEC_CHARPTR (p)
1603
1604 #ifdef MULE
1605
1606 #define PATFETCH_EXTENDED(emch)                                         \
1607   do {if (p == pend) return REG_EEND;                                   \
1608     assert (p < pend);                                                  \
1609     emch = charptr_emchar ((const Bufbyte *) p);                        \
1610     INC_CHARPTR (p);                                                    \
1611     if (TRANSLATE_P (translate) && emch < 0x80)                         \
1612       emch = (Emchar) (unsigned char) RE_TRANSLATE (emch);              \
1613   } while (0)
1614
1615 #define PATFETCH_RAW_EXTENDED(emch)                                     \
1616   do {if (p == pend) return REG_EEND;                                   \
1617     assert (p < pend);                                                  \
1618     emch = charptr_emchar ((const Bufbyte *) p);                        \
1619     INC_CHARPTR (p);                                                    \
1620   } while (0)
1621
1622 #define PATUNFETCH_EXTENDED DEC_CHARPTR (p)
1623
1624 #define PATFETCH_EITHER(emch)                   \
1625   do {                                          \
1626     if (has_extended_chars)                     \
1627       PATFETCH_EXTENDED (emch);                 \
1628     else                                        \
1629       PATFETCH (emch);                          \
1630   } while (0)
1631
1632 #define PATFETCH_RAW_EITHER(emch)               \
1633   do {                                          \
1634     if (has_extended_chars)                     \
1635       PATFETCH_RAW_EXTENDED (emch);             \
1636     else                                        \
1637       PATFETCH_RAW (emch);                      \
1638   } while (0)
1639
1640 #define PATUNFETCH_EITHER                       \
1641   do {                                          \
1642     if (has_extended_chars)                     \
1643       PATUNFETCH_EXTENDED (emch);               \
1644     else                                        \
1645       PATUNFETCH (emch);                        \
1646   } while (0)
1647
1648 #else                           /* not MULE */
1649
1650 #define PATFETCH_EITHER(emch) PATFETCH (emch)
1651 #define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch)
1652 #define PATUNFETCH_EITHER PATUNFETCH
1653
1654 #endif                          /* MULE */
1655
1656 /* If `translate' is non-null, return translate[D], else just D.  We
1657    cast the subscript to translate because some data is declared as
1658    `char *', to avoid warnings when a string constant is passed.  But
1659    when we use a character as a subscript we must make it unsigned.  */
1660 #define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d))
1661
1662 /* Macros for outputting the compiled pattern into `buffer'.  */
1663
1664 /* If the buffer isn't allocated when it comes in, use this.  */
1665 #define INIT_BUF_SIZE  32
1666
1667 /* Make sure we have at least N more bytes of space in buffer.  */
1668 #define GET_BUFFER_SPACE(n)                                             \
1669     while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated)  \
1670       EXTEND_BUFFER ()
1671
1672 /* Make sure we have one more byte of buffer space and then add C to it.  */
1673 #define BUF_PUSH(c)                                                     \
1674   do {                                                                  \
1675     GET_BUFFER_SPACE (1);                                               \
1676     *buf_end++ = (unsigned char) (c);                                   \
1677   } while (0)
1678
1679 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1680 #define BUF_PUSH_2(c1, c2)                                              \
1681   do {                                                                  \
1682     GET_BUFFER_SPACE (2);                                               \
1683     *buf_end++ = (unsigned char) (c1);                                  \
1684     *buf_end++ = (unsigned char) (c2);                                  \
1685   } while (0)
1686
1687 /* As with BUF_PUSH_2, except for three bytes.  */
1688 #define BUF_PUSH_3(c1, c2, c3)                                          \
1689   do {                                                                  \
1690     GET_BUFFER_SPACE (3);                                               \
1691     *buf_end++ = (unsigned char) (c1);                                  \
1692     *buf_end++ = (unsigned char) (c2);                                  \
1693     *buf_end++ = (unsigned char) (c3);                                  \
1694   } while (0)
1695
1696 /* Store a jump with opcode OP at LOC to location TO.  We store a
1697    relative address offset by the three bytes the jump itself occupies.  */
1698 #define STORE_JUMP(op, loc, to) \
1699   store_op1 (op, loc, (to) - (loc) - 3)
1700
1701 /* Likewise, for a two-argument jump.  */
1702 #define STORE_JUMP2(op, loc, to, arg) \
1703   store_op2 (op, loc, (to) - (loc) - 3, arg)
1704
1705 /* Like `STORE_JUMP', but for inserting.  Assume `buf_end' is the
1706    buffer end.  */
1707 #define INSERT_JUMP(op, loc, to) \
1708   insert_op1 (op, loc, (to) - (loc) - 3, buf_end)
1709
1710 /* Like `STORE_JUMP2', but for inserting.  Assume `buf_end' is the
1711    buffer end.  */
1712 #define INSERT_JUMP2(op, loc, to, arg) \
1713   insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end)
1714
1715 /* This is not an arbitrary limit: the arguments which represent offsets
1716    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1717    be too small, many things would have to change.  */
1718 #define MAX_BUF_SIZE (1L << 16)
1719
1720 /* Extend the buffer by twice its current size via realloc and
1721    reset the pointers that pointed into the old block to point to the
1722    correct places in the new one.  If extending the buffer results in it
1723    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1724 #define EXTEND_BUFFER()                                                 \
1725         do {                                                            \
1726                 re_char *old_buffer = bufp->buffer;                     \
1727                 if (bufp->allocated == MAX_BUF_SIZE)                    \
1728                         return REG_ESIZE;                               \
1729                 bufp->allocated <<= 1;                                  \
1730                 if (bufp->allocated > MAX_BUF_SIZE)                     \
1731                         bufp->allocated = MAX_BUF_SIZE;                 \
1732                 bufp->buffer = (unsigned char *)xrealloc(               \
1733                         bufp->buffer, bufp->allocated);                 \
1734                 if (bufp->buffer == NULL) {                             \
1735                         return REG_ESPACE;                              \
1736                 }                                                       \
1737                 /* If the buffer moved, move all the pointers into it.  */ \
1738                 if (old_buffer != bufp->buffer) {                       \
1739                         buf_end = (buf_end - old_buffer) + bufp->buffer; \
1740                         begalt = (begalt - old_buffer) + bufp->buffer;  \
1741                         if (fixup_alt_jump) {                           \
1742                                 fixup_alt_jump =                        \
1743                                         (fixup_alt_jump - old_buffer) + \
1744                                         bufp->buffer;                   \
1745                         }                                               \
1746                         if (laststart) {                                \
1747                                 laststart = (laststart - old_buffer) +  \
1748                                         bufp->buffer;                   \
1749                         }                                               \
1750                         if (pending_exact) {                            \
1751                                 pending_exact =                         \
1752                                         (pending_exact - old_buffer) +  \
1753                                         bufp->buffer;                   \
1754                         }                                               \
1755                 }                                                       \
1756         } while (0)
1757
1758 /* Since we have one byte reserved for the register number argument to
1759    {start,stop}_memory, the maximum number of groups we can report
1760    things about is what fits in that byte.  */
1761 #define MAX_REGNUM 255
1762
1763 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1764    ignore the excess.  */
1765 typedef unsigned regnum_t;
1766
1767 #define INIT_REG_TRANSLATE_SIZE 5
1768
1769 /* Macros for the compile stack.  */
1770
1771 /* Since offsets can go either forwards or backwards, this type needs to
1772    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1773 typedef int pattern_offset_t;
1774
1775 typedef struct {
1776         pattern_offset_t begalt_offset;
1777         pattern_offset_t fixup_alt_jump;
1778         pattern_offset_t inner_group_offset;
1779         pattern_offset_t laststart_offset;
1780         regnum_t regnum;
1781 } compile_stack_elt_t;
1782
1783 typedef struct {
1784         compile_stack_elt_t *stack;
1785         unsigned size;
1786         unsigned avail;         /* Offset of next open position.  */
1787 } compile_stack_type;
1788
1789 #define INIT_COMPILE_STACK_SIZE 32
1790
1791 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1792 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1793
1794 /* The next available element.  */
1795 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1796
1797 /* Set the bit for character C in a bit vector.  */
1798 #define SET_LIST_BIT(c)                         \
1799   (buf_end[((unsigned char) (c)) / BYTEWIDTH]   \
1800    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1801
1802 #ifdef MULE
1803
1804 /* Set the "bit" for character C in a range table. */
1805 #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt)
1806
1807 /* Set the "bit" for character c in the appropriate table. */
1808 #define SET_EITHER_BIT(c)                       \
1809         do {                                    \
1810                 if (has_extended_chars)         \
1811                         SET_RANGETAB_BIT (c);   \
1812                 else                            \
1813                         SET_LIST_BIT (c);       \
1814         } while (0)
1815
1816 #else                           /* not MULE */
1817
1818 #define SET_EITHER_BIT(c) SET_LIST_BIT (c)
1819
1820 #endif
1821
1822 /* Get the next unsigned number in the uncompiled pattern.  */
1823 #define GET_UNSIGNED_NUMBER(num)                                        \
1824   { if (p != pend)                                                      \
1825      {                                                                  \
1826        PATFETCH (c);                                                    \
1827        while (ISDIGIT (c))                                              \
1828          {                                                              \
1829            if (num < 0)                                                 \
1830               num = 0;                                                  \
1831            num = num * 10 + c - '0';                                    \
1832            if (p == pend)                                               \
1833               break;                                                    \
1834            PATFETCH (c);                                                \
1835          }                                                              \
1836        }                                                                \
1837     }
1838
1839 #define CHAR_CLASS_MAX_LENGTH  9        /* Namely, `multibyte'.  */
1840
1841 \f
1842 static void store_op1(re_opcode_t op, unsigned char *loc, int arg);
1843 static void store_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2);
1844 static void insert_op1(re_opcode_t op, unsigned char *loc, int arg,
1845                        unsigned char *end);
1846 static void insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2,
1847                        unsigned char *end);
1848 static re_bool at_begline_loc_p(re_char*, re_char*, reg_syntax_t);
1849 static re_bool at_endline_loc_p(re_char*, re_char*, int syntax);
1850 static re_bool group_in_compile_stack(compile_stack_type compile_stack,
1851                                       regnum_t regnum);
1852 static reg_errcode_t compile_range(re_char ** p_ptr, re_char * pend,
1853                                    RE_TRANSLATE_TYPE translate,
1854                                    reg_syntax_t syntax, unsigned char *b);
1855 #ifdef MULE
1856 static reg_errcode_t compile_extended_range(re_char ** p_ptr,
1857                                             re_char * pend,
1858                                             RE_TRANSLATE_TYPE translate,
1859                                             reg_syntax_t syntax,
1860                                             Lisp_Object rtab);
1861 #endif                          /* MULE */
1862 static re_bool group_match_null_string_p(unsigned char **p,
1863                                          unsigned char *end,
1864                                          register_info_type * reg_info);
1865 static re_bool alt_match_null_string_p(unsigned char *p, unsigned char *end,
1866                                        register_info_type * reg_info);
1867 static re_bool common_op_match_null_string_p(unsigned char **p,
1868                                              unsigned char *end,
1869                                              register_info_type * reg_info);
1870 static int bcmp_translate(const unsigned char *s1, const unsigned char *s2,
1871                           REGISTER int len, RE_TRANSLATE_TYPE translate);
1872 static int re_match_2_internal(struct re_pattern_buffer *bufp,
1873                                re_char * string1, int size1,
1874                                re_char * string2, int size2, int pos,
1875                                struct re_registers *regs, int stop);
1876 \f
1877 #ifndef MATCH_MAY_ALLOCATE
1878
1879 /* If we cannot allocate large objects within re_match_2_internal,
1880    we make the fail stack and register vectors global.
1881    The fail stack, we grow to the maximum size when a regexp
1882    is compiled.
1883    The register vectors, we adjust in size each time we
1884    compile a regexp, according to the number of registers it needs.  */
1885
1886 static fail_stack_type fail_stack;
1887
1888 /* Size with which the following vectors are currently allocated.
1889    That is so we can make them bigger as needed,
1890    but never make them smaller.  */
1891 static int regs_allocated_size;
1892
1893 static re_char **regstart, **regend;
1894 static re_char **old_regstart, **old_regend;
1895 static re_char **best_regstart, **best_regend;
1896 static register_info_type *reg_info;
1897 static re_char **reg_dummy;
1898 static register_info_type *reg_info_dummy;
1899
1900 /* Make the register vectors big enough for NUM_REGS registers,
1901    but don't make them smaller.  */
1902
1903 static void regex_grow_registers(int num_regs)
1904 {
1905         if (num_regs > regs_allocated_size) {
1906                 RETALLOC_IF(regstart, num_regs, re_char *);
1907                 RETALLOC_IF(regend, num_regs, re_char *);
1908                 RETALLOC_IF(old_regstart, num_regs, re_char *);
1909                 RETALLOC_IF(old_regend, num_regs, re_char *);
1910                 RETALLOC_IF(best_regstart, num_regs, re_char *);
1911                 RETALLOC_IF(best_regend, num_regs, re_char *);
1912                 RETALLOC_IF(reg_info, num_regs, register_info_type);
1913                 RETALLOC_IF(reg_dummy, num_regs, re_char *);
1914                 RETALLOC_IF(reg_info_dummy, num_regs, register_info_type);
1915
1916                 regs_allocated_size = num_regs;
1917         }
1918 }
1919
1920 #endif                          /* not MATCH_MAY_ALLOCATE */
1921
1922 /* auxiliary stuff, FSF theft */
1923 /* Character classes.  */
1924 typedef enum {
1925         RECC_ERROR = 0,
1926         RECC_ALNUM, RECC_ALPHA, RECC_WORD,
1927         RECC_GRAPH, RECC_PRINT,
1928         RECC_LOWER, RECC_UPPER,
1929         RECC_PUNCT, RECC_CNTRL,
1930         RECC_DIGIT, RECC_XDIGIT,
1931         RECC_BLANK, RECC_SPACE,
1932         RECC_MULTIBYTE, RECC_NONASCII,
1933         RECC_ASCII, RECC_UNIBYTE
1934 } re_wctype_t;
1935
1936 /* Map a string to the char class it names (if any).  */
1937 static inline re_wctype_t
1938 re_wctype(re_char *str)
1939 {
1940         const char *string = (const char*)str;
1941
1942         if (STREQ (string, "alnum")) {
1943                 return RECC_ALNUM;
1944         } else if (STREQ (string, "alpha")) {
1945                 return RECC_ALPHA;
1946         } else if (STREQ (string, "digit")) {
1947                 return RECC_DIGIT;
1948         } else if (STREQ (string, "graph")) {
1949                 return RECC_GRAPH;
1950         } else if (STREQ (string, "space")) {
1951                 return RECC_SPACE;
1952         } else if (STREQ (string, "upper")) {
1953                 return RECC_UPPER;
1954         } else if (STREQ (string, "lower")) {
1955                 return RECC_LOWER;
1956         } else if (STREQ (string, "word")) {
1957                 return RECC_WORD;
1958         } else if (STREQ (string, "print")) {
1959                 return RECC_PRINT;
1960         } else if (STREQ (string, "punct")) {
1961                 return RECC_PUNCT;
1962         } else if (STREQ (string, "xdigit")) {
1963                 return RECC_XDIGIT;
1964         } else if (STREQ (string, "cntrl")) {
1965                 return RECC_CNTRL;
1966         } else if (STREQ (string, "blank")) {
1967                 return RECC_BLANK;
1968         } else if (STREQ (string, "ascii")) {
1969                 return RECC_ASCII;
1970         } else if (STREQ (string, "nonascii")) {
1971                 return RECC_NONASCII;
1972         } else if (STREQ (string, "unibyte")) {
1973                 return RECC_UNIBYTE;
1974         } else if (STREQ (string, "multibyte")) {
1975                 return RECC_MULTIBYTE;
1976         } else {
1977                 return 0;
1978         }
1979 }
1980
1981 /* True if CH is in the char class CC.  */
1982 static inline bool
1983 re_iswctype(int ch, re_wctype_t cc)
1984 {
1985         switch (cc) {
1986         case RECC_ALNUM:
1987                 return ISALNUM (ch);
1988         case RECC_ALPHA:
1989                 return ISALPHA (ch);
1990         case RECC_BLANK:
1991                 return ISBLANK (ch);
1992         case RECC_CNTRL:
1993                 return ISCNTRL (ch);
1994         case RECC_DIGIT:
1995                 return ISDIGIT (ch);
1996         case RECC_GRAPH:
1997                 return ISGRAPH (ch);
1998         case RECC_LOWER:
1999                 return ISLOWER (ch);
2000         case RECC_PRINT:
2001                 return ISPRINT (ch);
2002         case RECC_PUNCT:
2003                 return ISPUNCT (ch);
2004         case RECC_SPACE:
2005                 return ISSPACE (ch);
2006         case RECC_UPPER:
2007                 return ISUPPER (ch);
2008         case RECC_XDIGIT:
2009                 return ISXDIGIT (ch);
2010         case RECC_ASCII:
2011                 return IS_REAL_ASCII (ch);
2012         case RECC_NONASCII:
2013                 return !IS_REAL_ASCII (ch);
2014         case RECC_WORD:
2015                 return ISWORD(ch);
2016         case RECC_UNIBYTE:
2017                 return ISUNIBYTE((unsigned int)ch);
2018         case RECC_MULTIBYTE:
2019                 return !ISUNIBYTE((unsigned int)ch);
2020         case RECC_ERROR:
2021                 return false;
2022         default:
2023                 abort();
2024         }
2025         return false;
2026 }
2027
2028 \f
2029 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2030    Returns one of error codes defined in `regex.h', or zero for success.
2031
2032    Assumes the `allocated' (and perhaps `buffer') and `translate'
2033    fields are set in BUFP on entry.
2034
2035    If it succeeds, results are put in BUFP (if it returns an error, the
2036    contents of BUFP are undefined):
2037      `buffer' is the compiled pattern;
2038      `syntax' is set to SYNTAX;
2039      `used' is set to the length of the compiled pattern;
2040      `fastmap_accurate' is zero;
2041      `re_ngroups' is the number of groups/subexpressions (including shy
2042         groups) in PATTERN;
2043      `re_nsub' is the number of non-shy groups in PATTERN;
2044      `not_bol' and `not_eol' are zero;
2045
2046    The `fastmap' and `newline_anchor' fields are neither
2047    examined nor set.  */
2048
2049 static reg_errcode_t
2050 regex_compile(re_char * pattern, int size, reg_syntax_t syntax,
2051               struct re_pattern_buffer *bufp)
2052 {
2053         /* We fetch characters from PATTERN here.  We declare these as int
2054            (or possibly long) so that chars above 127 can be used as
2055            array indices.  The macros that fetch a character from the pattern
2056            make sure to coerce to unsigned char before assigning, so we won't
2057            get bitten by negative numbers here. */
2058         /* XEmacs change: used to be unsigned char. */
2059         REGISTER EMACS_INT c, c1;
2060
2061         /* A random temporary spot in PATTERN.  */
2062         re_char *p1;
2063
2064         /* Points to the end of the buffer, where we should append.  */
2065         REGISTER unsigned char *buf_end;
2066
2067         /* Keeps track of unclosed groups.  */
2068         compile_stack_type compile_stack;
2069
2070         /* Points to the current (ending) position in the pattern.  */
2071         re_char *p = pattern;
2072         re_char *pend = pattern + size;
2073
2074         /* How to translate the characters in the pattern.  */
2075         RE_TRANSLATE_TYPE translate = bufp->translate;
2076
2077         /* Address of the count-byte of the most recently inserted `exactn'
2078            command.  This makes it possible to tell if a new exact-match
2079            character can be added to that command or if the character requires
2080            a new `exactn' command.  */
2081         unsigned char *pending_exact = 0;
2082
2083         /* Address of start of the most recently finished expression.
2084            This tells, e.g., postfix * where to find the start of its
2085            operand.  Reset at the beginning of groups and alternatives.  */
2086         unsigned char *laststart = 0;
2087
2088         /* Address of beginning of regexp, or inside of last group.  */
2089         unsigned char *begalt;
2090
2091         /* Place in the uncompiled pattern (i.e., the {) to
2092            which to go back if the interval is invalid.  */
2093         re_char *beg_interval;
2094
2095         /* Address of the place where a forward jump should go to the end of
2096            the containing expression.  Each alternative of an `or' -- except the
2097            last -- ends with a forward jump of this sort.  */
2098         unsigned char *fixup_alt_jump = 0;
2099
2100         /* Counts open-groups as they are encountered.  Remembered for the
2101            matching close-group on the compile stack, so the same register
2102            number is put in the stop_memory as the start_memory.  */
2103         regnum_t regnum = 0;
2104         auto inline void free_stack(void) __attribute__((always_inline));
2105
2106         /* we need to close over compile_stack */
2107         auto inline void free_stack(void)
2108         {
2109                 xfree(compile_stack.stack);
2110                 return;
2111         }
2112
2113 #ifdef REGEX_DEBUG_FLAG
2114         DEBUG_PRINT1("\nCompiling pattern: ");
2115         if (debug) {
2116                 int debug_count;
2117
2118                 for (debug_count = 0; debug_count < size; debug_count++)
2119                         putchar(pattern[debug_count]);
2120                 putchar('\n');
2121         }
2122 #endif                          /* DEBUG */
2123
2124         /* Initialize the compile stack.  */
2125         compile_stack.stack =
2126                 TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2127         if (compile_stack.stack == NULL) {
2128                 return REG_ESPACE;
2129         }
2130         compile_stack.size = INIT_COMPILE_STACK_SIZE;
2131         compile_stack.avail = 0;
2132
2133         /* Initialize the pattern buffer.  */
2134         bufp->syntax = syntax;
2135         bufp->fastmap_accurate = 0;
2136         bufp->not_bol = bufp->not_eol = 0;
2137
2138         /* Set `used' to zero, so that if we return an error, the pattern
2139            printer (for debugging) will think there's no pattern.  We reset it
2140            at the end.  */
2141         bufp->used = 0;
2142
2143         /* Always count groups, whether or not bufp->no_sub is set.  */
2144         bufp->re_nsub = 0;
2145         bufp->re_ngroups = 0;
2146
2147         /* Allocate index translation array if needed. */
2148         if (bufp->external_to_internal_register == 0) {
2149                 bufp->external_to_internal_register_size =
2150                     INIT_REG_TRANSLATE_SIZE;
2151                 RETALLOC(bufp->external_to_internal_register,
2152                          bufp->external_to_internal_register_size, int);
2153         }
2154         /* Initialize translations to impossible value to aid debugging. */
2155         {
2156                 int i;
2157
2158                 bufp->external_to_internal_register[0] = 0;
2159                 for (i = 1; i < bufp->external_to_internal_register_size; i++)
2160                         bufp->external_to_internal_register[i] =
2161                             (int)0xDEADBEEF;
2162         }
2163
2164 #if !defined (emacs) && !defined (SYNTAX_TABLE)
2165         /* Initialize the syntax table.  */
2166         init_syntax_once();
2167 #endif
2168
2169         if (bufp->allocated == 0) {
2170                 if (bufp->buffer) {
2171                         /* If zero allocated, but buffer is non-null, try to
2172                            realloc enough space.  This loses if buffer's address
2173                            is bogus, but that is the user's responsibility.  */
2174                         RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
2175                 } else {
2176                         /* Caller did not allocate a buffer.  Do it for them. */
2177                         bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
2178                 }
2179                 if (!bufp->buffer) {
2180                         free_stack();
2181                         return REG_ESPACE;
2182                 }
2183
2184                 bufp->allocated = INIT_BUF_SIZE;
2185         }
2186
2187         begalt = buf_end = bufp->buffer;
2188
2189         /* Loop through the uncompiled pattern until we're at the end.  */
2190         while (p != pend) {
2191                 PATFETCH(c);
2192
2193                 switch (c) {
2194                 case '^': {
2195                         if (    /* If at start of pattern, it's an operator.  */
2196                                 p == pattern + 1
2197                                 /* If context independent, it's an operator.  */
2198                                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2199                                 /* Otherwise, depends on what's come before.  */
2200                                 || at_begline_loc_p(pattern, p, syntax))
2201                                 BUF_PUSH(begline);
2202                         else
2203                                 goto normal_char;
2204                 }
2205                         break;
2206
2207                 case '$': {
2208                         if (    /* If at end of pattern, it's an
2209                                    operator.  */
2210                                 p == pend
2211                                 /* If context independent, it's an
2212                                    operator.  */
2213                                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2214                                 /* Otherwise, depends on what's
2215                                    next.  */
2216                                 || at_endline_loc_p(p, pend, syntax))
2217                                 BUF_PUSH(endline);
2218                         else
2219                                 goto normal_char;
2220                 }
2221                         break;
2222
2223                 case '+':
2224                 case '?':
2225                         if ((syntax & RE_BK_PLUS_QM) ||
2226                             (syntax & RE_LIMITED_OPS)) {
2227                                 goto normal_char;
2228                         }
2229                         /* pardon? */
2230                 handle_plus:
2231                 case '*': {
2232                         re_bool zero_times_ok;
2233                         re_bool many_times_ok;
2234                         re_bool minimal;
2235
2236                         /* If there is no previous pattern... */
2237                         if (!laststart) {
2238                                 if (syntax & RE_CONTEXT_INVALID_OPS) {
2239                                         free_stack();
2240                                         return REG_BADRPT;
2241                                 } else if (!(syntax & RE_CONTEXT_INDEP_OPS)) {
2242                                         goto normal_char;
2243                                 }
2244                         }
2245
2246                         /* true means zero/many matches are allowed. */
2247                         zero_times_ok = c != '+';
2248                         many_times_ok = c != '?';
2249
2250                         /* true means match shortest string possible. */
2251                         minimal = false;
2252
2253                         /* If there is a sequence of repetition chars, collapse it
2254                            down to just one (the right one).  We can't combine
2255                            interval operators with these because of, e.g., `a{2}*',
2256                            which should only match an even number of `a's.  */
2257                         while (p != pend) {
2258                                 PATFETCH(c);
2259
2260                                 if (c == '*'
2261                                     || (!(syntax & RE_BK_PLUS_QM)
2262                                         && (c == '+' || c == '?'))) ;
2263
2264                                 else if (syntax & RE_BK_PLUS_QM
2265                                          && c == '\\') {
2266                                         if (p == pend) {
2267                                                 free_stack();
2268                                                 return REG_EESCAPE;
2269                                         }
2270
2271                                         PATFETCH(c1);
2272                                         if (!(c1 == '+' || c1 == '?')) {
2273                                                 PATUNFETCH;
2274                                                 PATUNFETCH;
2275                                                 break;
2276                                         }
2277
2278                                         c = c1;
2279                                 } else {
2280                                         PATUNFETCH;
2281                                         break;
2282                                 }
2283
2284                                 /* If we get here, we found another repeat
2285                                    character.  */
2286                                 if (!(syntax & RE_NO_MINIMAL_MATCHING)) {
2287                                         /* "*?" and "+?" and "??" are okay (and
2288                                            mean match minimally), but other
2289                                            sequences (such as "*??" and "+++")
2290                                            are rejected (reserved for future
2291                                            use). */
2292                                         if (minimal || c != '?') {
2293                                                 free_stack();
2294                                                 return REG_BADRPT;
2295                                         }
2296                                         minimal = true;
2297                                 } else {
2298                                         zero_times_ok |= c != '+';
2299                                         many_times_ok |= c != '?';
2300                                 }
2301                         }
2302
2303                         /* Star, etc. applied to an empty pattern is equivalent
2304                            to an empty pattern.  */
2305                         if (!laststart) {
2306                                 break;
2307                         }
2308
2309                         /* Now we know whether zero matches is allowed
2310                            and whether two or more matches is allowed
2311                            and whether we want minimal or maximal matching. */
2312                         if (minimal) {
2313                                 if (!many_times_ok) {
2314                                         /* "a??" becomes:
2315                                            0: /on_failure_jump to 6
2316                                            3: /jump to 9
2317                                            6: /exactn/1/A
2318                                            9: end of pattern.
2319                                         */
2320                                         GET_BUFFER_SPACE(6);
2321                                         INSERT_JUMP(jump, laststart,
2322                                                     buf_end + 3);
2323                                         buf_end += 3;
2324                                         INSERT_JUMP(on_failure_jump,
2325                                                     laststart,
2326                                                     laststart + 6);
2327                                         buf_end += 3;
2328                                 } else if (zero_times_ok) {
2329                                         /* "a*?" becomes:
2330                                            0: /jump to 6
2331                                            3: /exactn/1/A
2332                                            6: /on_failure_jump to 3
2333                                            9: end of pattern.
2334                                         */
2335                                         GET_BUFFER_SPACE(6);
2336                                         INSERT_JUMP(jump, laststart,
2337                                                     buf_end + 3);
2338                                         buf_end += 3;
2339                                         STORE_JUMP(on_failure_jump,
2340                                                    buf_end,
2341                                                    laststart + 3);
2342                                         buf_end += 3;
2343                                 } else {
2344                                         /* "a+?" becomes:
2345                                            0: /exactn/1/A
2346                                            3: /on_failure_jump to 0
2347                                            6: end of pattern.
2348                                         */
2349                                         GET_BUFFER_SPACE(3);
2350                                         STORE_JUMP(on_failure_jump,
2351                                                    buf_end, laststart);
2352                                         buf_end += 3;
2353                                 }
2354                         } else {
2355                                 /* Are we optimizing this jump?  */
2356                                 re_bool keep_string_p = false;
2357
2358                                 if (many_times_ok) {
2359                                         /* More than one repetition is allowed,
2360                                            so put in at the end a backward
2361                                            relative jump from `buf_end' to
2362                                            before the next jump we're going to
2363                                            put in below (which jumps from
2364                                            laststart to after this jump).
2365
2366                                            But if we are at the `*' in the exact
2367                                            sequence `.*\n', insert an
2368                                            unconditional jump backwards to the
2369                                            ., instead of the beginning of the
2370                                            loop.  This way we only push a
2371                                            failure point once, instead of every
2372                                            time through the loop.  */
2373                                         assert(p - 1 > pattern);
2374
2375                                         /* Allocate the space for the jump.  */
2376                                         GET_BUFFER_SPACE(3);
2377
2378                                         /* We know we are not at the first
2379                                            character of the pattern, because
2380                                            laststart was nonzero.  And we've
2381                                            already incremented `p', by the way,
2382                                            to be the character after the `*'.
2383                                            Do we have to do something analogous
2384                                            here for null bytes, because of
2385                                            RE_DOT_NOT_NULL? */
2386                                         if (*(p - 2) == '.' &&
2387                                             zero_times_ok &&
2388                                             p < pend &&
2389                                             *p == '\n' &&
2390                                             !(syntax & RE_DOT_NEWLINE)) {
2391                                                 /* We have .*\n.  */
2392                                                 STORE_JUMP(jump,
2393                                                            buf_end,
2394                                                            laststart);
2395                                                 keep_string_p = true;
2396                                         } else {
2397                                                 /* Anything else.  */
2398                                                 STORE_JUMP
2399                                                         (maybe_pop_jump,
2400                                                          buf_end,
2401                                                          laststart - 3);
2402                                         }
2403                                         /* We've added more stuff to the
2404                                            buffer.  */
2405                                         buf_end += 3;
2406                                 }
2407
2408                                 /* On failure, jump from laststart to buf_end +
2409                                    3, which will be the end of the buffer after
2410                                    this jump is inserted.  */
2411                                 GET_BUFFER_SPACE(3);
2412                                 INSERT_JUMP(keep_string_p ?
2413                                             on_failure_keep_string_jump
2414                                             : on_failure_jump,
2415                                             laststart, buf_end + 3);
2416                                 buf_end += 3;
2417
2418                                 if (!zero_times_ok) {
2419                                         /* At least one repetition is required,
2420                                            so insert a `dummy_failure_jump'
2421                                            before the initial `on_failure_jump'
2422                                            instruction of the loop. This effects
2423                                            a skip over that instruction the
2424                                            first time we hit that loop.  */
2425                                         GET_BUFFER_SPACE(3);
2426                                         INSERT_JUMP(dummy_failure_jump,
2427                                                     laststart,
2428                                                     laststart + 6);
2429                                         buf_end += 3;
2430                                 }
2431                         }
2432                         pending_exact = 0;
2433                 }
2434                         break;
2435
2436                 case '.':
2437                         laststart = buf_end;
2438                         BUF_PUSH(anychar);
2439                         break;
2440
2441                 case '[': {
2442                         /* XEmacs change: this whole section */
2443                         re_bool had_char_class = false;
2444 #ifdef MULE
2445                         re_bool has_extended_chars = false;
2446                         REGISTER Lisp_Object rtab = Qnil;
2447 #endif
2448
2449                         if (p == pend) {
2450                                 free_stack();
2451                                 return REG_EBRACK;
2452                         }
2453
2454                         /* Ensure that we have enough space to push a charset:
2455                            the opcode, the length count, and the bitset; 34
2456                            bytes in all.  */
2457                         GET_BUFFER_SPACE(34);
2458
2459                         laststart = buf_end;
2460
2461                         /* We test `*p == '^' twice, instead of using an if
2462                            statement, so we only need one BUF_PUSH.  */
2463                         BUF_PUSH(*p == '^' ? charset_not : charset);
2464                         if (*p == '^')
2465                                 p++;
2466
2467                         /* Remember the first position in the bracket
2468                            expression.  */
2469                         p1 = p;
2470
2471                         /* Push the number of bytes in the bitmap.  */
2472                         BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
2473
2474                         /* Clear the whole map.  */
2475                         memset(buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
2476
2477                         /* charset_not matches newline according to a syntax
2478                            bit.  */
2479                         if ((re_opcode_t) buf_end[-2] == charset_not
2480                             && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2481                                 SET_LIST_BIT('\n');
2482
2483 #ifdef MULE
2484                         start_over_with_extended:
2485                         if (has_extended_chars) {
2486                                 /* There are extended chars here, which means we
2487                                    need to start over and shift to unified
2488                                    range-table format. */
2489                                 if (buf_end[-2] == charset)
2490                                         buf_end[-2] = charset_mule;
2491                                 else
2492                                         buf_end[-2] = charset_mule_not;
2493                                 buf_end--;
2494                                 p = p1;
2495                                 /* go back to the beginning of the charset,
2496                                    after a possible ^. */
2497                                 rtab = Vthe_lisp_rangetab;
2498                                 Fclear_range_table(rtab);
2499
2500                                 /* charset_not matches newline according to a
2501                                    syntax bit.  */
2502                                 if ((re_opcode_t) buf_end[-1] ==
2503                                     charset_mule_not
2504                                     && (syntax &
2505                                         RE_HAT_LISTS_NOT_NEWLINE))
2506                                         SET_EITHER_BIT('\n');
2507                         }
2508 #endif                          /* MULE */
2509
2510                                 /* Read in characters and ranges, setting map bits.  */
2511                         for (;;) {
2512                                 if (p == pend) {
2513                                         free_stack();
2514                                         return REG_EBRACK;
2515                                 }
2516                                 PATFETCH(c);
2517
2518 #ifdef MULE
2519                                 if (c >= 0x80 && !has_extended_chars) {
2520                                         has_extended_chars = 1;
2521                                         /* Frumble-bumble, we've found some
2522                                            extended chars.  Need to start over,
2523                                            process everything using the general
2524                                            extended-char mechanism, and need to
2525                                            use charset_mule and charset_mule_not
2526                                            instead of charset and
2527                                            charset_not. */
2528                                         goto start_over_with_extended;
2529                                 }
2530 #endif                          /* MULE */
2531                                 /* \ might escape characters inside [...] and
2532                                    [^...].  */
2533                                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) &&
2534                                     c == '\\') {
2535                                         if (p == pend) {
2536                                                 free_stack();
2537                                                 return REG_EESCAPE;
2538                                         }
2539
2540                                         PATFETCH(c1);
2541 #ifdef MULE
2542                                         if (c1 >= 0x80
2543                                             && !has_extended_chars) {
2544                                                 has_extended_chars = 1;
2545                                                 goto start_over_with_extended;
2546                                         }
2547 #endif                          /* MULE */
2548                                         SET_EITHER_BIT(c1);
2549                                         continue;
2550                                 }
2551
2552                                 /* Could be the end of the bracket
2553                                    expression.  If it's not (i.e., when
2554                                    the bracket expression is `[]' so
2555                                    far), the ']' character bit gets set
2556                                    way below.  */
2557                                 if (c == ']' && p != p1 + 1)
2558                                         break;
2559
2560                                 /* Look ahead to see if it's a range
2561                                    when the last thing was a character
2562                                    class.  */
2563                                 if (had_char_class && c == '-'
2564                                     && *p != ']') {
2565                                         free_stack();
2566                                         return REG_ERANGE;
2567                                 }
2568
2569                                 /* Look ahead to see if it's a range
2570                                    when the last thing was a character:
2571                                    if this is a hyphen not at the
2572                                    beginning or the end of a list, then
2573                                    it's the range operator.  */
2574                                 if (c == '-'
2575                                     && !(p - 2 >= pattern
2576                                          && p[-2] == '[')
2577                                     && !(p - 3 >= pattern
2578                                          && p[-3] == '['
2579                                          && p[-2] == '^')
2580                                     && *p != ']') {
2581                                         reg_errcode_t ret;
2582
2583 #ifdef MULE
2584                                         if (*(const unsigned char *)p >= 0x80
2585                                             && !has_extended_chars) {
2586                                                 has_extended_chars = 1;
2587                                                 goto start_over_with_extended;
2588                                         }
2589                                         if (has_extended_chars)
2590                                                 ret = compile_extended_range
2591                                                         (&p, pend, translate,
2592                                                          syntax, rtab);
2593                                         else
2594 #endif                          /* MULE */
2595                                                 ret = compile_range
2596                                                         (&p, pend, translate,
2597                                                          syntax, buf_end);
2598                                         if (ret != REG_NOERROR) {
2599                                                 free_stack();
2600                                                 return ret;
2601                                         }
2602                                 }
2603
2604                                 else if (p[0] == '-' && p[1] != ']') {
2605                                         /* This handles ranges made up of
2606                                            characters only.  */
2607                                         reg_errcode_t ret;
2608
2609                                         /* Move past the `-'.  */
2610                                         PATFETCH(c1);
2611
2612 #ifdef MULE
2613                                         if (*(const unsigned char*)p >= 0x80
2614                                             && !has_extended_chars) {
2615                                                 has_extended_chars = 1;
2616                                                 goto start_over_with_extended;
2617                                         }
2618                                         if (has_extended_chars)
2619                                                 ret = compile_extended_range(
2620                                                         &p, pend, translate,
2621                                                         syntax, rtab);
2622                                         else
2623 #endif                          /* MULE */
2624                                                 ret = compile_range(
2625                                                         &p, pend, translate,
2626                                                         syntax, buf_end);
2627                                         if (ret != REG_NOERROR) {
2628                                                 free_stack();
2629                                                 return ret;
2630                                         }
2631                                 }
2632
2633                                 /* See if we're at the beginning of a possible
2634                                    character class.  */
2635
2636                                 else if (syntax & RE_CHAR_CLASSES &&
2637                                          c == '[' && *p == ':') {       
2638                                         /* Leave room for the null.  */
2639                                         char str[CHAR_CLASS_MAX_LENGTH + 1];
2640
2641                                         PATFETCH(c);
2642                                         c1 = 0;
2643
2644                                         /* If pattern is `[[:'.  */
2645                                         if (p == pend) {
2646                                                 free_stack();
2647                                                 return REG_EBRACK;
2648                                         }
2649                                         for (;;) {
2650                                                 /* #### This code is unused.
2651                                                    Correctness is not checked
2652                                                    after TRT table change.  */
2653                                                 PATFETCH(c);
2654                                                 if (c == ':' || c == ']'
2655                                                     || p == pend
2656                                                     || c1 ==
2657                                                     CHAR_CLASS_MAX_LENGTH)
2658                                                         break;
2659                                                 str[c1++] = (char)c;
2660                                         }
2661                                         str[c1] = '\0';
2662
2663                                         /* If isn't a word bracketed by
2664                                            `[:' and `:]': undo the
2665                                            ending character, the
2666                                            letters, and leave the
2667                                            leading `:' and `[' (but set
2668                                            bits for them).  */
2669                                         if (c == ':' && *p == ']') {
2670                                                 re_wctype_t wct =
2671                                                         re_wctype(
2672                                                                 (unsigned char*)
2673                                                                 str);
2674
2675                                                 if (wct == RECC_ERROR) {
2676                                                         free_stack();
2677                                                         return REG_ECTYPE;
2678                                                 }
2679
2680                                                 /* Throw away the ] at
2681                                                    the end of the
2682                                                    character class.  */
2683                                                 PATFETCH(c);
2684
2685                                                 if (p == pend) {
2686                                                         free_stack();
2687                                                         return REG_EBRACK;
2688                                                 }
2689
2690                                                 for (int ch = 0;
2691                                                      ch < 1<<BYTEWIDTH;
2692                                                      ch++) {
2693                                                         /* rmsmacs has
2694                                                            this */
2695 #if 0
2696                                                         int translated =
2697                                                                 TRANSLATE(ch);
2698                                                         if (translated <
2699                                                             (1 << BYTEWIDTH) &&
2700                                                             re_iswctype(ch, wct)) {
2701                                                                 SET_EITHER_BIT(ch);
2702                                                         }
2703 #else
2704                                                         if (re_iswctype(ch, wct)) {
2705                                                                 SET_EITHER_BIT(ch);
2706                                                         }
2707 #endif
2708                                                 }
2709                                                 had_char_class = true;
2710                                         } else {
2711                                                 c1++;
2712                                                 while (c1--) {
2713                                                         PATUNFETCH;
2714                                                 }
2715                                                 SET_EITHER_BIT('[');
2716                                                 SET_EITHER_BIT(':');
2717                                                 had_char_class = false;
2718                                         }
2719                                 } else {
2720                                         had_char_class = false;
2721                                         SET_EITHER_BIT(c);
2722                                 }
2723                         }
2724
2725 #ifdef MULE
2726                         if (has_extended_chars) {
2727                                 /* We have a range table, not a bit vector. */
2728                                 int bytes_needed =
2729                                         unified_range_table_bytes_needed
2730                                         (rtab);
2731                                 GET_BUFFER_SPACE(bytes_needed);
2732                                 unified_range_table_copy_data(rtab,
2733                                                               buf_end);
2734                                 buf_end +=
2735                                         unified_range_table_bytes_used
2736                                         (buf_end);
2737                                 break;
2738                         }
2739 #endif                          /* MULE */
2740                                 /* Discard any (non)matching list bytes that are
2741                                    all 0 at the end of the map.  Decrease the
2742                                    map-length byte too.  */
2743                         while ((int)buf_end[-1] > 0
2744                                && buf_end[buf_end[-1] - 1] == 0)
2745                                 buf_end[-1]--;
2746                         buf_end += buf_end[-1];
2747                 }
2748                         break;
2749
2750                 case '(':
2751                         if (syntax & RE_NO_BK_PARENS)
2752                                 goto handle_open;
2753                         else
2754                                 goto normal_char;
2755
2756                 case ')':
2757                         if (syntax & RE_NO_BK_PARENS)
2758                                 goto handle_close;
2759                         else
2760                                 goto normal_char;
2761
2762                 case '\n':
2763                         if (syntax & RE_NEWLINE_ALT)
2764                                 goto handle_alt;
2765                         else
2766                                 goto normal_char;
2767
2768                 case '|':
2769                         if (syntax & RE_NO_BK_VBAR)
2770                                 goto handle_alt;
2771                         else
2772                                 goto normal_char;
2773
2774                 case '{':
2775                         if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2776                                 goto handle_interval;
2777                         else
2778                                 goto normal_char;
2779
2780                 case '\\':
2781                         if (p == pend) {
2782                                 free_stack();
2783                                 return REG_EESCAPE;
2784                         }
2785
2786                         /* Do not translate the character after the \, so that we can
2787                            distinguish, e.g., \B from \b, even if we normally would
2788                            translate, e.g., B to b.  */
2789                         PATFETCH_RAW(c);
2790
2791                         switch (c) {
2792                         case '(':
2793                                 if (syntax & RE_NO_BK_PARENS)
2794                                         goto normal_backslash;
2795
2796                               handle_open:
2797                                 {
2798                                         regnum_t r;
2799                                         int shy = 0;
2800
2801                                         if (!(syntax & RE_NO_SHY_GROUPS)
2802                                             && p != pend && *p == '?') {
2803                                                 p++;
2804                                                 PATFETCH(c);
2805                                                 switch (c) {
2806                                                 case ':':
2807                                                         /* shy groups */
2808                                                         shy = 1;
2809                                                         break;
2810
2811                                                         /* All others are
2812                                                            reserved for future
2813                                                            constructs. */
2814                                                 default:
2815                                                         free_stack();
2816                                                         return REG_BADPAT;
2817                                                 }
2818                                         }
2819
2820                                         r = ++regnum;
2821                                         bufp->re_ngroups++;
2822                                         if (!shy) {
2823                                                 /* Record the translation from capturing group index to
2824                                                    register number, reallocating table as needed. */
2825                                                 bufp->re_nsub++;
2826                                                 while (bufp->
2827                                                        external_to_internal_register_size
2828                                                        <= bufp->re_nsub) {
2829                                                         int i;
2830                                                         int old_size =
2831                                                             bufp->
2832                                                             external_to_internal_register_size;
2833                                                         bufp->
2834                                                             external_to_internal_register_size
2835                                                             += 5;
2836                                                         RETALLOC(bufp->
2837                                                                  external_to_internal_register,
2838                                                                  bufp->
2839                                                                  external_to_internal_register_size,
2840                                                                  int);
2841                                                         /* debugging */
2842                                                         for (i = old_size;
2843                                                              i <
2844                                                              bufp->
2845                                                              external_to_internal_register_size;
2846                                                              i++)
2847                                                                 bufp->
2848                                                                     external_to_internal_register
2849                                                                     [i] =
2850                                                                     (int)
2851                                                                     0xDEADBEEF;
2852                                                 }
2853
2854                                                 bufp->
2855                                                     external_to_internal_register
2856                                                     [bufp->re_nsub] =
2857                                                     bufp->re_ngroups;
2858                                         }
2859
2860                                         if (COMPILE_STACK_FULL) {
2861                                                 RETALLOC(compile_stack.stack,
2862                                                          compile_stack.
2863                                                          size << 1,
2864                                                          compile_stack_elt_t);
2865                                                 if (compile_stack.stack == NULL)
2866                                                         return REG_ESPACE;
2867
2868                                                 compile_stack.size <<= 1;
2869                                         }
2870
2871                                         /* These are the values to restore when
2872                                            we hit end of this group.  They are
2873                                            all relative offsets, so that if the
2874                                            whole pattern moves because of
2875                                            realloc, they will still be
2876                                            valid.  */
2877                                         COMPILE_STACK_TOP.begalt_offset =
2878                                                 begalt - bufp->buffer;
2879                                         COMPILE_STACK_TOP.fixup_alt_jump =
2880                                                 fixup_alt_jump
2881                                                 ? fixup_alt_jump -
2882                                                 bufp->buffer + 1
2883                                                 : 0;
2884                                         COMPILE_STACK_TOP.laststart_offset =
2885                                                 buf_end - bufp->buffer;
2886                                         COMPILE_STACK_TOP.regnum = r;
2887
2888                                         /* We will eventually replace the 0 with
2889                                            the number of groups inner to this
2890                                            one.  But do not push a start_memory
2891                                            for groups beyond the last one we can
2892                                            represent in the compiled pattern.
2893                                            #### bad bad bad.  this will fail in
2894                                            lots of ways, if we ever have to
2895                                            backtrack for these groups.
2896                                          */
2897                                         if (r <= MAX_REGNUM) {
2898                                                 COMPILE_STACK_TOP.
2899                                                         inner_group_offset =
2900                                                         buf_end - bufp->buffer +
2901                                                         2;
2902                                                 BUF_PUSH_3(start_memory, r, 0);
2903                                         }
2904
2905                                         compile_stack.avail++;
2906
2907                                         fixup_alt_jump = 0;
2908                                         laststart = 0;
2909                                         begalt = buf_end;
2910                                         /* If we've reached MAX_REGNUM groups,
2911                                            then this open won't actually
2912                                            generate any code, so we'll have to
2913                                            clear pending_exact explicitly.  */
2914                                         pending_exact = 0;
2915                                 }
2916                                 break;
2917
2918                         case ')':
2919                                 if (syntax & RE_NO_BK_PARENS)
2920                                         goto normal_backslash;
2921
2922                                 if (COMPILE_STACK_EMPTY) {
2923                                         if (syntax &
2924                                             RE_UNMATCHED_RIGHT_PAREN_ORD) {
2925                                                 goto normal_backslash;
2926                                         } else {
2927                                                 free_stack();
2928                                                 return REG_ERPAREN;
2929                                         }
2930                                 }
2931
2932                               handle_close:
2933                                 if (fixup_alt_jump) {
2934                                         /* Push a dummy failure point at the end
2935                                            of the alternative for a possible
2936                                            future `pop_failure_jump' to pop.
2937                                            See comments at `push_dummy_failure'
2938                                            in `re_match_2'. */
2939                                         BUF_PUSH(push_dummy_failure);
2940
2941                                         /* We allocated space for this jump when
2942                                            we assigned to `fixup_alt_jump', in
2943                                            the `handle_alt' case below.  */
2944                                         STORE_JUMP(jump_past_alt,
2945                                                    fixup_alt_jump, buf_end - 1);
2946                                 }
2947
2948                                 /* See similar code for backslashed left paren
2949                                    above.  */
2950                                 if (COMPILE_STACK_EMPTY) {
2951                                         if (syntax &
2952                                             RE_UNMATCHED_RIGHT_PAREN_ORD) {
2953                                                 goto normal_char;
2954                                         } else {
2955                                                 free_stack();
2956                                                 return REG_EPAREN;
2957                                         }
2958                                 }
2959
2960                                 /* Since we just checked for an empty stack
2961                                    above, this ``can't happen''.  */
2962                                 assert(compile_stack.avail != 0);
2963                                 {
2964                                         /* We don't just want to restore into
2965                                            `regnum', because later groups should
2966                                            continue to be numbered higher, as in
2967                                            `(ab)c(de)' -- the second group is
2968                                            #2.  */
2969                                         regnum_t this_group_regnum;
2970
2971                                         compile_stack.avail--;
2972                                         begalt =
2973                                                 bufp->buffer +
2974                                                 COMPILE_STACK_TOP.begalt_offset;
2975                                         fixup_alt_jump =
2976                                                 COMPILE_STACK_TOP.
2977                                                 fixup_alt_jump
2978                                                 ? bufp->buffer +
2979                                                 COMPILE_STACK_TOP.
2980                                                 fixup_alt_jump - 1
2981                                                 : 0;
2982                                         laststart =
2983                                                 bufp->buffer +
2984                                                 COMPILE_STACK_TOP.
2985                                                 laststart_offset;
2986                                         this_group_regnum =
2987                                                 COMPILE_STACK_TOP.regnum;
2988                                         /* If we've reached MAX_REGNUM groups,
2989                                            then this open won't actually
2990                                            generate any code, so we'll have to
2991                                            clear pending_exact explicitly.  */
2992                                         pending_exact = 0;
2993
2994                                         /* We're at the end of the group, so now
2995                                            we know how many groups were inside
2996                                            this one.  */
2997                                         if (this_group_regnum <= MAX_REGNUM) {
2998                                                 unsigned char *inner_group_loc
2999                                                         =
3000                                                         bufp->buffer +
3001                                                         COMPILE_STACK_TOP.
3002                                                         inner_group_offset;
3003
3004                                                 *inner_group_loc =
3005                                                         regnum -
3006                                                         this_group_regnum;
3007                                                 BUF_PUSH_3(stop_memory,
3008                                                            this_group_regnum,
3009                                                            regnum -
3010                                                            this_group_regnum);
3011                                         }
3012                                 }
3013                                 break;
3014
3015                         case '|':       /* `\|'.  */
3016                                 if (syntax & RE_LIMITED_OPS
3017                                     || syntax & RE_NO_BK_VBAR) {
3018                                         goto normal_backslash;
3019                                 }
3020                         handle_alt:
3021                                 if (syntax & RE_LIMITED_OPS) {
3022                                         goto normal_char;
3023                                 }
3024
3025                                 /* Insert before the previous alternative a jump
3026                                    which jumps to this alternative if the former
3027                                    fails.  */
3028                                 GET_BUFFER_SPACE(3);
3029                                 INSERT_JUMP(on_failure_jump, begalt,
3030                                             buf_end + 6);
3031                                 pending_exact = 0;
3032                                 buf_end += 3;
3033
3034                                 /* The alternative before this one has a jump after it
3035                                    which gets executed if it gets matched.  Adjust that
3036                                    jump so it will jump to this alternative's analogous
3037                                    jump (put in below, which in turn will jump to the next
3038                                    (if any) alternative's such jump, etc.).  The last such
3039                                    jump jumps to the correct final destination.  A picture:
3040                                    _____ _____
3041                                    |   | |   |
3042                                    |   v |   v
3043                                    a | b   | c
3044
3045                                    If we are at `b', then fixup_alt_jump right now points to a
3046                                    three-byte space after `a'.  We'll put in the jump, set
3047                                    fixup_alt_jump to right after `b', and leave behind three
3048                                    bytes which we'll fill in when we get to after `c'.  */
3049
3050                                 if (fixup_alt_jump)
3051                                         STORE_JUMP(jump_past_alt,
3052                                                    fixup_alt_jump, buf_end);
3053
3054                                 /* Mark and leave space for a jump after this alternative,
3055                                    to be filled in later either by next alternative or
3056                                    when know we're at the end of a series of alternatives.  */
3057                                 fixup_alt_jump = buf_end;
3058                                 GET_BUFFER_SPACE(3);
3059                                 buf_end += 3;
3060
3061                                 laststart = 0;
3062                                 begalt = buf_end;
3063                                 break;
3064
3065                         case '{':
3066                                 /* If \{ is a literal.  */
3067                                 if (!(syntax & RE_INTERVALS)
3068                                     /* If we're at `\{' and it's not the open-interval
3069                                        operator.  */
3070                                     || ((syntax & RE_INTERVALS)
3071                                         && (syntax & RE_NO_BK_BRACES))
3072                                     || (p - 2 == pattern && p == pend))
3073                                         goto normal_backslash;
3074
3075                               handle_interval:
3076                                 {
3077                                         /* If got here, then the syntax allows
3078                                            intervals.  */
3079
3080                                         /* At least (most) this many matches
3081                                            must be made.  */
3082                                         int lower_bound = -1, upper_bound = -1;
3083
3084                                         beg_interval = p - 1;
3085
3086                                         if (p == pend) {
3087                                                 if (syntax & RE_NO_BK_BRACES) {
3088                                                         goto unfetch_interval;
3089                                                 } else {
3090                                                         free_stack();
3091                                                         return REG_EBRACE;
3092                                                 }
3093                                         }
3094
3095                                         GET_UNSIGNED_NUMBER(lower_bound);
3096
3097                                         if (c == ',') {
3098                                                 GET_UNSIGNED_NUMBER(
3099                                                         upper_bound);
3100                                                 if (upper_bound < 0) {
3101                                                         upper_bound =
3102                                                                 RE_DUP_MAX;
3103                                                 }
3104                                         } else {
3105                                                 /* Interval such as `{1}' =>
3106                                                    match exactly once. */
3107                                                 upper_bound = lower_bound;
3108                                         }
3109
3110                                         if (lower_bound < 0
3111                                             || upper_bound > RE_DUP_MAX
3112                                             || lower_bound > upper_bound) {
3113                                                 if (syntax & RE_NO_BK_BRACES) {
3114                                                         goto unfetch_interval;
3115                                                 } else {
3116                                                         free_stack();
3117                                                         return REG_BADBR;
3118                                                 }
3119                                         }
3120
3121                                         if (!(syntax & RE_NO_BK_BRACES)) {
3122                                                 if (c != '\\') {
3123                                                         free_stack();
3124                                                         return REG_EBRACE;
3125                                                 }
3126                                                 PATFETCH(c);
3127                                         }
3128
3129                                         if (c != '}') {
3130                                                 if (syntax & RE_NO_BK_BRACES) {
3131                                                         goto unfetch_interval;
3132                                                 } else {
3133                                                         free_stack();
3134                                                         return REG_BADBR;
3135                                                 }
3136                                         }
3137
3138                                         /* We just parsed a valid interval.  */
3139
3140                                         /* If it's invalid to have no preceding
3141                                            re.  */
3142                                         if (!laststart) {
3143                                                 if (syntax &
3144                                                     RE_CONTEXT_INVALID_OPS) {
3145                                                         free_stack();
3146                                                         return REG_BADRPT;
3147                                                 } else if (
3148                                                         syntax &
3149                                                         RE_CONTEXT_INDEP_OPS) {
3150                                                         laststart = buf_end;
3151                                                 } else {
3152                                                         goto unfetch_interval;
3153                                                 }
3154                                         }
3155
3156                                         /* If the upper bound is zero, don't
3157                                            want to succeed at all; jump from
3158                                            `laststart' to `b + 3', which will be
3159                                            the end of the buffer after we insert
3160                                            the jump.  */
3161                                         if (upper_bound == 0) {
3162                                                 GET_BUFFER_SPACE(3);
3163                                                 INSERT_JUMP(jump, laststart,
3164                                                             buf_end + 3);
3165                                                 buf_end += 3;
3166                                         }
3167
3168                                         /* Otherwise, we have a nontrivial interval.  When
3169                                            we're all done, the pattern will look like:
3170                                            set_number_at <jump count> <upper bound>
3171                                            set_number_at <succeed_n count> <lower bound>
3172                                            succeed_n <after jump addr> <succeed_n count>
3173                                            <body of loop>
3174                                            jump_n <succeed_n addr> <jump count>
3175                                            (The upper bound and `jump_n' are omitted if
3176                                            `upper_bound' is 1, though.)  */
3177                                         else {  /* If the upper bound is > 1, we need to insert
3178                                                    more at the end of the loop.  */
3179                                                 Memory_count nbytes =
3180                                                     10 + (upper_bound > 1) * 10;
3181
3182                                                 GET_BUFFER_SPACE(nbytes);
3183
3184                                                 /* Initialize lower bound of the `succeed_n', even
3185                                                    though it will be set during matching by its
3186                                                    attendant `set_number_at' (inserted next),
3187                                                    because `re_compile_fastmap' needs to know.
3188                                                    Jump to the `jump_n' we might insert below.  */
3189                                                 INSERT_JUMP2(succeed_n,
3190                                                              laststart,
3191                                                              buf_end + 5 +
3192                                                              (upper_bound >
3193                                                               1) * 5,
3194                                                              lower_bound);
3195                                                 buf_end += 5;
3196
3197                                                 /* Code to initialize the lower bound.  Insert
3198                                                    before the `succeed_n'.  The `5' is the last two
3199                                                    bytes of this `set_number_at', plus 3 bytes of
3200                                                    the following `succeed_n'.  */
3201                                                 insert_op2(set_number_at,
3202                                                            laststart, 5,
3203                                                            lower_bound,
3204                                                            buf_end);
3205                                                 buf_end += 5;
3206
3207                                                 if (upper_bound > 1) {  /* More than one repetition is allowed, so
3208                                                                            append a backward jump to the `succeed_n'
3209                                                                            that starts this interval.
3210
3211                                                                            When we've reached this during matching,
3212                                                                            we'll have matched the interval once, so
3213                                                                            jump back only `upper_bound - 1' times.  */
3214                                                         STORE_JUMP2(jump_n,
3215                                                                     buf_end,
3216                                                                     laststart +
3217                                                                     5,
3218                                                                     upper_bound
3219                                                                     - 1);
3220                                                         buf_end += 5;
3221
3222                                                         /* The location we want to set is the second
3223                                                            parameter of the `jump_n'; that is `b-2' as
3224                                                            an absolute address.  `laststart' will be
3225                                                            the `set_number_at' we're about to insert;
3226                                                            `laststart+3' the number to set, the source
3227                                                            for the relative address.  But we are
3228                                                            inserting into the middle of the pattern --
3229                                                            so everything is getting moved up by 5.
3230                                                            Conclusion: (b - 2) - (laststart + 3) + 5,
3231                                                            i.e., b - laststart.
3232
3233                                                            We insert this at the beginning of the loop
3234                                                            so that if we fail during matching, we'll
3235                                                            reinitialize the bounds.  */
3236                                                         insert_op2
3237                                                             (set_number_at,
3238                                                              laststart,
3239                                                              buf_end -
3240                                                              laststart,
3241                                                              upper_bound - 1,
3242                                                              buf_end);
3243                                                         buf_end += 5;
3244                                                 }
3245                                         }
3246                                         pending_exact = 0;
3247                                         beg_interval = NULL;
3248                                 }
3249                                 break;
3250
3251                               unfetch_interval:
3252                                 /* If an invalid interval, match the characters as literals.  */
3253                                 assert(beg_interval);
3254                                 p = beg_interval;
3255                                 beg_interval = NULL;
3256
3257                                 /* normal_char and normal_backslash need `c'.  */
3258                                 PATFETCH(c);
3259
3260                                 if (!(syntax & RE_NO_BK_BRACES)) {
3261                                         if (p > pattern && p[-1] == '\\')
3262                                                 goto normal_backslash;
3263                                 }
3264                                 goto normal_char;
3265
3266 #ifdef emacs
3267                                 /* There is no way to specify the before_dot and after_dot
3268                                    operators.  rms says this is ok.  --karl  */
3269                         case '=':
3270                                 BUF_PUSH(at_dot);
3271                                 break;
3272
3273                         case 's':
3274                                 laststart = buf_end;
3275                                 PATFETCH(c);
3276                                 /* XEmacs addition */
3277                                 if (c >= 0x80 || syntax_spec_code[c] == 0377) {
3278                                         free_stack();
3279                                         return REG_ESYNTAX;
3280                                 }
3281                                 BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
3282                                 break;
3283
3284                         case 'S':
3285                                 laststart = buf_end;
3286                                 PATFETCH(c);
3287                                 /* XEmacs addition */
3288                                 if (c >= 0x80 || syntax_spec_code[c] == 0377) {
3289                                         free_stack();
3290                                         return REG_ESYNTAX;
3291                                 }
3292                                 BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
3293                                 break;
3294
3295 #ifdef MULE
3296 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */
3297                         case 'c':
3298                                 laststart = buf_end;
3299                                 PATFETCH_RAW(c);
3300                                 if (c < 32 || c > 127) {
3301                                         free_stack();
3302                                         return REG_ECATEGORY;
3303                                 }
3304                                 BUF_PUSH_2(categoryspec, c);
3305                                 break;
3306
3307                         case 'C':
3308                                 laststart = buf_end;
3309                                 PATFETCH_RAW(c);
3310                                 if (c < 32 || c > 127) {
3311                                         free_stack();
3312                                         return REG_ECATEGORY;
3313                                 }
3314                                 BUF_PUSH_2(notcategoryspec, c);
3315                                 break;
3316 /* end of category patch */
3317 #endif                          /* MULE */
3318 #endif                          /* emacs */
3319
3320                         case 'w':
3321                                 laststart = buf_end;
3322                                 BUF_PUSH(wordchar);
3323                                 break;
3324
3325                         case 'W':
3326                                 laststart = buf_end;
3327                                 BUF_PUSH(notwordchar);
3328                                 break;
3329
3330                         case '<':
3331                                 BUF_PUSH(wordbeg);
3332                                 break;
3333
3334                         case '>':
3335                                 BUF_PUSH(wordend);
3336                                 break;
3337
3338                         case 'b':
3339                                 BUF_PUSH(wordbound);
3340                                 break;
3341
3342                         case 'B':
3343                                 BUF_PUSH(notwordbound);
3344                                 break;
3345
3346                         case '`':
3347                                 BUF_PUSH(begbuf);
3348                                 break;
3349
3350                         case '\'':
3351                                 BUF_PUSH(endbuf);
3352                                 break;
3353
3354                         case '1':
3355                         case '2':
3356                         case '3':
3357                         case '4':
3358                         case '5':
3359                         case '6':
3360                         case '7':
3361                         case '8':
3362                         case '9': {
3363                                 int reg;
3364
3365                                 if (syntax & RE_NO_BK_REFS) {
3366                                         goto normal_char;
3367                                 }
3368                                 /* External register indexing. */
3369                                 reg = c - '0';
3370
3371                                 if (reg > bufp->re_nsub) {
3372                                         free_stack();
3373                                         return REG_ESUBREG;
3374                                 }
3375
3376                                 /* Convert external to internal as soon
3377                                    as possible. */
3378                                 reg = bufp->external_to_internal_register[reg];
3379  
3380                                 /* Can't back reference to a
3381                                    subexpression if inside it. */
3382                                 if (group_in_compile_stack(
3383                                             compile_stack, reg)) {
3384                                         goto normal_char;
3385                                 }
3386                                 laststart = buf_end;
3387                                 BUF_PUSH_2(duplicate, reg);
3388                         }
3389                                 break;
3390
3391                         case '+':
3392                         case '?':
3393                                 if (syntax & RE_BK_PLUS_QM) {
3394                                         goto handle_plus;
3395                                 } else {
3396                                         goto normal_backslash;
3397                                 }
3398                         default:
3399                         normal_backslash:
3400                                 /* You might think it would be useful for \ to
3401                                    mean not to translate; but if we don't
3402                                    translate it, it will never match
3403                                    anything.  */
3404                                 c = TRANSLATE(c);
3405                                 goto normal_char;
3406                         }
3407                         break;
3408
3409                         /* Expects the character in `c'.  */
3410                         /* `p' points to the location after where `c' came
3411                            from. */
3412                 normal_char:
3413                 default: {
3414                         /* XEmacs: modifications here for Mule. */
3415                         /* `q' points to the beginning of the next char. */
3416                         re_char *q = p;
3417
3418                         /* If no exactn currently being built.  */
3419                         if (!pending_exact
3420                             /* If last exactn not at current position.  */
3421                             || pending_exact + *pending_exact + 1 !=
3422                             buf_end
3423                             /* We have only one byte following the exactn for
3424                                the count. */
3425                             || ((unsigned int)(*pending_exact + (q - p)) >=
3426                                 ((unsigned int)(1 << BYTEWIDTH) - 1))
3427
3428                             /* If followed by a repetition operator.  */
3429                             || *q == '*' || *q == '^'
3430                             || ((syntax & RE_BK_PLUS_QM)
3431                                 ? *q == '\\' && (q[1] == '+'
3432                                                  || q[1] == '?')
3433                                 : (*q == '+' || *q == '?'))
3434                             || ((syntax & RE_INTERVALS)
3435                                 && ((syntax & RE_NO_BK_BRACES)
3436                                     ? *q == '{'
3437                                     : (q[0] == '\\' && q[1] == '{')))) {
3438                                 /* Start building a new exactn.  */
3439
3440                                 laststart = buf_end;
3441
3442                                 BUF_PUSH_2(exactn, 0);
3443                                 pending_exact = buf_end - 1;
3444                         }
3445 #ifndef MULE
3446                         BUF_PUSH(c);
3447                         (*pending_exact)++;
3448 #else
3449                         {
3450                                 Bytecount bt_count;
3451                                 Bufbyte tmp_buf[MAX_EMCHAR_LEN];
3452                                 int i;
3453
3454                                 bt_count =
3455                                         set_charptr_emchar(tmp_buf, c);
3456
3457                                 for (i = 0; i < bt_count; i++) {
3458                                         BUF_PUSH(tmp_buf[i]);
3459                                         (*pending_exact)++;
3460                                 }
3461                         }
3462 #endif
3463                         break;
3464                 }
3465                 }               /* switch (c) */
3466         }                       /* while p != pend */
3467
3468         /* Through the pattern now.  */
3469
3470         if (fixup_alt_jump) {
3471                 STORE_JUMP(jump_past_alt, fixup_alt_jump, buf_end);
3472         }
3473         if (!COMPILE_STACK_EMPTY) {
3474                 free_stack();
3475                 return REG_EPAREN;
3476         }
3477         /* If we don't want backtracking, force success
3478            the first time we reach the end of the compiled pattern.  */
3479         if (syntax & RE_NO_POSIX_BACKTRACKING) {
3480                 BUF_PUSH(succeed);
3481         }
3482         xfree(compile_stack.stack);
3483
3484         /* We have succeeded; set the length of the buffer.  */
3485         bufp->used = buf_end - bufp->buffer;
3486
3487 #ifdef REGEX_DEBUG_FLAG
3488         if (debug) {
3489                 DEBUG_PRINT1("\nCompiled pattern: \n");
3490                 print_compiled_pattern(bufp);
3491         }
3492 #endif                          /* DEBUG */
3493
3494 #ifndef MATCH_MAY_ALLOCATE
3495         /* Initialize the failure stack to the largest possible stack.  This
3496            isn't necessary unless we're trying to avoid calling alloca in
3497            the search and match routines.  */
3498         {
3499                 int num_regs = bufp->re_ngroups + 1;
3500
3501                 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
3502                    is strictly greater than re_max_failures, the largest possible stack
3503                    is 2 * re_max_failures failure points.  */
3504                 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) {
3505                         fail_stack.size =
3506                             (2 * re_max_failures * MAX_FAILURE_ITEMS);
3507
3508                         if (!fail_stack.stack) {
3509                                 fail_stack.stack =
3510                                         (fail_stack_elt_t *)
3511                                         xmalloc_atomic(
3512                                                 fail_stack.size *
3513                                                 sizeof(fail_stack_elt_t));
3514                         } else {
3515                                 fail_stack.stack =
3516                                         (fail_stack_elt_t *)
3517                                         xrealloc(fail_stack.stack,
3518                                                  (fail_stack.size *
3519                                                   sizeof(fail_stack_elt_t)));
3520                         }
3521                 }
3522
3523                 regex_grow_registers(num_regs);
3524         }
3525 #endif  /* not MATCH_MAY_ALLOCATE */
3526
3527         return REG_NOERROR;
3528 }
3529 /* regex_compile */
3530 \f
3531 /* Subroutines for `regex_compile'.  */
3532
3533 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
3534
3535 static void store_op1(re_opcode_t op, unsigned char *loc, int arg)
3536 {
3537         *loc = (unsigned char)op;
3538         STORE_NUMBER(loc + 1, arg);
3539 }
3540
3541 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3542
3543 static void store_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2)
3544 {
3545         *loc = (unsigned char)op;
3546         STORE_NUMBER(loc + 1, arg1);
3547         STORE_NUMBER(loc + 3, arg2);
3548 }
3549
3550 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3551    for OP followed by two-byte integer parameter ARG.  */
3552
3553 static void
3554 insert_op1(re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
3555 {
3556         REGISTER unsigned char *pfrom = end;
3557         REGISTER unsigned char *pto = end + 3;
3558
3559         while (pfrom != loc)
3560                 *--pto = *--pfrom;
3561
3562         store_op1(op, loc, arg);
3563 }
3564
3565 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3566
3567 static void
3568 insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2,
3569            unsigned char *end)
3570 {
3571         REGISTER unsigned char *pfrom = end;
3572         REGISTER unsigned char *pto = end + 5;
3573
3574         while (pfrom != loc)
3575                 *--pto = *--pfrom;
3576
3577         store_op2(op, loc, arg1, arg2);
3578 }
3579
3580 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3581    after an alternative or a begin-subexpression.  We assume there is at
3582    least one character before the ^.  */
3583
3584 static re_bool
3585 at_begline_loc_p(re_char *pattern, re_char *p, reg_syntax_t syntax)
3586 {
3587         re_char *prev = p - 2;
3588         re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3589
3590         return
3591             /* After a subexpression?  */
3592             (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3593             /* After an alternative?  */
3594             || (*prev == '|'
3595                 && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3596 }
3597
3598 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3599    at least one character after the $, i.e., `P < PEND'.  */
3600
3601 static re_bool
3602 at_endline_loc_p(re_char *p, re_char *pend, int syntax)
3603 {
3604         re_char *next = p;
3605         re_bool next_backslash = *next == '\\';
3606         re_char *next_next = p + 1 < pend ? p + 1 : 0;
3607
3608         return
3609             /* Before a subexpression?  */
3610             (syntax & RE_NO_BK_PARENS ? *next == ')'
3611              : next_backslash && next_next && *next_next == ')')
3612             /* Before an alternative?  */
3613             || (syntax & RE_NO_BK_VBAR ? *next == '|'
3614                 : next_backslash && next_next && *next_next == '|');
3615 }
3616
3617 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3618    false if it's not.  */
3619
3620 static re_bool
3621 group_in_compile_stack(compile_stack_type compile_stack, regnum_t regnum)
3622 {
3623         int this_element;
3624
3625         for (this_element = compile_stack.avail - 1;
3626              this_element >= 0; this_element--)
3627                 if (compile_stack.stack[this_element].regnum == regnum) {
3628                         return true;
3629                 }
3630         return false;
3631 }
3632
3633 /* Read the ending character of a range (in a bracket expression) from the
3634    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3635    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3636    Then we set the translation of all bits between the starting and
3637    ending characters (inclusive) in the compiled pattern B.
3638
3639    Return an error code.
3640
3641    We use these short variable names so we can use the same macros as
3642    `regex_compile' itself.  */
3643
3644 static reg_errcode_t
3645 compile_range(re_char ** p_ptr, re_char * pend, RE_TRANSLATE_TYPE translate,
3646               reg_syntax_t syntax, unsigned char *buf_end)
3647 {
3648         Element_count this_char;
3649
3650         re_char *p = *p_ptr;
3651         int range_start, range_end;
3652
3653         if (p == pend)
3654                 return REG_ERANGE;
3655
3656         /* Even though the pattern is a signed `char *', we need to fetch
3657            with unsigned char *'s; if the high bit of the pattern character
3658            is set, the range endpoints will be negative if we fetch using a
3659            signed char *.
3660
3661            We also want to fetch the endpoints without translating them; the
3662            appropriate translation is done in the bit-setting loop below.  */
3663         /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3664         range_start = ((const unsigned char *)p)[-2];
3665         range_end = ((const unsigned char *)p)[0];
3666
3667         /* Have to increment the pointer into the pattern string, so the
3668            caller isn't still at the ending character.  */
3669         (*p_ptr)++;
3670
3671         /* If the start is after the end, the range is empty.  */
3672         if (range_start > range_end)
3673                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3674
3675         /* Here we see why `this_char' has to be larger than an `unsigned
3676            char' -- the range is inclusive, so if `range_end' == 0xff
3677            (assuming 8-bit characters), we would otherwise go into an infinite
3678            loop, since all characters <= 0xff.  */
3679         for (this_char = range_start; this_char <= range_end; this_char++) {
3680                 SET_LIST_BIT(TRANSLATE(this_char));
3681         }
3682
3683         return REG_NOERROR;
3684 }
3685
3686 #ifdef MULE
3687
3688 static reg_errcode_t
3689 compile_extended_range(re_char ** p_ptr, re_char * pend,
3690                        RE_TRANSLATE_TYPE translate,
3691                        reg_syntax_t syntax, Lisp_Object rtab)
3692 {
3693         Emchar this_char, range_start, range_end;
3694         const Bufbyte *p;
3695
3696         if (*p_ptr == pend)
3697                 return REG_ERANGE;
3698
3699         p = (const Bufbyte *)*p_ptr;
3700         range_end = charptr_emchar(p);
3701         p--;                    /* back to '-' */
3702         DEC_CHARPTR(p);         /* back to start of range */
3703         /* We also want to fetch the endpoints without translating them; the
3704            appropriate translation is done in the bit-setting loop below.  */
3705         range_start = charptr_emchar(p);
3706         INC_CHARPTR(*p_ptr);
3707
3708         /* If the start is after the end, the range is empty.  */
3709         if (range_start > range_end)
3710                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3711
3712         /* Can't have ranges spanning different charsets, except maybe for
3713            ranges entirely within the first 256 chars. */
3714
3715         if ((range_start >= 0x100 || range_end >= 0x100)
3716             && CHAR_LEADING_BYTE(range_start) != CHAR_LEADING_BYTE(range_end))
3717                 return REG_ERANGESPAN;
3718
3719         /* As advertised, translations only work over the 0 - 0x7F range.
3720            Making this kind of stuff work generally is much harder.
3721            Iterating over the whole range like this would be way efficient
3722            if the range encompasses 10,000 chars or something.  You'd have
3723            to do something like this:
3724
3725            range_table a;
3726            range_table b;
3727            map over translation table in [range_start, range_end] of
3728            (put the mapped range in a;
3729            put the translation in b)
3730            invert the range in a and truncate to [range_start, range_end]
3731            compute the union of a, b
3732            union the result into rtab
3733          */
3734         for (this_char = range_start;
3735              this_char <= range_end && this_char < 0x80; this_char++) {
3736                 SET_RANGETAB_BIT(TRANSLATE(this_char));
3737         }
3738
3739         if (this_char <= range_end)
3740                 put_range_table(rtab, this_char, range_end, Qt);
3741
3742         return REG_NOERROR;
3743 }
3744
3745 #endif                          /* MULE */
3746 \f
3747 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3748    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3749    characters can start a string that matches the pattern.  This fastmap
3750    is used by re_search to skip quickly over impossible starting points.
3751
3752    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3753    area as BUFP->fastmap.
3754
3755    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3756    the pattern buffer.
3757
3758    Returns 0 if we succeed, -2 if an internal error.   */
3759
3760 int re_compile_fastmap(struct re_pattern_buffer *bufp)
3761 {
3762         int j, k;
3763 #ifdef MATCH_MAY_ALLOCATE
3764         fail_stack_type fail_stack;
3765 #endif
3766         DECLARE_DESTINATION;
3767         /* We don't push any register information onto the failure stack.  */
3768
3769         REGISTER char *fastmap = bufp->fastmap;
3770         unsigned char *pattern = bufp->buffer;
3771         unsigned long size = bufp->used;
3772         /* actually p supposed to carry the const qualifier, however, some
3773            silly mule code below CHANGES p and hence we cant go with the const
3774            qualifier here, leaving an `unfixable' warning on the way */
3775 #ifdef MULE
3776         unsigned char *p = pattern;
3777 #else
3778         re_char *p = pattern;
3779 #endif
3780         REGISTER unsigned char *pend = pattern + size;
3781
3782 #ifdef REL_ALLOC
3783         /* This holds the pointer to the failure stack, when
3784            it is allocated relocatably.  */
3785         fail_stack_elt_t *failure_stack_ptr;
3786 #endif
3787
3788         /* Assume that each path through the pattern can be null until
3789            proven otherwise.  We set this false at the bottom of switch
3790            statement, to which we get only if a particular path doesn't
3791            match the empty string.  */
3792         re_bool path_can_be_null = true;
3793
3794         /* We aren't doing a `succeed_n' to begin with.  */
3795         re_bool succeed_n_p = false;
3796
3797         assert(fastmap != NULL && p != NULL);
3798
3799         INIT_FAIL_STACK();
3800         memset(fastmap, 0, 1 << BYTEWIDTH);     /* Assume nothing's valid.  */
3801         bufp->fastmap_accurate = 1;     /* It will be when we're done.  */
3802         bufp->can_be_null = 0;
3803
3804         while (1) {
3805                 if (p == pend || *p == succeed) {
3806                         /* We have reached the (effective) end of pattern.  */
3807                         if (!FAIL_STACK_EMPTY()) {
3808                                 bufp->can_be_null |= path_can_be_null;
3809
3810                                 /* Reset for next path.  */
3811                                 path_can_be_null = true;
3812
3813                                 /* fuck, p isnt const thanks to that
3814                                  * unified range table function below */
3815 #ifdef MULE
3816                                 p = (unsigned char*)fail_stack.
3817                                     stack[--fail_stack.avail].pointer;
3818 #else
3819                                 p = fail_stack.stack[--fail_stack.avail]
3820                                         .pointer;
3821 #endif
3822
3823                                 continue;
3824                         } else {
3825                                 break;
3826                         }
3827                 }
3828
3829                 /* We should never be about to go beyond the end of the pattern.  */
3830                 assert(p < pend);
3831
3832                 switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
3833
3834                         /* I guess the idea here is to simply not bother with a
3835                            fastmap if a backreference is used, since it's too
3836                            hard to figure out the fastmap for the corresponding
3837                            group.  Setting `can_be_null' stops `re_search_2'
3838                            from using the fastmap, so that is all we do.  */
3839                 case duplicate:
3840                         bufp->can_be_null = 1;
3841                         goto done;
3842
3843                         /* Following are the cases which match a character.
3844                            These end with `break'.  */
3845
3846                 case exactn:
3847                         fastmap[p[1]] = 1;
3848                         break;
3849
3850                 case charset:
3851                         /* XEmacs: Under Mule, these bit vectors will
3852                            only contain values for characters below 0x80. */
3853                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3854                                 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3855                                         fastmap[j] = 1;
3856                         break;
3857
3858                 case charset_not:
3859                         /* Chars beyond end of map must be allowed.  */
3860 #ifdef MULE
3861                         for (j = *p * BYTEWIDTH; j < 0x80; j++)
3862                                 fastmap[j] = 1;
3863                         /* And all extended characters must be allowed, too. */
3864                         for (j = 0x80; j < 0xA0; j++)
3865                                 fastmap[j] = 1;
3866 #else                           /* not MULE */
3867                         for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3868                                 fastmap[j] = 1;
3869 #endif                          /* MULE */
3870
3871                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3872                                 if (!
3873                                     (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3874                                         fastmap[j] = 1;
3875                         break;
3876
3877 #ifdef MULE
3878                 case charset_mule: {
3879                         int nentries;
3880                         int i;
3881
3882                         nentries = unified_range_table_nentries(p);
3883                         for (i = 0; i < nentries; i++) {
3884                                 EMACS_INT first, last;
3885                                 Lisp_Object dummy_val;
3886                                 int jj;
3887                                 Bufbyte strr[MAX_EMCHAR_LEN];
3888
3889                                 unified_range_table_get_range(p, i,
3890                                                               &first,
3891                                                               &last,
3892                                                               &dummy_val);
3893                                 for (jj = first;
3894                                      jj <= last && jj < 0x80; jj++)
3895                                         fastmap[jj] = 1;
3896                                 /* Ranges below 0x100 can span charsets, but there
3897                                    are only two (Control-1 and Latin-1), and
3898                                    either first or last has to be in them. */
3899                                 set_charptr_emchar(strr, first);
3900                                 fastmap[*strr] = 1;
3901                                 if (last < 0x100) {
3902                                         set_charptr_emchar(strr, last);
3903                                         fastmap[*strr] = 1;
3904                                 }
3905                         }
3906                 }
3907                         break;
3908
3909                 case charset_mule_not: {
3910                         int nentries;
3911                         int i;
3912
3913                         nentries = unified_range_table_nentries(p);
3914                         for (i = 0; i < nentries; i++) {
3915                                 EMACS_INT first, last;
3916                                 Lisp_Object dummy_val;
3917                                 int jj;
3918                                 int smallest_prev = 0;
3919
3920                                 unified_range_table_get_range(p, i,
3921                                                               &first,
3922                                                               &last,
3923                                                               &dummy_val);
3924                                 for (jj = smallest_prev;
3925                                      jj < first && jj < 0x80; jj++)
3926                                         fastmap[jj] = 1;
3927                                 smallest_prev = last + 1;
3928                                 if (smallest_prev >= 0x80)
3929                                         break;
3930                         }
3931                         /* Calculating which leading bytes are actually allowed
3932                            here is rather difficult, so we just punt and allow
3933                            all of them. */
3934                         for (i = 0x80; i < 0xA0; i++)
3935                                 fastmap[i] = 1;
3936                 }
3937                         break;
3938 #endif                          /* MULE */
3939
3940                 case wordchar:
3941 #ifdef emacs
3942                         k = (int)Sword;
3943                         goto matchsyntax;
3944 #else
3945                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3946                                 if (SYNTAX_UNSAFE
3947                                     (XCHAR_TABLE
3948                                      (regex_emacs_buffer->mirror_syntax_table),
3949                                      j) == Sword)
3950                                         fastmap[j] = 1;
3951                         break;
3952 #endif
3953
3954                 case notwordchar:
3955 #ifdef emacs
3956                         k = (int)Sword;
3957                         goto matchnotsyntax;
3958 #else
3959                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3960                                 if (SYNTAX_UNSAFE
3961                                     (XCHAR_TABLE
3962                                      (regex_emacs_buffer->mirror_syntax_table),
3963                                      j) != Sword)
3964                                         fastmap[j] = 1;
3965                         break;
3966 #endif
3967
3968                 case anychar: {
3969                         int fastmap_newline = fastmap['\n'];
3970
3971                         /* `.' matches anything ...  */
3972 #ifdef MULE
3973                         /* "anything" only includes bytes that can be the
3974                            first byte of a character. */
3975                         for (j = 0; j < 0xA0; j++)
3976                                 fastmap[j] = 1;
3977 #else
3978                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3979                                 fastmap[j] = 1;
3980 #endif
3981
3982                         /* ... except perhaps newline.  */
3983                         if (!(bufp->syntax & RE_DOT_NEWLINE))
3984                                 fastmap['\n'] = fastmap_newline;
3985
3986                         /* Return if we have already set `can_be_null'; if we
3987                            have, then the fastmap is irrelevant.  Something's
3988                            wrong here.  */
3989                         else if (bufp->can_be_null)
3990                                 goto done;
3991
3992                         /* Otherwise, have to check alternative paths.  */
3993                         break;
3994                 }
3995
3996 #ifdef emacs
3997                 case wordbound:
3998                 case notwordbound:
3999                 case wordbeg:
4000                 case wordend:
4001                 case notsyntaxspec:
4002                 case syntaxspec:
4003                         /* This match depends on text properties.  These end with
4004                            aborting optimizations.  */
4005                         bufp->can_be_null = 1;
4006                         goto done;
4007
4008 #ifdef emacs
4009                 matchsyntax:
4010 #ifdef MULE
4011                         for (j = 0; j < 0x80; j++)
4012                                 if (SYNTAX_UNSAFE
4013                                     (XCHAR_TABLE
4014                                      (regex_emacs_buffer->mirror_syntax_table),
4015                                      j) == (enum syntaxcode)k)
4016                                         fastmap[j] = 1;
4017                         for (j = 0x80; j < 0xA0; j++) {
4018                                 if (LEADING_BYTE_PREFIX_P(j))
4019                                         /* too complicated to calculate this right */
4020                                         fastmap[j] = 1;
4021                                 else {
4022                                         int multi_p;
4023                                         Lisp_Object cset;
4024
4025                                         cset = CHARSET_BY_LEADING_BYTE(j);
4026                                         if (CHARSETP(cset)) {
4027                                                 if (charset_syntax
4028                                                     (regex_emacs_buffer, cset,
4029                                                      &multi_p)
4030                                                     == Sword || multi_p)
4031                                                         fastmap[j] = 1;
4032                                         }
4033                                 }
4034                         }
4035 #else                           /* not MULE */
4036                         for (j = 0; j < (1 << BYTEWIDTH); j++)
4037                                 if (SYNTAX_UNSAFE
4038                                     (XCHAR_TABLE
4039                                      (regex_emacs_buffer->mirror_syntax_table),
4040                                      j) == (enum syntaxcode)k)
4041                                         fastmap[j] = 1;
4042 #endif                          /* MULE */
4043                         break;
4044
4045                 matchnotsyntax:
4046 #ifdef MULE
4047                         for (j = 0; j < 0x80; j++)
4048                                 if (SYNTAX_UNSAFE
4049                                     (XCHAR_TABLE
4050                                      (regex_emacs_buffer->mirror_syntax_table),
4051                                      j) != (enum syntaxcode)k)
4052                                         fastmap[j] = 1;
4053                         for (j = 0x80; j < 0xA0; j++) {
4054                                 if (LEADING_BYTE_PREFIX_P(j))
4055                                         /* too complicated to calculate this right */
4056                                         fastmap[j] = 1;
4057                                 else {
4058                                         int multi_p;
4059                                         Lisp_Object cset;
4060
4061                                         cset = CHARSET_BY_LEADING_BYTE(j);
4062                                         if (CHARSETP(cset)) {
4063                                                 if (charset_syntax
4064                                                     (regex_emacs_buffer, cset,
4065                                                      &multi_p)
4066                                                     != Sword || multi_p)
4067                                                         fastmap[j] = 1;
4068                                         }
4069                                 }
4070                         }
4071 #else                           /* not MULE */
4072                         for (j = 0; j < (1 << BYTEWIDTH); j++)
4073                                 if (SYNTAX_UNSAFE
4074                                     (XCHAR_TABLE
4075                                      (regex_emacs_buffer->mirror_syntax_table),
4076                                      j) != (enum syntaxcode)k)
4077                                         fastmap[j] = 1;
4078 #endif                          /* MULE */
4079                         break;
4080 #endif                          /* emacs */
4081
4082 #ifdef MULE
4083 /* 97/2/17 jhod category patch */
4084                 case categoryspec:
4085                 case notcategoryspec:
4086                         bufp->can_be_null = 1;
4087                         return 0;
4088 /* end if category patch */
4089 #endif                          /* MULE */
4090
4091                         /* All cases after this match the empty string.  These
4092                            end with `continue'.  */
4093
4094                 case before_dot:
4095                 case at_dot:
4096                 case after_dot:
4097                         continue;
4098 #endif                          /* emacs */
4099
4100                 case no_op:
4101                 case begline:
4102                 case endline:
4103                 case begbuf:
4104                 case endbuf:
4105 #ifndef emacs
4106                 case wordbound:
4107                 case notwordbound:
4108                 case wordbeg:
4109                 case wordend:
4110 #endif
4111                 case push_dummy_failure:
4112                         continue;
4113
4114                 case jump_n:
4115                 case pop_failure_jump:
4116                 case maybe_pop_jump:
4117                 case jump:
4118                 case jump_past_alt:
4119                 case dummy_failure_jump:
4120                         EXTRACT_NUMBER_AND_INCR(j, p);
4121                         p += j;
4122                         if (j > 0)
4123                                 continue;
4124
4125                         /* Jump backward implies we just went through the body
4126                            of a loop and matched nothing.  Opcode jumped to
4127                            should be `on_failure_jump' or `succeed_n'.  Just
4128                            treat it like an ordinary jump.  For a * loop, it has
4129                            pushed its failure point already; if so, discard that
4130                            as redundant.  */
4131                         if ((re_opcode_t) * p != on_failure_jump
4132                             && (re_opcode_t) * p != succeed_n)
4133                                 continue;
4134
4135                         p++;
4136                         EXTRACT_NUMBER_AND_INCR(j, p);
4137                         p += j;
4138
4139                         /* If what's on the stack is where we are now, pop
4140                            it.  */
4141                         if (!FAIL_STACK_EMPTY()
4142                             && fail_stack.stack[fail_stack.avail - 1].pointer ==
4143                             p)
4144                                 fail_stack.avail--;
4145
4146                         continue;
4147
4148                 case on_failure_jump:
4149                 case on_failure_keep_string_jump:
4150                       handle_on_failure_jump:
4151                         EXTRACT_NUMBER_AND_INCR(j, p);
4152
4153                         /* For some patterns, e.g., `(a?)?', `p+j' here points
4154                            to the end of the pattern.  We don't want to push
4155                            such a point, since when we restore it above,
4156                            entering the switch will increment `p' past the end
4157                            of the pattern.  We don't need to push such a point
4158                            since we obviously won't find any more fastmap
4159                            entries beyond `pend'.  Such a pattern can match the
4160                            null string, though.  */
4161                         if (p + j < pend) {
4162                                 if (!PUSH_PATTERN_OP(p + j, fail_stack)) {
4163                                         RESET_FAIL_STACK();
4164                                         return -2;
4165                                 }
4166                         } else
4167                                 bufp->can_be_null = 1;
4168
4169                         if (succeed_n_p) {
4170                                 EXTRACT_NUMBER_AND_INCR(k, p);  /* Skip the n.  */
4171                                 succeed_n_p = false;
4172                         }
4173
4174                         continue;
4175
4176                 case succeed_n:
4177                         /* Get to the number of times to succeed.  */
4178                         p += 2;
4179
4180                         /* Increment p past the n for when k != 0.  */
4181                         EXTRACT_NUMBER_AND_INCR(k, p);
4182                         if (k == 0) {
4183                                 p -= 4;
4184                                 succeed_n_p = true;     /* Spaghetti code alert.  */
4185                                 goto handle_on_failure_jump;
4186                         }
4187                         continue;
4188
4189                 case set_number_at:
4190                         p += 4;
4191                         continue;
4192
4193                 case start_memory:
4194                 case stop_memory:
4195                         p += 2;
4196                         continue;
4197
4198                 case succeed:
4199                 default:
4200                         abort();        /* We have listed all the cases.  */
4201                 }               /* switch *p++ */
4202
4203                 /* Getting here means we have found the possible starting
4204                    characters for one path of the pattern -- and that the empty
4205                    string does not match.  We need not follow this path further.
4206                    Instead, look at the next alternative (remembered on the
4207                    stack), or quit if no more.  The test at the top of the loop
4208                    does these things.  */
4209                 path_can_be_null = false;
4210                 p = pend;
4211         }                       /* while p */
4212
4213         /* Set `can_be_null' for the last path (also the first path, if the
4214            pattern is empty).  */
4215         bufp->can_be_null |= path_can_be_null;
4216
4217 done:
4218         RESET_FAIL_STACK();
4219         return 0;
4220 }       /* re_compile_fastmap */
4221 \f
4222 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4223    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4224    this memory for recording register information.  STARTS and ENDS
4225    must be allocated using the malloc library routine, and must each
4226    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4227
4228    If NUM_REGS == 0, then subsequent matches should allocate their own
4229    register data.
4230
4231    Unless this function is called, the first search or match using
4232    PATTERN_BUFFER will allocate its own register data, without
4233    freeing the old data.  */
4234
4235 void
4236 re_set_registers(struct re_pattern_buffer *bufp, struct re_registers *regs,
4237                  unsigned num_regs, regoff_t * starts, regoff_t * ends)
4238 {
4239         if (num_regs) {
4240                 bufp->regs_allocated = REGS_REALLOCATE;
4241                 regs->num_regs = num_regs;
4242                 regs->start = starts;
4243                 regs->end = ends;
4244         } else {
4245                 bufp->regs_allocated = REGS_UNALLOCATED;
4246                 regs->num_regs = 0;
4247                 regs->start = regs->end = (regoff_t *) 0;
4248         }
4249 }
4250 \f
4251 /* Searching routines.  */
4252
4253 /* Like re_search_2, below, but only one string is specified, and
4254    doesn't let you say where to stop matching. */
4255
4256 int
4257 re_search(struct re_pattern_buffer *bufp, const char *string, int size,
4258           int startpos, int range, struct re_registers *regs)
4259 {
4260         return re_search_2(bufp, NULL, 0, string, size, startpos, range,
4261                            regs, size);
4262 }
4263
4264 #ifndef emacs
4265 /* Snarfed from src/lisp.h, needed for compiling [ce]tags. */
4266 # define bytecount_to_charcount(ptr, len) (len)
4267 # define charcount_to_bytecount(ptr, len) (len)
4268 typedef int Charcount;
4269 #endif
4270
4271 /* Using the compiled pattern in BUFP->buffer, first tries to match the
4272    virtual concatenation of STRING1 and STRING2, starting first at index
4273    STARTPOS, then at STARTPOS + 1, and so on.
4274
4275    With MULE, STARTPOS is a byte position, not a char position.  And the
4276    search will increment STARTPOS by the width of the current leading
4277    character.
4278
4279    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
4280
4281    RANGE is how far to scan while trying to match.  RANGE = 0 means try
4282    only at STARTPOS; in general, the last start tried is STARTPOS +
4283    RANGE.
4284
4285    With MULE, RANGE is a byte position, not a char position.  The last
4286    start tried is the character starting <= STARTPOS + RANGE.
4287
4288    In REGS, return the indices of the virtual concatenation of STRING1
4289    and STRING2 that matched the entire BUFP->buffer and its contained
4290    subexpressions.
4291
4292    Do not consider matching one past the index STOP in the virtual
4293    concatenation of STRING1 and STRING2.
4294
4295    We return either the position in the strings at which the match was
4296    found, -1 if no match, or -2 if error (such as failure
4297    stack overflow).  */
4298
4299 int
4300 re_search_2(struct re_pattern_buffer *bufp, const char *str1,
4301             int size1, const char *str2, int size2, int startpos,
4302             int range, struct re_registers *regs, int stop)
4303 {
4304         int val;
4305         re_char *string1 = (re_char *) str1;
4306         re_char *string2 = (re_char *) str2;
4307         REGISTER char *fastmap = bufp->fastmap;
4308         REGISTER RE_TRANSLATE_TYPE translate = bufp->translate;
4309         int total_size = size1 + size2;
4310         int endpos = startpos + range;
4311 #ifdef REGEX_BEGLINE_CHECK
4312         int anchored_at_begline = 0;
4313 #endif
4314         re_char *d;
4315         Charcount d_size;
4316
4317         /* Check for out-of-range STARTPOS.  */
4318         if (startpos < 0 || startpos > total_size)
4319                 return -1;
4320
4321         /* Fix up RANGE if it might eventually take us outside
4322            the virtual concatenation of STRING1 and STRING2.  */
4323         if (endpos < 0)
4324                 range = 0 - startpos;
4325         else if (endpos > total_size)
4326                 range = total_size - startpos;
4327
4328         /* If the search isn't to be a backwards one, don't waste time in a
4329            search for a pattern that must be anchored.  */
4330         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
4331             && range > 0) {
4332                 if (startpos > 0)
4333                         return -1;
4334                 else {
4335                         d = ((const unsigned char *)
4336                              (startpos >=
4337                               size1 ? string2 - size1 : string1) + startpos);
4338                         range = charcount_to_bytecount(d, 1);
4339                 }
4340         }
4341 #ifdef emacs
4342         /* In a forward search for something that starts with \=.
4343            don't keep searching past point.  */
4344         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot
4345             && range > 0) {
4346                 range =
4347                     BUF_PT(regex_emacs_buffer) - BUF_BEGV(regex_emacs_buffer)
4348                     - startpos;
4349                 if (range < 0)
4350                         return -1;
4351         }
4352 #endif                          /* emacs */
4353
4354         /* Update the fastmap now if not correct already.  */
4355         if (fastmap && !bufp->fastmap_accurate)
4356                 if (re_compile_fastmap(bufp) == -2)
4357                         return -2;
4358
4359 #ifdef REGEX_BEGLINE_CHECK
4360         {
4361                 unsigned long i = 0;
4362
4363                 while (i < bufp->used) {
4364                         if (bufp->buffer[i] == start_memory ||
4365                             bufp->buffer[i] == stop_memory)
4366                                 i += 2;
4367                         else
4368                                 break;
4369                 }
4370                 anchored_at_begline = i < bufp->used
4371                     && bufp->buffer[i] == begline;
4372         }
4373 #endif
4374
4375 #ifdef emacs
4376         SETUP_SYNTAX_CACHE_FOR_OBJECT(regex_match_object,
4377                                       regex_emacs_buffer,
4378                                       SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR
4379                                       (regex_match_object, regex_emacs_buffer,
4380                                        startpos), 1);
4381 #endif
4382
4383         /* Loop through the string, looking for a place to start matching.  */
4384         for (;;) {
4385 #ifdef REGEX_BEGLINE_CHECK
4386                 /* If the regex is anchored at the beginning of a line (i.e. with a ^),
4387                    then we can speed things up by skipping to the next beginning-of-
4388                    line. */
4389                 if (anchored_at_begline && startpos > 0 && startpos != size1 &&
4390                     range > 0) {
4391                         /* whose stupid idea was it anyway to make this
4392                            function take two strings to match?? */
4393                         int lim = 0;
4394                         int irange = range;
4395
4396                         if (startpos < size1 && startpos + range >= size1)
4397                                 lim = range - (size1 - startpos);
4398
4399                         d = ((const unsigned char *)
4400                              (startpos >=
4401                               size1 ? string2 - size1 : string1) + startpos);
4402                         DEC_CHARPTR(d); /* Ok, since startpos != size1. */
4403                         d_size = charcount_to_bytecount(d, 1);
4404
4405                         if (TRANSLATE_P(translate))
4406                                 while (range > lim && *d != '\n') {
4407                                         d += d_size;    /* Speedier INC_CHARPTR(d) */
4408                                         d_size = charcount_to_bytecount(d, 1);
4409                                         range -= d_size;
4410                         } else
4411                                 while (range > lim && *d != '\n') {
4412                                         d += d_size;    /* Speedier INC_CHARPTR(d) */
4413                                         d_size = charcount_to_bytecount(d, 1);
4414                                         range -= d_size;
4415                                 }
4416
4417                         startpos += irange - range;
4418                 }
4419 #endif                          /* REGEX_BEGLINE_CHECK */
4420
4421                 /* If a fastmap is supplied, skip quickly over characters that
4422                    cannot be the start of a match.  If the pattern can match the
4423                    null string, however, we don't need to skip characters; we want
4424                    the first null string.  */
4425                 if (fastmap && startpos < total_size && !bufp->can_be_null) {
4426                         if (range > 0) {        /* Searching forwards.  */
4427                                 int lim = 0;
4428                                 int irange = range;
4429
4430                                 if (startpos < size1
4431                                     && startpos + range >= size1)
4432                                         lim = range - (size1 - startpos);
4433
4434                                 d = ((const unsigned char *)
4435                                      (startpos >=
4436                                       size1 ? string2 - size1 : string1) +
4437                                      startpos);
4438
4439                                 /* Written out as an if-else to avoid testing `translate'
4440                                    inside the loop.  */
4441                                 if (TRANSLATE_P(translate))
4442                                         while (range > lim) {
4443 #ifdef MULE
4444                                                 Emchar buf_ch;
4445                                                 Bufbyte str[MAX_EMCHAR_LEN];
4446
4447                                                 buf_ch = charptr_emchar(d);
4448                                                 buf_ch = RE_TRANSLATE(buf_ch);
4449                                                 set_charptr_emchar(str, buf_ch);
4450                                                 if (buf_ch >= 0200
4451                                                     || fastmap[(unsigned char)
4452                                                                *str])
4453                                                         break;
4454 #else
4455                                                 if (fastmap
4456                                                     [(unsigned char)
4457                                                      RE_TRANSLATE(*d)])
4458                                                         break;
4459 #endif                          /* MULE */
4460                                                 d_size =
4461                                                     charcount_to_bytecount(d,
4462                                                                            1);
4463                                                 range -= d_size;
4464                                                 d += d_size;    /* Speedier INC_CHARPTR(d) */
4465                                 } else
4466                                         while (range > lim && !fastmap[*d]) {
4467                                                 d_size =
4468                                                     charcount_to_bytecount(d,
4469                                                                            1);
4470                                                 range -= d_size;
4471                                                 d += d_size;    /* Speedier INC_CHARPTR(d) */
4472                                         }
4473
4474                                 startpos += irange - range;
4475                         } else {        /* Searching backwards.  */
4476
4477                                 Emchar c = (size1 == 0 || startpos >= size1
4478                                             ? charptr_emchar(string2 +
4479                                                              startpos - size1)
4480                                             : charptr_emchar(string1 +
4481                                                              startpos));
4482                                 c = TRANSLATE(c);
4483 #ifdef MULE
4484                                 if (!(c >= 0200 || fastmap[(unsigned char)c]))
4485                                         goto advance;
4486 #else
4487                                 if (!fastmap[(unsigned char)c])
4488                                         goto advance;
4489 #endif
4490                         }
4491                 }
4492
4493                 /* If can't match the null string, and that's all we have left, fail.  */
4494                 if (range >= 0 && startpos == total_size && fastmap
4495                     && !bufp->can_be_null)
4496                         return -1;
4497
4498 #ifdef emacs                    /* XEmacs added, w/removal of immediate_quit */
4499                 if (!no_quit_in_re_search)
4500                         QUIT;
4501 #endif
4502                 val = re_match_2_internal(bufp, string1, size1, string2, size2,
4503                                           startpos, regs, stop);
4504 #ifndef REGEX_MALLOC
4505 #ifdef C_ALLOCA
4506                 alloca(0);
4507 #endif
4508 #endif
4509
4510                 if (val >= 0)
4511                         return startpos;
4512
4513                 if (val == -2)
4514                         return -2;
4515
4516               advance:
4517                 if (!range)
4518                         break;
4519                 else if (range > 0) {
4520                         d = ((const unsigned char *)
4521                              (startpos >=
4522                               size1 ? string2 - size1 : string1) + startpos);
4523                         d_size = charcount_to_bytecount(d, 1);
4524                         range -= d_size;
4525                         startpos += d_size;
4526                 } else {
4527                         /* Note startpos > size1 not >=.  If we are on the
4528                            string1/string2 boundary, we want to backup into string1. */
4529                         d = ((const unsigned char *)
4530                              (startpos >
4531                               size1 ? string2 - size1 : string1) + startpos);
4532                         DEC_CHARPTR(d);
4533                         d_size = charcount_to_bytecount(d, 1);
4534                         range += d_size;
4535                         startpos -= d_size;
4536                 }
4537         }
4538         return -1;
4539 }                               /* re_search_2 */
4540 \f
4541 /* Declarations and macros for re_match_2.  */
4542
4543 /* This converts PTR, a pointer into one of the search strings `string1'
4544    and `string2' into an offset from the beginning of that string.  */
4545 #define POINTER_TO_OFFSET(ptr)                  \
4546   (FIRST_STRING_P (ptr)                         \
4547    ? ((regoff_t) ((ptr) - string1))             \
4548    : ((regoff_t) ((ptr) - string2 + size1)))
4549
4550 /* Macros for dealing with the split strings in re_match_2.  */
4551
4552 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
4553
4554 /* Call before fetching a character with *d.  This switches over to
4555    string2 if necessary.  */
4556 #define REGEX_PREFETCH()                                                        \
4557   while (d == dend)                                                     \
4558     {                                                                   \
4559       /* End of string2 => fail.  */                                    \
4560       if (dend == end_match_2)                                          \
4561         goto fail;                                                      \
4562       /* End of string1 => advance to string2.  */                      \
4563       d = string2;                                                      \
4564       dend = end_match_2;                                               \
4565     }
4566
4567 /* Test if at very beginning or at very end of the virtual concatenation
4568    of `string1' and `string2'.  If only one string, it's `string2'.  */
4569 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4570 #define AT_STRINGS_END(d) ((d) == end2)
4571
4572 /* XEmacs change:
4573    If the given position straddles the string gap, return the equivalent
4574    position that is before or after the gap, respectively; otherwise,
4575    return the same position. */
4576 #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d))
4577 #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
4578
4579 /* Test if CH is a word-constituent character. (XEmacs change) */
4580 #define WORDCHAR_P_UNSAFE(ch)                                              \
4581   (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),   \
4582                                ch) == Sword)
4583
4584 /* Free everything we malloc.  */
4585 #ifdef MATCH_MAY_ALLOCATE
4586 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
4587 #define FREE_VARIABLES()                                                \
4588   do {                                                                  \
4589     REGEX_FREE_STACK (fail_stack.stack);                                \
4590     FREE_VAR (regstart);                                                \
4591     FREE_VAR (regend);                                                  \
4592     FREE_VAR (old_regstart);                                            \
4593     FREE_VAR (old_regend);                                              \
4594     FREE_VAR (best_regstart);                                           \
4595     FREE_VAR (best_regend);                                             \
4596     FREE_VAR (reg_info);                                                \
4597     FREE_VAR (reg_dummy);                                               \
4598     FREE_VAR (reg_info_dummy);                                          \
4599   } while (0)
4600 #else                           /* not MATCH_MAY_ALLOCATE */
4601 #define FREE_VARIABLES() ((void)0)      /* Do nothing!  But inhibit gcc warning.  */
4602 #endif                          /* MATCH_MAY_ALLOCATE */
4603
4604 /* These values must meet several constraints.  They must not be valid
4605    register values; since we have a limit of 255 registers (because
4606    we use only one byte in the pattern for the register number), we can
4607    use numbers larger than 255.  They must differ by 1, because of
4608    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
4609    be larger than the value for the highest register, so we do not try
4610    to actually save any registers when none are active.  */
4611 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4612 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4613 \f
4614 /* Matching routines.  */
4615
4616 #ifndef emacs                   /* Emacs never uses this.  */
4617 /* re_match is like re_match_2 except it takes only a single string.  */
4618
4619 int
4620 re_match(struct re_pattern_buffer *bufp, const char *string, int size,
4621          int pos, struct re_registers *regs)
4622 {
4623         int result =
4624             re_match_2_internal(bufp, NULL, 0, (re_char *) string, size,
4625                                 pos, regs, size);
4626         alloca(0);
4627         return result;
4628 }
4629 #endif                          /* not emacs */
4630
4631 /* re_match_2 matches the compiled pattern in BUFP against the
4632    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and
4633    SIZE2, respectively).  We start matching at POS, and stop matching
4634    at STOP.
4635
4636    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4637    store offsets for the substring each group matched in REGS.  See the
4638    documentation for exactly how many groups we fill.
4639
4640    We return -1 if no match, -2 if an internal error (such as the
4641    failure stack overflowing).  Otherwise, we return the length of the
4642    matched substring.  */
4643
4644 int
4645 re_match_2(struct re_pattern_buffer *bufp, const char *string1,
4646            int size1, const char *string2, int size2, int pos,
4647            struct re_registers *regs, int stop)
4648 {
4649         int result;
4650
4651 #ifdef emacs
4652         SETUP_SYNTAX_CACHE_FOR_OBJECT(regex_match_object,
4653                                       regex_emacs_buffer,
4654                                       SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR
4655                                       (regex_match_object, regex_emacs_buffer,
4656                                        pos), 1);
4657 #endif
4658
4659         result = re_match_2_internal(bufp, (re_char *) string1, size1,
4660                                      (re_char *) string2, size2,
4661                                      pos, regs, stop);
4662
4663         alloca(0);
4664         return result;
4665 }
4666
4667 /* This is a separate function so that we can force an alloca cleanup
4668    afterwards.  */
4669 static int
4670 re_match_2_internal(struct re_pattern_buffer *bufp, re_char * string1,
4671                     int size1, re_char * string2, int size2, int pos,
4672                     struct re_registers *regs, int stop)
4673 {
4674         /* General temporaries.  */
4675         int mcnt;
4676         unsigned char *p1;
4677         int should_succeed;     /* XEmacs change */
4678
4679         /* Just past the end of the corresponding string.  */
4680         re_char *end1, *end2;
4681
4682         /* Pointers into string1 and string2, just past the last characters in
4683            each to consider matching.  */
4684         re_char *end_match_1, *end_match_2;
4685
4686         /* Where we are in the data, and the end of the current string.  */
4687         re_char *d, *dend;
4688
4689         /* Where we are in the pattern, and the end of the pattern.  */
4690         unsigned char *p = bufp->buffer;
4691         REGISTER unsigned char *pend = p + bufp->used;
4692
4693         /* Mark the opcode just after a start_memory, so we can test for an
4694            empty subpattern when we get to the stop_memory.  */
4695         re_char *just_past_start_mem = 0;
4696
4697         /* We use this to map every character in the string.  */
4698         RE_TRANSLATE_TYPE translate = bufp->translate;
4699
4700         /* Failure point stack.  Each place that can handle a failure further
4701            down the line pushes a failure point on this stack.  It consists of
4702            restart, regend, and reg_info for all registers corresponding to
4703            the subexpressions we're currently inside, plus the number of such
4704            registers, and, finally, two char *'s.  The first char * is where
4705            to resume scanning the pattern; the second one is where to resume
4706            scanning the strings.  If the latter is zero, the failure point is
4707            a ``dummy''; if a failure happens and the failure point is a dummy,
4708            it gets discarded and the next one is tried.  */
4709 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, this is global.  */
4710         fail_stack_type fail_stack;
4711 #endif
4712 #ifdef REGEX_DEBUG_FLAG
4713         static unsigned failure_id;
4714         int nfailure_points_pushed = 0, nfailure_points_popped = 0;
4715 #endif
4716
4717 #ifdef REL_ALLOC
4718         /* This holds the pointer to the failure stack, when
4719            it is allocated relocatably.  */
4720         fail_stack_elt_t *failure_stack_ptr;
4721 #endif
4722
4723         /* We fill all the registers internally, independent of what we
4724            return, for use in backreferences.  The number here includes
4725            an element for register zero.  */
4726         int num_regs = bufp->re_ngroups + 1;
4727
4728         /* The currently active registers.  */
4729         int lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4730         int highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4731
4732         /* Information on the contents of registers. These are pointers into
4733            the input strings; they record just what was matched (on this
4734            attempt) by a subexpression part of the pattern, that is, the
4735            regnum-th regstart pointer points to where in the pattern we began
4736            matching and the regnum-th regend points to right after where we
4737            stopped matching the regnum-th subexpression.  (The zeroth register
4738            keeps track of what the whole pattern matches.)  */
4739 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4740         re_char **regstart, **regend;
4741 #endif
4742
4743         /* If a group that's operated upon by a repetition operator fails to
4744            match anything, then the register for its start will need to be
4745            restored because it will have been set to wherever in the string we
4746            are when we last see its open-group operator.  Similarly for a
4747            register's end.  */
4748 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4749         re_char **old_regstart, **old_regend;
4750 #endif
4751
4752         /* The is_active field of reg_info helps us keep track of which (possibly
4753            nested) subexpressions we are currently in. The matched_something
4754            field of reg_info[reg_num] helps us tell whether or not we have
4755            matched any of the pattern so far this time through the reg_num-th
4756            subexpression.  These two fields get reset each time through any
4757            loop their register is in.  */
4758 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, this is global.  */
4759         register_info_type *reg_info;
4760 #endif
4761
4762         /* The following record the register info as found in the above
4763            variables when we find a match better than any we've seen before.
4764            This happens as we backtrack through the failure points, which in
4765            turn happens only if we have not yet matched the entire string. */
4766         unsigned best_regs_set = false;
4767 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4768         re_char **best_regstart, **best_regend;
4769 #endif
4770
4771         /* Logically, this is `best_regend[0]'.  But we don't want to have to
4772            allocate space for that if we're not allocating space for anything
4773            else (see below).  Also, we never need info about register 0 for
4774            any of the other register vectors, and it seems rather a kludge to
4775            treat `best_regend' differently than the rest.  So we keep track of
4776            the end of the best match so far in a separate variable.  We
4777            initialize this to NULL so that when we backtrack the first time
4778            and need to test it, it's not garbage.  */
4779         re_char *match_end = NULL;
4780
4781         /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
4782         int set_regs_matched_done = 0;
4783
4784         /* Used when we pop values we don't care about.  */
4785 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4786         re_char **reg_dummy;
4787         register_info_type *reg_info_dummy;
4788 #endif
4789
4790 #ifdef REGEX_DEBUG_FLAG
4791         /* Counts the total number of registers pushed.  */
4792         unsigned num_regs_pushed = 0;
4793 #endif
4794
4795         /* 1 if this match ends in the same string (string1 or string2)
4796            as the best previous match.  */
4797         re_bool same_str_p;
4798
4799         /* 1 if this match is the best seen so far.  */
4800         re_bool best_match_p;
4801
4802         DEBUG_PRINT1("\n\nEntering re_match_2.\n");
4803
4804         INIT_FAIL_STACK();
4805
4806 #ifdef MATCH_MAY_ALLOCATE
4807         /* Do not bother to initialize all the register variables if there are
4808            no groups in the pattern, as it takes a fair amount of time.  If
4809            there are groups, we include space for register 0 (the whole
4810            pattern), even though we never use it, since it simplifies the
4811            array indexing.  We should fix this.  */
4812         if (bufp->re_ngroups) {
4813                 regstart = REGEX_TALLOC(num_regs, re_char *);
4814                 regend = REGEX_TALLOC(num_regs, re_char *);
4815                 old_regstart = REGEX_TALLOC(num_regs, re_char *);
4816                 old_regend = REGEX_TALLOC(num_regs, re_char *);
4817                 best_regstart = REGEX_TALLOC(num_regs, re_char *);
4818                 best_regend = REGEX_TALLOC(num_regs, re_char *);
4819                 reg_info = REGEX_TALLOC(num_regs, register_info_type);
4820                 reg_dummy = REGEX_TALLOC(num_regs, re_char *);
4821                 reg_info_dummy = REGEX_TALLOC(num_regs, register_info_type);
4822
4823                 if (!
4824                     (regstart && regend && old_regstart && old_regend
4825                      && reg_info && best_regstart && best_regend && reg_dummy
4826                      && reg_info_dummy)) {
4827                         FREE_VARIABLES();
4828                         return -2;
4829                 }
4830         } else {
4831                 /* We must initialize all our variables to NULL, so that
4832                    `FREE_VARIABLES' doesn't try to free them.  */
4833                 regstart = regend = old_regstart = old_regend = best_regstart
4834                     = best_regend = reg_dummy = NULL;
4835                 reg_info = reg_info_dummy = (register_info_type *) NULL;
4836         }
4837 #endif                          /* MATCH_MAY_ALLOCATE */
4838
4839         /* The starting position is bogus.  */
4840         if (pos < 0 || pos > size1 + size2) {
4841                 FREE_VARIABLES();
4842                 return -1;
4843         }
4844
4845         /* Initialize subexpression text positions to -1 to mark ones that no
4846            start_memory/stop_memory has been seen for. Also initialize the
4847            register information struct.  */
4848         for (mcnt = 1; mcnt < num_regs; mcnt++) {
4849                 regstart[mcnt] = regend[mcnt]
4850                     = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4851
4852                 REG_MATCH_NULL_STRING_P(reg_info[mcnt]) =
4853                     MATCH_NULL_UNSET_VALUE;
4854                 IS_ACTIVE(reg_info[mcnt]) = 0;
4855                 MATCHED_SOMETHING(reg_info[mcnt]) = 0;
4856                 EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
4857         }
4858         /* We move `string1' into `string2' if the latter's empty -- but not if
4859            `string1' is null.  */
4860         if (size2 == 0 && string1 != NULL) {
4861                 string2 = string1;
4862                 size2 = size1;
4863                 string1 = 0;
4864                 size1 = 0;
4865         }
4866         end1 = string1 + size1;
4867         end2 = string2 + size2;
4868
4869         /* Compute where to stop matching, within the two strings.  */
4870         if (stop <= size1) {
4871                 end_match_1 = string1 + stop;
4872                 end_match_2 = string2;
4873         } else {
4874                 end_match_1 = end1;
4875                 end_match_2 = string2 + stop - size1;
4876         }
4877
4878         /* `p' scans through the pattern as `d' scans through the data.
4879            `dend' is the end of the input string that `d' points within.  `d'
4880            is advanced into the following input string whenever necessary, but
4881            this happens before fetching; therefore, at the beginning of the
4882            loop, `d' can be pointing at the end of a string, but it cannot
4883            equal `string2'.  */
4884         if (size1 > 0 && pos <= size1) {
4885                 d = string1 + pos;
4886                 dend = end_match_1;
4887         } else {
4888                 d = string2 + pos - size1;
4889                 dend = end_match_2;
4890         }
4891
4892         DEBUG_PRINT1("The compiled pattern is: \n");
4893         DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
4894         DEBUG_PRINT1("The string to match is: `");
4895         DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
4896         DEBUG_PRINT1("'\n");
4897
4898         /* This loops over pattern commands.  It exits by returning from the
4899            function if the match is complete, or it drops through if the match
4900            fails at this starting point in the input data.  */
4901         for (;;) {
4902                 DEBUG_PRINT2("\n0x%lx: ", (long)p);
4903 #ifdef emacs                    /* XEmacs added, w/removal of immediate_quit */
4904                 if (!no_quit_in_re_search)
4905                         QUIT;
4906 #endif
4907
4908                 if (p == pend) {
4909                         /* End of pattern means we might have succeeded.  */
4910                         DEBUG_PRINT1("end of pattern ... ");
4911
4912                         /* If we haven't matched the entire string, and we want
4913                            the longest match, try backtracking.  */
4914                         if (d != end_match_2) {
4915                                 same_str_p = (FIRST_STRING_P(match_end)
4916                                               == MATCHING_IN_FIRST_STRING);
4917
4918                                 /* AIX compiler got confused when this was
4919                                    combined with the previous declaration.  */
4920                                 if (same_str_p)
4921                                         best_match_p = d > match_end;
4922                                 else
4923                                         best_match_p =
4924                                             !MATCHING_IN_FIRST_STRING;
4925
4926                                 DEBUG_PRINT1("backtracking.\n");
4927
4928                                 if (!FAIL_STACK_EMPTY()) {
4929                                         /* More failure points to try.  */
4930
4931                                         /* If exceeds best match so far, save
4932                                            it.  */
4933                                         if (!best_regs_set || best_match_p) {
4934                                                 best_regs_set = true;
4935                                                 match_end = d;
4936
4937                                                 DEBUG_PRINT1
4938                                                     ("\nSAVING match as best so far.\n");
4939
4940                                                 for (mcnt = 1; mcnt < num_regs;
4941                                                      mcnt++) {
4942                                                         best_regstart[mcnt] =
4943                                                             regstart[mcnt];
4944                                                         best_regend[mcnt] =
4945                                                             regend[mcnt];
4946                                                 }
4947                                         }
4948                                         goto fail;
4949                                 }
4950
4951                                 /* If no failure points, don't restore garbage.
4952                                    And if last match is real best match, don't
4953                                    restore second best one. */
4954                                 else if (best_regs_set && !best_match_p) {
4955                                       restore_best_regs:
4956                                         /* Restore best match.  It may happen
4957                                            that `dend == end_match_1' while the
4958                                            restored d is in string2.  For
4959                                            example, the pattern `x.*y.*z'
4960                                            against the strings `x-' and `y-z-',
4961                                            if the two strings are not
4962                                            consecutive in memory.  */
4963                                         DEBUG_PRINT1
4964                                             ("Restoring best registers.\n");
4965
4966                                         d = match_end;
4967                                         dend = ((d >= string1 && d <= end1)
4968                                                 ? end_match_1 : end_match_2);
4969
4970                                         for (mcnt = 1; mcnt < num_regs; mcnt++) {
4971                                                 regstart[mcnt] =
4972                                                     best_regstart[mcnt];
4973                                                 regend[mcnt] =
4974                                                     best_regend[mcnt];
4975                                         }
4976                                 }
4977                         }
4978                         /* d != end_match_2 */
4979                 succeed_label:
4980                         DEBUG_PRINT1("Accepting match.\n");
4981                         {
4982                                 int num_nonshy_regs = bufp->re_nsub + 1;
4983                                 /* If caller wants register contents data back,
4984                                    fill REGS.  */
4985                                 if (regs && !bufp->no_sub) {
4986                                         /* Have the register data arrays been
4987                                            allocated?  */
4988                                         if (bufp->regs_allocated == REGS_UNALLOCATED) { 
4989                                           /* No.  So allocate them with malloc.
4990                                              We need one extra element beyond
4991                                              `num_regs' for the `-1' marker GNU
4992                                              code uses.  */
4993                                                 regs->num_regs = MAX(RE_NREGS, num_nonshy_regs + 1);
4994                                                 regs->start = TALLOC(regs->num_regs, regoff_t);
4995                                                 regs->end = TALLOC(regs->num_regs, regoff_t);
4996                                                 if (regs->start == NULL || regs->end == NULL) {
4997                                                         FREE_VARIABLES ();
4998                                                         return -2;
4999                                                 }
5000                                                 bufp->regs_allocated = REGS_REALLOCATE;
5001                                         } else if (bufp->regs_allocated == REGS_REALLOCATE) { 
5002                                           /* Yes.  If we need more elements than were already
5003                                              allocated, reallocate them.  If we need fewer, just
5004                                              leave it alone.  */
5005                                                 if (regs->num_regs < num_nonshy_regs + 1) {
5006                                                         regs->num_regs = num_nonshy_regs + 1;
5007                                                         RETALLOC(regs->start, regs->num_regs, regoff_t);
5008                                                         RETALLOC(regs->end, regs->num_regs, regoff_t);
5009                                                         if (regs->start == NULL || regs->end == NULL) {
5010                                                                 FREE_VARIABLES();
5011                                                                 return -2;
5012                                                         }
5013                                                 }
5014                                         } else {
5015                                                 /* The braces fend off a "empty body in an else-statement"
5016                                                    warning under GCC when assert expands to nothing.  */
5017                                                 assert (bufp->regs_allocated == REGS_FIXED);
5018                                         }
5019
5020                                         /* Convert the pointer data in `regstart' and `regend' to
5021                                            indices.  Register zero has to be set differently,
5022                                            since we haven't kept track of any info for it.  */
5023                                         if (regs->num_regs > 0) {
5024                                                 regs->start[0] = pos;
5025                                                 regs->end[0] = (MATCHING_IN_FIRST_STRING
5026                                                                 ? ((regoff_t) (d - string1))
5027                                                                 : ((regoff_t) (d - string2 + size1)));
5028                                         }
5029
5030                                         /* Map over the NUM_NONSHY_REGS non-shy internal registers.
5031                                            Copy each into the corresponding external register.
5032                                            N.B. MCNT indexes external registers. */
5033                                         for (mcnt = 1;
5034                                              mcnt < MIN (num_nonshy_regs, regs->num_regs);
5035                                              mcnt++) {
5036                                                 int ireg = bufp->external_to_internal_register[mcnt];
5037
5038                                                 if (REG_UNSET (regstart[ireg]) || REG_UNSET (regend[ireg]))
5039                                                         regs->start[mcnt] = regs->end[mcnt] = -1;
5040                                                 else {
5041                                                         regs->start[mcnt]
5042                                                                 = (regoff_t) POINTER_TO_OFFSET (regstart[ireg]);
5043                                                         regs->end[mcnt]
5044                                                                 = (regoff_t) POINTER_TO_OFFSET (regend[ireg]);
5045                                                 }
5046                                         }
5047                                 } /* regs && !bufp->no_sub */
5048
5049                                 /* If we have regs and the regs structure has
5050                                    more elements than were in the pattern, set
5051                                    the extra elements to -1.  If we
5052                                    (re)allocated the registers, this is the
5053                                    case, because we always allocate enough to
5054                                    have at least one -1 at the end.
5055
5056                                    We do this even when no_sub is set because
5057                                    some applications (XEmacs) reuse register
5058                                    structures which may contain stale
5059                                    information, and permit attempts to access
5060                                    those registers.
5061
5062                                    It would be possible to require the caller to
5063                                    do this, but we'd have to change the API for
5064                                    this function to reflect that, and audit all
5065                                    callers. */
5066                                 if (regs && regs->num_regs > 0)
5067                                         for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++)
5068                                                 regs->start[mcnt] = regs->end[mcnt] = -1;
5069                         }
5070
5071                         DEBUG_PRINT4("%u failure points pushed, %u popped (%u remain).\n",
5072                                       nfailure_points_pushed, nfailure_points_popped,
5073                                       nfailure_points_pushed - nfailure_points_popped);
5074                         DEBUG_PRINT2("%u registers pushed.\n", num_regs_pushed);
5075
5076                         mcnt = d - pos - (MATCHING_IN_FIRST_STRING
5077                                           ? string1
5078                                           : string2 - size1);
5079
5080                         DEBUG_PRINT2("Returning %d from re_match_2.\n", mcnt);
5081
5082                         FREE_VARIABLES();
5083                         return mcnt;
5084                 }
5085
5086                 /* Otherwise match next pattern command.  */
5087                 switch (SWITCH_ENUM_CAST((re_opcode_t) *p++)) {
5088                         /* Ignore these.  Used to ignore the n of succeed_n's
5089                            which currently have n == 0.  */
5090                 case no_op:
5091                         DEBUG_PRINT1("EXECUTING no_op.\n");
5092                         break;
5093
5094                 case succeed:
5095                         DEBUG_PRINT1("EXECUTING succeed.\n");
5096                         goto succeed_label;
5097
5098                         /* Match the next n pattern characters exactly.  The
5099                            following byte in the pattern defines n, and the n
5100                            bytes after that are the characters to match.  */
5101                 case exactn:
5102                         mcnt = *p++;
5103                         DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
5104
5105                         /* This is written out as an if-else so we don't waste
5106                            time testing `translate' inside the loop.  */
5107                         if (TRANSLATE_P(translate)) {
5108                                 do {
5109 #ifdef MULE
5110                                         Emchar pat_ch, buf_ch;
5111                                         Bytecount pat_len;
5112
5113                                         REGEX_PREFETCH();
5114                                         pat_ch = charptr_emchar(p);
5115                                         buf_ch = charptr_emchar(d);
5116                                         if (RE_TRANSLATE(buf_ch) != pat_ch)
5117                                                 goto fail;
5118
5119                                         pat_len = charcount_to_bytecount(p, 1);
5120                                         p += pat_len;
5121                                         INC_CHARPTR(d);
5122
5123                                         mcnt -= pat_len;
5124 #else                           /* not MULE */
5125                                         REGEX_PREFETCH();
5126                                         if ((unsigned char)RE_TRANSLATE(*d++) !=
5127                                             *p++)
5128                                                 goto fail;
5129                                         mcnt--;
5130 #endif
5131                                 }
5132                                 while (mcnt > 0);
5133                         } else {
5134                                 do {
5135                                         REGEX_PREFETCH();
5136                                         if (*d++ != *p++)
5137                                                 goto fail;
5138                                 }
5139                                 while (--mcnt);
5140                         }
5141                         SET_REGS_MATCHED();
5142                         break;
5143
5144                         /* Match any character except possibly a newline or a
5145                            null.  */
5146                 case anychar:
5147                         DEBUG_PRINT1("EXECUTING anychar.\n");
5148
5149                         REGEX_PREFETCH();
5150
5151                         if ((!(bufp->syntax & RE_DOT_NEWLINE)
5152                              && TRANSLATE(*d) == '\n')
5153                             || (bufp->syntax & RE_DOT_NOT_NULL
5154                                 && TRANSLATE(*d) == '\000'))
5155                                 goto fail;
5156
5157                         SET_REGS_MATCHED();
5158                         DEBUG_PRINT2("  Matched `%d'.\n", *d);
5159                         INC_CHARPTR(d); /* XEmacs change */
5160                         break;
5161
5162                 case charset:
5163                 case charset_not: {
5164                         REGISTER unsigned char c;
5165                         re_bool not_p =
5166                                 (re_opcode_t) * (p - 1) == charset_not;
5167
5168                         DEBUG_PRINT2("EXECUTING charset%s.\n",
5169                                      not_p ? "_not" : "");
5170
5171                         REGEX_PREFETCH();
5172                         /* The character to match.  */
5173                         c = TRANSLATE(*d);
5174
5175                         /* Cast to `unsigned' instead of `unsigned char'
5176                            in case the bit list is a full 32 bytes
5177                            long.  */
5178                         if (c < (unsigned)(*p * BYTEWIDTH)
5179                             && p[1 +
5180                                  c / BYTEWIDTH] & (1 << (c %
5181                                                          BYTEWIDTH)))
5182                                 not_p = !not_p;
5183
5184                         p += 1 + *p;
5185
5186                         if (!not_p)
5187                                 goto fail;
5188
5189                         SET_REGS_MATCHED();
5190                         INC_CHARPTR(d); /* XEmacs change */
5191                         break;
5192                 }
5193
5194 #ifdef MULE
5195                 case charset_mule:
5196                 case charset_mule_not: {
5197                         REGISTER Emchar c;
5198                         re_bool not_p =
5199                                 (re_opcode_t) * (p - 1) == charset_mule_not;
5200
5201                         DEBUG_PRINT2("EXECUTING charset_mule%s.\n",
5202                                      not_p ? "_not" : "");
5203
5204                         REGEX_PREFETCH();
5205                         c = charptr_emchar((const Bufbyte *)d);
5206                         /* The character to match.  */
5207                         c = TRANSLATE(c);
5208
5209                         if (EQ
5210                             (Qt,
5211                              unified_range_table_lookup(p, c, Qnil)))
5212                                 not_p = !not_p;
5213
5214                         p += unified_range_table_bytes_used(p);
5215
5216                         if (!not_p)
5217                                 goto fail;
5218
5219                         SET_REGS_MATCHED();
5220                         INC_CHARPTR(d);
5221                         break;
5222                 }
5223 #endif                          /* MULE */
5224
5225                         /* The beginning of a group is represented by
5226                            start_memory.  The arguments are the register number
5227                            in the next byte, and the number of groups inner to
5228                            this one in the next.  The text matched within the
5229                            group is recorded (in the internal registers data
5230                            structure) under the register number.  */
5231                 case start_memory:
5232                         DEBUG_PRINT3("EXECUTING start_memory %d (%d):\n", *p,
5233                                      p[1]);
5234
5235                         /* Find out if this group can match the empty string.  */
5236                         p1 = p; /* To send to group_match_null_string_p.  */
5237
5238                         if (REG_MATCH_NULL_STRING_P(reg_info[*p]) ==
5239                             MATCH_NULL_UNSET_VALUE)
5240                                 REG_MATCH_NULL_STRING_P(reg_info[*p])
5241                                     = group_match_null_string_p(&p1, pend,
5242                                                                 reg_info);
5243
5244                         /* Save the position in the string where we were the
5245                            last time we were at this open-group operator in case
5246                            the group is operated upon by a repetition operator,
5247                            e.g., with `(a*)*b' against `ab'; then we want to
5248                            ignore where we are now in the string in case this
5249                            attempt to match fails.  */
5250                         old_regstart[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
5251                             ? REG_UNSET(regstart[*p]) ? d : regstart[*p]
5252                             : regstart[*p];
5253                         DEBUG_PRINT2("  old_regstart: %d\n",
5254                                      POINTER_TO_OFFSET(old_regstart[*p]));
5255
5256                         regstart[*p] = d;
5257                         DEBUG_PRINT2("  regstart: %d\n",
5258                                      POINTER_TO_OFFSET(regstart[*p]));
5259
5260                         IS_ACTIVE(reg_info[*p]) = 1;
5261                         MATCHED_SOMETHING(reg_info[*p]) = 0;
5262
5263                         /* Clear this whenever we change the register activity
5264                            status.  */
5265                         set_regs_matched_done = 0;
5266
5267                         /* This is the new highest active register.  */
5268                         highest_active_reg = *p;
5269
5270                         /* If nothing was active before, this is the new lowest
5271                            active register.  */
5272                         if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5273                                 lowest_active_reg = *p;
5274
5275                         /* Move past the register number and inner group
5276                            count.  */
5277                         p += 2;
5278                         just_past_start_mem = p;
5279
5280                         break;
5281
5282                         /* The stop_memory opcode represents the end of a group.
5283                            Its arguments are the same as start_memory's: the
5284                            register number, and the number of inner groups.  */
5285                 case stop_memory:
5286                         DEBUG_PRINT3("EXECUTING stop_memory %d (%d):\n", *p,
5287                                      p[1]);
5288
5289                         /* We need to save the string position the last time we
5290                            were at this close-group operator in case the group
5291                            is operated upon by a repetition operator, e.g., with
5292                            `((a*)*(b*)*)*' against `aba'; then we want to ignore
5293                            where we are now in the string in case this attempt
5294                            to match fails.  */
5295                         old_regend[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
5296                             ? REG_UNSET(regend[*p]) ? d : regend[*p]
5297                             : regend[*p];
5298                         DEBUG_PRINT2("      old_regend: %d\n",
5299                                      POINTER_TO_OFFSET(old_regend[*p]));
5300
5301                         regend[*p] = d;
5302                         DEBUG_PRINT2("      regend: %d\n",
5303                                      POINTER_TO_OFFSET(regend[*p]));
5304
5305                         /* This register isn't active anymore.  */
5306                         IS_ACTIVE(reg_info[*p]) = 0;
5307
5308                         /* Clear this whenever we change the register activity
5309                            status.  */
5310                         set_regs_matched_done = 0;
5311
5312                         /* If this was the only register active, nothing is
5313                            active anymore.  */
5314                         if (lowest_active_reg == highest_active_reg) {
5315                                 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5316                                 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5317                         } else {        /* We must scan for the new highest
5318                                            active register, since it isn't
5319                                            necessarily one less than now:
5320                                            consider (a(b)c(d(e)f)g).  When group
5321                                            3 ends, after the f), the new highest
5322                                            active register is 1.  */
5323                                 unsigned char r = *p - 1;
5324                                 while (r > 0 && !IS_ACTIVE(reg_info[r]))
5325                                         r--;
5326
5327                                 /* If we end up at register zero, that means
5328                                    that we saved the registers as the result of
5329                                    an `on_failure_jump', not a `start_memory',
5330                                    and we jumped to past the innermost
5331                                    `stop_memory'.  For example, in ((.)*) we
5332                                    save registers 1 and 2 as a result of the *,
5333                                    but when we pop back to the second ), we are
5334                                    at the stop_memory 1.  Thus, nothing is
5335                                    active.  */
5336                                 if (r == 0) {
5337                                         lowest_active_reg =
5338                                             NO_LOWEST_ACTIVE_REG;
5339                                         highest_active_reg =
5340                                             NO_HIGHEST_ACTIVE_REG;
5341                                 } else {
5342                                         highest_active_reg = r;
5343
5344                                         /* 98/9/21 jhod: We've also gotta set
5345                                            lowest_active_reg, don't we? */
5346                                         r = 1;
5347                                         while (r < highest_active_reg
5348                                                && !IS_ACTIVE(reg_info[r]))
5349                                                 r++;
5350                                         lowest_active_reg = r;
5351                                 }
5352                         }
5353
5354                         /* If just failed to match something this time around
5355                            with a group that's operated on by a repetition
5356                            operator, try to force exit from the ``loop'', and
5357                            restore the register information for this group that
5358                            we had before trying this last match.  */
5359                         if ((!MATCHED_SOMETHING(reg_info[*p])
5360                              || just_past_start_mem == p - 1)
5361                             && (p + 2) < pend) {
5362                                 re_bool is_a_jump_n = false;
5363
5364                                 p1 = p + 2;
5365                                 mcnt = 0;
5366                                 switch ((unsigned int)(re_opcode_t)*p1++) {
5367                                 case jump_n:
5368                                         is_a_jump_n = true;
5369                                 case pop_failure_jump:
5370                                 case maybe_pop_jump:
5371                                 case jump:
5372                                 case dummy_failure_jump:
5373                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
5374                                         if (is_a_jump_n)
5375                                                 p1 += 2;
5376                                         break;
5377
5378                                 default:
5379                                         /* do nothing */ ;
5380                                 }
5381                                 p1 += mcnt;
5382
5383                                 /* If the next operation is a jump backwards in
5384                                    the pattern to an on_failure_jump right
5385                                    before the start_memory corresponding to this
5386                                    stop_memory, exit from the loop by forcing a
5387                                    failure after pushing on the stack the
5388                                    on_failure_jump's jump in the pattern, and
5389                                    d.  */
5390                                 if (mcnt < 0
5391                                     && (re_opcode_t) * p1 == on_failure_jump
5392                                     && (re_opcode_t) p1[3] == start_memory
5393                                     && p1[4] == *p) {
5394                                         /* If this group ever matched anything,
5395                                            then restore what its registers were
5396                                            before trying this last failed match,
5397                                            e.g., with `(a*)*b' against `ab' for
5398                                            regstart[1], and, e.g., with
5399                                            `((a*)*(b*)*)*' against `aba' for
5400                                            regend[3].
5401
5402                                            Also restore the registers for inner
5403                                            groups for, e.g., `((a*)(b*))*'
5404                                            against `aba' (register 3 would
5405                                            otherwise get trashed).  */
5406
5407                                         if (EVER_MATCHED_SOMETHING
5408                                             (reg_info[*p])) {
5409                                                 int r;
5410
5411                                                 EVER_MATCHED_SOMETHING(reg_info
5412                                                                        [*p]) =
5413                                                     0;
5414
5415                                                 /* Restore this and inner
5416                                                    groups' (if any)
5417                                                    registers.  */
5418                                                 for (r = *p; r < *p + *(p + 1);
5419                                                      r++) {
5420                                                         regstart[r] =
5421                                                             old_regstart[r];
5422
5423                                                         /* xx why this test?  */
5424                                                         if (old_regend[r] >=
5425                                                             regstart[r])
5426                                                                 regend[r] =
5427                                                                     old_regend
5428                                                                     [r];
5429                                                 }
5430                                         }
5431                                         p1++;
5432                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
5433                                         PUSH_FAILURE_POINT(p1 + mcnt, d, -2);
5434
5435                                         goto fail;
5436                                 }
5437                         }
5438
5439                         /* Move past the register number and the inner group
5440                            count.  */
5441                         p += 2;
5442                         break;
5443
5444                         /* \<digit> has been turned into a `duplicate' command
5445                            which is followed by the numeric value of <digit> as
5446                            the register number.  (Already passed through
5447                            external-to-internal-register mapping, so it refers
5448                            to the actual group number, not the non-shy-only
5449                            numbering used in the external world.) */
5450                 case duplicate:
5451                         {
5452                                 REGISTER re_char *d2, *dend2;
5453                                 /* Get which register to match against.  */
5454                                 int regno = *p++;
5455                                 DEBUG_PRINT2("EXECUTING duplicate %d.\n",
5456                                              regno);
5457
5458                                 /* Can't back reference a group which we've
5459                                    never matched.  */
5460                                 if (REG_UNSET(regstart[regno])
5461                                     || REG_UNSET(regend[regno]))
5462                                         goto fail;
5463
5464                                 /* Where in input to try to start matching.  */
5465                                 d2 = regstart[regno];
5466
5467                                 /* Where to stop matching; if both the place to
5468                                    start and the place to stop matching are in
5469                                    the same string, then set to the place to
5470                                    stop, otherwise, for now have to use the end
5471                                    of the first string.  */
5472
5473                                 dend2 = ((FIRST_STRING_P(regstart[regno])
5474                                           == FIRST_STRING_P(regend[regno]))
5475                                          ? regend[regno] : end_match_1);
5476                                 for (;;) {
5477                                         /* If necessary, advance to next segment
5478                                            in register contents.  */
5479                                         while (d2 == dend2) {
5480                                                 if (dend2 == end_match_2)
5481                                                         break;
5482                                                 if (dend2 == regend[regno])
5483                                                         break;
5484
5485                                                 /* End of string1 => advance to
5486                                                    string2. */
5487                                                 d2 = string2;
5488                                                 dend2 = regend[regno];
5489                                         }
5490                                         /* At end of register contents =>
5491                                            success */
5492                                         if (d2 == dend2)
5493                                                 break;
5494
5495                                         /* If necessary, advance to next segment
5496                                            in data.  */
5497                                         REGEX_PREFETCH();
5498
5499                                         /* How many characters left in this segment to match.  */
5500                                         mcnt = dend - d;
5501
5502                                         /* Want how many consecutive characters
5503                                            we can match in one shot, so, if
5504                                            necessary, adjust the count.  */
5505                                         if (mcnt > dend2 - d2)
5506                                                 mcnt = dend2 - d2;
5507
5508                                         /* Compare that many; failure if
5509                                            mismatch, else move past them.  */
5510                                         if (TRANSLATE_P(translate)
5511                                             ? bcmp_translate(
5512                                                     (const unsigned char*)d,
5513                                                     (const unsigned char*)d2,
5514                                                     mcnt, translate)
5515                                             : memcmp(d, d2, mcnt)) {
5516                                                 goto fail;
5517                                         }
5518                                         d += mcnt, d2 += mcnt;
5519
5520                                         /* Do this because we've match some
5521                                            characters.  */
5522                                         SET_REGS_MATCHED();
5523                                 }
5524                         }
5525                         break;
5526
5527                         /* begline matches the empty string at the beginning of
5528                            the string (unless `not_bol' is set in `bufp'), and,
5529                            if `newline_anchor' is set, after newlines.  */
5530                 case begline:
5531                         DEBUG_PRINT1("EXECUTING begline.\n");
5532
5533                         if (AT_STRINGS_BEG(d)) {
5534                                 if (!bufp->not_bol)
5535                                         break;
5536                         } else if (d[-1] == '\n' && bufp->newline_anchor) {
5537                                 break;
5538                         }
5539                         /* In all other cases, we fail.  */
5540                         goto fail;
5541
5542                         /* endline is the dual of begline.  */
5543                 case endline:
5544                         DEBUG_PRINT1("EXECUTING endline.\n");
5545
5546                         if (AT_STRINGS_END(d)) {
5547                                 if (!bufp->not_eol)
5548                                         break;
5549                         }
5550
5551                         /* We have to ``prefetch'' the next character.  */
5552                         else if ((d == end1 ? *string2 : *d) == '\n'
5553                                  && bufp->newline_anchor) {
5554                                 break;
5555                         }
5556                         goto fail;
5557
5558                         /* Match at the very beginning of the data.  */
5559                 case begbuf:
5560                         DEBUG_PRINT1("EXECUTING begbuf.\n");
5561                         if (AT_STRINGS_BEG(d))
5562                                 break;
5563                         goto fail;
5564
5565                         /* Match at the very end of the data.  */
5566                 case endbuf:
5567                         DEBUG_PRINT1("EXECUTING endbuf.\n");
5568                         if (AT_STRINGS_END(d))
5569                                 break;
5570                         goto fail;
5571
5572                         /* on_failure_keep_string_jump is used to optimize
5573                            `.*\n'.  It pushes NULL as the value for the string
5574                            on the stack.  Then `pop_failure_point' will keep the
5575                            current value for the string, instead of restoring
5576                            it.  To see why, consider matching `foo\nbar' against
5577                            `.*\n'.  The .* matches the foo; then the . fails
5578                            against the \n.  But the next thing we want to do is
5579                            match the \n against the \n; if we restored the
5580                            string value, we would be back at the foo.
5581
5582                            Because this is used only in specific cases, we don't
5583                            need to check all the things that `on_failure_jump'
5584                            does, to make sure the right things get saved on the
5585                            stack.  Hence we don't share its code.  The only
5586                            reason to push anything on the stack at all is that
5587                            otherwise we would have to change `anychar's code to
5588                            do something besides goto fail in this case; that
5589                            seems worse than this.  */
5590                 case on_failure_keep_string_jump:
5591                         DEBUG_PRINT1("EXECUTING on_failure_keep_string_jump");
5592
5593                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5594                         DEBUG_PRINT3(" %d (to 0x%lx):\n", mcnt,
5595                                      (long)(p + mcnt));
5596
5597                         PUSH_FAILURE_POINT(p + mcnt, (unsigned char *)0, -2);
5598                         break;
5599
5600                         /* Uses of on_failure_jump:
5601
5602                            Each alternative starts with an on_failure_jump that
5603                            points to the beginning of the next alternative.
5604                            Each alternative except the last ends with a jump
5605                            that in effect jumps past the rest of the
5606                            alternatives.  (They really jump to the ending jump
5607                            of the following alternative, because tensioning
5608                            these jumps is a hassle.)
5609
5610                            Repeats start with an on_failure_jump that points
5611                            past both the repetition text and either the
5612                            following jump or pop_failure_jump back to this
5613                            on_failure_jump.  */
5614                 case on_failure_jump:
5615                       on_failure:
5616                         DEBUG_PRINT1("EXECUTING on_failure_jump");
5617
5618                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5619                         DEBUG_PRINT3(" %d (to 0x%lx)", mcnt, (long)(p + mcnt));
5620
5621                         /* If this on_failure_jump comes right before a group
5622                            (i.e., the original * applied to a group), save the
5623                            information for that group and all inner ones, so
5624                            that if we fail back to this point, the group's
5625                            information will be correct.  For example, in
5626                            \(a*\)*\1, we need the preceding group, and in
5627                            \(\(a*\)b*\)\2, we need the inner group.  */
5628
5629                         /* We can't use `p' to check ahead because we push
5630                            a failure point to `p + mcnt' after we do this.  */
5631                         p1 = p;
5632
5633                         /* We need to skip no_op's before we look for the
5634                            start_memory in case this on_failure_jump is
5635                            happening as the result of a completed succeed_n, as
5636                            in \(a\)\{1,3\}b\1 against aba.  */
5637                         while (p1 < pend && (re_opcode_t) * p1 == no_op)
5638                                 p1++;
5639
5640                         if (p1 < pend && (re_opcode_t) * p1 == start_memory) {
5641                                 /* We have a new highest active register now.
5642                                    This will get reset at the start_memory we
5643                                    are about to get to, but we will have saved
5644                                    all the registers relevant to this repetition
5645                                    op, as described above.  */
5646                                 highest_active_reg = *(p1 + 1) + *(p1 + 2);
5647                                 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5648                                         lowest_active_reg = *(p1 + 1);
5649                         }
5650
5651                         DEBUG_PRINT1(":\n");
5652                         PUSH_FAILURE_POINT(p + mcnt, d, -2);
5653                         break;
5654
5655                         /* A smart repeat ends with `maybe_pop_jump'.
5656                            We change it to either `pop_failure_jump' or `jump'.  */
5657                 case maybe_pop_jump:
5658                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5659                         DEBUG_PRINT2("EXECUTING maybe_pop_jump %d.\n", mcnt);
5660                         {
5661                                 REGISTER unsigned char *p2 = p;
5662
5663                                 /* Compare the beginning of the repeat with what
5664                                    in the pattern follows its end. If we can
5665                                    establish that there is nothing that they
5666                                    would both match, i.e., that we would have to
5667                                    backtrack because of (as in, e.g., `a*a')
5668                                    then we can change to pop_failure_jump,
5669                                    because we'll never have to backtrack.
5670
5671                                    This is not true in the case of alternatives:
5672                                    in `(a|ab)*' we do need to backtrack to the
5673                                    `ab' alternative (e.g., if the string was
5674                                    `ab').  But instead of trying to detect that
5675                                    here, the alternative has put on a dummy
5676                                    failure point which is what we will end up
5677                                    popping.  */
5678
5679                                 /* Skip over open/close-group commands.  If what
5680                                    follows this loop is a ...+ construct, look
5681                                    at what begins its body, since we will have
5682                                    to match at least one of that.  */
5683                                 while (1) {
5684                                         if (p2 + 2 < pend
5685                                             && ((re_opcode_t) * p2 ==
5686                                                 stop_memory
5687                                                 || (re_opcode_t) * p2 ==
5688                                                 start_memory))
5689                                                 p2 += 3;
5690                                         else if (p2 + 6 < pend
5691                                                  && (re_opcode_t) * p2 ==
5692                                                  dummy_failure_jump)
5693                                                 p2 += 6;
5694                                         else
5695                                                 break;
5696                                 }
5697
5698                                 p1 = p + mcnt;
5699                                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5700                                    to the `maybe_finalize_jump' of this case.  Examine what
5701                                    follows.  */
5702
5703                                 /* If we're at the end of the pattern, we can change.  */
5704                                 if (p2 == pend) {
5705                                         /* Consider what happens when matching ":\(.*\)"
5706                                            against ":/".  I don't really understand this code
5707                                            yet.  */
5708                                         p[-3] = (unsigned char)pop_failure_jump;
5709                                         DEBUG_PRINT1
5710                                             ("  End of pattern: change to `pop_failure_jump'.\n");
5711                                 }
5712
5713                                 else if ((re_opcode_t) * p2 == exactn
5714                                          || (bufp->newline_anchor
5715                                              && (re_opcode_t) * p2 ==
5716                                              endline)) {
5717                                         REGISTER unsigned char c =
5718                                             *p2 ==
5719                                             (unsigned char)endline ? '\n' :
5720                                             p2[2];
5721
5722                                         if ((re_opcode_t) p1[3] == exactn
5723                                             && p1[5] != c) {
5724                                                 p[-3] =
5725                                                     (unsigned char)
5726                                                     pop_failure_jump;
5727                                                 DEBUG_PRINT3
5728                                                     ("  %c != %c => pop_failure_jump.\n",
5729                                                      c, p1[5]);
5730                                         }
5731
5732                                         else if ((re_opcode_t) p1[3] == charset
5733                                                  || (re_opcode_t) p1[3] ==
5734                                                  charset_not) {
5735                                                 int not_p =
5736                                                     (re_opcode_t) p1[3] ==
5737                                                     charset_not;
5738
5739                                                 if (c <
5740                                                     (unsigned char)(p1[4] *
5741                                                                     BYTEWIDTH)
5742                                                     && p1[5 +
5743                                                           c /
5744                                                           BYTEWIDTH] & (1 << (c
5745                                                                               %
5746                                                                               BYTEWIDTH)))
5747                                                         not_p = !not_p;
5748
5749                                                 /* `not_p' is equal to 1 if c would match, which means
5750                                                    that we can't change to pop_failure_jump.  */
5751                                                 if (!not_p) {
5752                                                         p[-3] =
5753                                                             (unsigned char)
5754                                                             pop_failure_jump;
5755                                                         DEBUG_PRINT1
5756                                                             ("  No match => pop_failure_jump.\n");
5757                                                 }
5758                                         }
5759                                 } else if ((re_opcode_t) * p2 == charset) {
5760 #ifdef REGEX_DEBUG_FLAG
5761                                         REGISTER unsigned char c
5762                                             =
5763                                             *p2 ==
5764                                             (unsigned char)endline ? '\n' :
5765                                             p2[2];
5766 #endif
5767
5768                                         if ((re_opcode_t) p1[3] == exactn
5769                                             && !((int)p2[1] * BYTEWIDTH >
5770                                                  (int)p1[5]
5771                                                  && (p2[2 + p1[5] / BYTEWIDTH]
5772                                                      & (1 <<
5773                                                         (p1[5] %
5774                                                          BYTEWIDTH))))) {
5775                                                 p[-3] =
5776                                                     (unsigned char)
5777                                                     pop_failure_jump;
5778                                                 DEBUG_PRINT3
5779                                                     ("  %c != %c => pop_failure_jump.\n",
5780                                                      c, p1[5]);
5781                                         }
5782
5783                                         else if ((re_opcode_t) p1[3] ==
5784                                                  charset_not) {
5785                                                 int idx;
5786                                                 /* We win if the charset_not inside the loop
5787                                                    lists every character listed in the charset after.  */
5788                                                 for (idx = 0; idx < (int)p2[1];
5789                                                      idx++)
5790                                                         if (!
5791                                                             (p2[2 + idx] == 0
5792                                                              || (idx <
5793                                                                  (int)p1[4]
5794                                                                  &&
5795                                                                  ((p2[2 + idx] &
5796                                                                    ~p1[5 +
5797                                                                        idx]) ==
5798                                                                   0))))
5799                                                                 break;
5800
5801                                                 if (idx == p2[1]) {
5802                                                         p[-3] =
5803                                                             (unsigned char)
5804                                                             pop_failure_jump;
5805                                                         DEBUG_PRINT1
5806                                                             ("  No match => pop_failure_jump.\n");
5807                                                 }
5808                                         } else if ((re_opcode_t) p1[3] ==
5809                                                    charset) {
5810                                                 int idx;
5811                                                 /* We win if the charset inside the loop
5812                                                    has no overlap with the one after the loop.  */
5813                                                 for (idx = 0;
5814                                                      idx < (int)p2[1]
5815                                                      && idx < (int)p1[4]; idx++)
5816                                                         if ((p2[2 + idx] &
5817                                                              p1[5 + idx]) != 0)
5818                                                                 break;
5819
5820                                                 if (idx == p2[1]
5821                                                     || idx == p1[4]) {
5822                                                         p[-3] =
5823                                                             (unsigned char)
5824                                                             pop_failure_jump;
5825                                                         DEBUG_PRINT1
5826                                                             ("  No match => pop_failure_jump.\n");
5827                                                 }
5828                                         }
5829                                 }
5830                         }
5831                         p -= 2; /* Point at relative address again.  */
5832                         if ((re_opcode_t) p[-1] != pop_failure_jump) {
5833                                 p[-1] = (unsigned char)jump;
5834                                 DEBUG_PRINT1("  Match => jump.\n");
5835                                 goto unconditional_jump;
5836                         }
5837                         /* Note fall through.  */
5838
5839                         /* The end of a simple repeat has a pop_failure_jump
5840                            back to its matching on_failure_jump, where the
5841                            latter will push a failure point.  The
5842                            pop_failure_jump takes off failure points put on by
5843                            this pop_failure_jump's matching on_failure_jump; we
5844                            got through the pattern to here from the matching
5845                            on_failure_jump, so didn't fail.  */
5846                 case pop_failure_jump: {
5847                         /* We need to pass separate storage for the
5848                            lowest and highest registers, even though we
5849                            don't care about the actual values.
5850                            Otherwise, we will restore only one register
5851                            from the stack, since lowest will == highest
5852                            in `pop_failure_point'.  */
5853                         int dummy_low_reg, dummy_high_reg;
5854                         re_char *pdummy;
5855                         re_char *sdummy = NULL;
5856
5857                         DEBUG_PRINT1("EXECUTING pop_failure_jump.\n");
5858                         POP_FAILURE_POINT(sdummy, pdummy,
5859                                           dummy_low_reg, dummy_high_reg,
5860                                           reg_dummy, reg_dummy,
5861                                           reg_info_dummy);
5862                 }
5863                         /* Note fall through.  */
5864
5865                         /* Unconditionally jump (without popping any failure
5866                            points).  */
5867                 case jump:
5868                 unconditional_jump:
5869                         /* Get the amount to jump. */
5870                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5871                         DEBUG_PRINT2("EXECUTING jump %d ", mcnt);
5872                         p += mcnt;      /* Do the jump.  */
5873                         DEBUG_PRINT2("(to 0x%lx).\n", (long)p);
5874                         break;
5875
5876                         /* We need this opcode so we can detect where alternatives end
5877                            in `group_match_null_string_p' et al.  */
5878                 case jump_past_alt:
5879                         DEBUG_PRINT1("EXECUTING jump_past_alt.\n");
5880                         goto unconditional_jump;
5881
5882                         /* Normally, the on_failure_jump pushes a failure point, which
5883                            then gets popped at pop_failure_jump.  We will end up at
5884                            pop_failure_jump, also, and with a pattern of, say, `a+', we
5885                            are skipping over the on_failure_jump, so we have to push
5886                            something meaningless for pop_failure_jump to pop.  */
5887                 case dummy_failure_jump:
5888                         DEBUG_PRINT1("EXECUTING dummy_failure_jump.\n");
5889                         /* It doesn't matter what we push for the string here.  What
5890                            the code at `fail' tests is the value for the pattern.  */
5891                         PUSH_FAILURE_POINT(NULL, NULL, -2);
5892                         goto unconditional_jump;
5893
5894                         /* At the end of an alternative, we need to push a dummy failure
5895                            point in case we are followed by a `pop_failure_jump', because
5896                            we don't want the failure point for the alternative to be
5897                            popped.  For example, matching `(a|ab)*' against `aab'
5898                            requires that we match the `ab' alternative.  */
5899                 case push_dummy_failure:
5900                         DEBUG_PRINT1("EXECUTING push_dummy_failure.\n");
5901                         /* See comments just above at `dummy_failure_jump' about the
5902                            two zeroes.  */
5903                         PUSH_FAILURE_POINT(NULL, NULL, -2);
5904                         break;
5905
5906                         /* Have to succeed matching what follows at least n times.
5907                            After that, handle like `on_failure_jump'.  */
5908                 case succeed_n:
5909                         EXTRACT_NUMBER(mcnt, p + 2);
5910                         DEBUG_PRINT2("EXECUTING succeed_n %d.\n", mcnt);
5911
5912                         assert(mcnt >= 0);
5913                         /* Originally, this is how many times we HAVE to succeed.  */
5914                         if (mcnt > 0) {
5915                                 mcnt--;
5916                                 p += 2;
5917                                 STORE_NUMBER_AND_INCR(p, mcnt);
5918                                 DEBUG_PRINT3("  Setting 0x%lx to %d.\n",
5919                                              (long)p, mcnt);
5920                         } else if (mcnt == 0) {
5921                                 DEBUG_PRINT2
5922                                     ("  Setting two bytes from 0x%lx to no_op.\n",
5923                                      (long)(p + 2));
5924                                 p[2] = (unsigned char)no_op;
5925                                 p[3] = (unsigned char)no_op;
5926                                 goto on_failure;
5927                         }
5928                         break;
5929
5930                 case jump_n:
5931                         EXTRACT_NUMBER(mcnt, p + 2);
5932                         DEBUG_PRINT2("EXECUTING jump_n %d.\n", mcnt);
5933
5934                         /* Originally, this is how many times we CAN jump.  */
5935                         if (mcnt) {
5936                                 mcnt--;
5937                                 STORE_NUMBER(p + 2, mcnt);
5938                                 goto unconditional_jump;
5939                         }
5940                         /* If don't have to jump any more, skip over the rest of command.  */
5941                         else
5942                                 p += 4;
5943                         break;
5944
5945                 case set_number_at:
5946                         {
5947                                 DEBUG_PRINT1("EXECUTING set_number_at.\n");
5948
5949                                 EXTRACT_NUMBER_AND_INCR(mcnt, p);
5950                                 p1 = p + mcnt;
5951                                 EXTRACT_NUMBER_AND_INCR(mcnt, p);
5952                                 DEBUG_PRINT3("  Setting 0x%lx to %d.\n",
5953                                              (long)p1, mcnt);
5954                                 STORE_NUMBER(p1, mcnt);
5955                                 break;
5956                         }
5957
5958                 case wordbound:
5959                         DEBUG_PRINT1("EXECUTING wordbound.\n");
5960                         should_succeed = 1;
5961                       matchwordbound:
5962                         {
5963                                 /* XEmacs change */
5964                                 /* Straightforward and (I hope) correct implementation.
5965                                    Probably should be optimized by arranging to compute
5966                                    pos only once. */
5967                                 /* emch1 is the character before d, syn1 is the syntax of
5968                                    emch1, emch2 is the character at d, and syn2 is the
5969                                    syntax of emch2. */
5970                                 Emchar emch1, emch2;
5971                                 /* GCC isn't smart enough to see these are initialized if used. */
5972                                 int syn1 = 0, syn2 = 0;
5973                                 re_char *d_before, *d_after;
5974                                 int result,
5975                                     at_beg = AT_STRINGS_BEG(d),
5976                                     at_end = AT_STRINGS_END(d);
5977 #ifdef emacs
5978                                 int xpos;
5979 #endif
5980
5981                                 if (at_beg && at_end) {
5982                                         result = 0;
5983                                 } else {
5984                                         if (!at_beg) {
5985                                                 d_before =
5986                                                     POS_BEFORE_GAP_UNSAFE(d);
5987                                                 DEC_CHARPTR(d_before);
5988                                                 emch1 =
5989                                                     charptr_emchar(d_before);
5990 #ifdef emacs
5991                                                 xpos =
5992                                                     SYNTAX_CACHE_BYTE_TO_CHAR
5993                                                     (PTR_TO_OFFSET(d)) - 1;
5994                                                 UPDATE_SYNTAX_CACHE(xpos);
5995 #endif
5996                                                 syn1 = SYNTAX_FROM_CACHE
5997                                                     (XCHAR_TABLE
5998                                                      (regex_emacs_buffer->
5999                                                       mirror_syntax_table),
6000                                                      emch1);
6001                                         }
6002                                         if (!at_end) {
6003                                                 d_after =
6004                                                     POS_AFTER_GAP_UNSAFE(d);
6005                                                 emch2 = charptr_emchar(d_after);
6006 #ifdef emacs
6007                                                 xpos =
6008                                                     SYNTAX_CACHE_BYTE_TO_CHAR
6009                                                     (PTR_TO_OFFSET(d));
6010                                                 UPDATE_SYNTAX_CACHE_FORWARD(xpos
6011                                                                             +
6012                                                                             1);
6013 #endif
6014                                                 syn2 = SYNTAX_FROM_CACHE
6015                                                     (XCHAR_TABLE
6016                                                      (regex_emacs_buffer->
6017                                                       mirror_syntax_table),
6018                                                      emch2);
6019                                         }
6020
6021                                         if (at_beg)
6022                                                 result = (syn2 == Sword);
6023                                         else if (at_end)
6024                                                 result = (syn1 == Sword);
6025                                         else
6026                                                 result =
6027                                                     ((syn1 == Sword) !=
6028                                                      (syn2 == Sword));
6029                                 }
6030
6031                                 if (result == should_succeed)
6032                                         break;
6033                                 goto fail;
6034                         }
6035
6036                 case notwordbound:
6037                         DEBUG_PRINT1("EXECUTING notwordbound.\n");
6038                         should_succeed = 0;
6039                         goto matchwordbound;
6040
6041                 case wordbeg:
6042                         DEBUG_PRINT1("EXECUTING wordbeg.\n");
6043                         if (AT_STRINGS_END(d))
6044                                 goto fail;
6045                         {
6046                                 /* XEmacs: this originally read:
6047
6048                                    if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
6049                                    break;
6050
6051                                  */
6052                                 re_char *dtmp = POS_AFTER_GAP_UNSAFE(d);
6053                                 Emchar emch = charptr_emchar(dtmp);
6054 #ifdef emacs
6055                                 int charpos =
6056                                     SYNTAX_CACHE_BYTE_TO_CHAR(PTR_TO_OFFSET(d));
6057                                 UPDATE_SYNTAX_CACHE(charpos);
6058 #endif
6059                                 if (SYNTAX_FROM_CACHE
6060                                     (XCHAR_TABLE
6061                                      (regex_emacs_buffer->mirror_syntax_table),
6062                                      emch) != Sword)
6063                                         goto fail;
6064                                 if (AT_STRINGS_BEG(d))
6065                                         break;
6066                                 dtmp = POS_BEFORE_GAP_UNSAFE(d);
6067                                 DEC_CHARPTR(dtmp);
6068                                 emch = charptr_emchar(dtmp);
6069 #ifdef emacs
6070                                 UPDATE_SYNTAX_CACHE_BACKWARD(charpos - 1);
6071 #endif
6072                                 if (SYNTAX_FROM_CACHE
6073                                     (XCHAR_TABLE
6074                                      (regex_emacs_buffer->mirror_syntax_table),
6075                                      emch) != Sword)
6076                                         break;
6077                                 goto fail;
6078                         }
6079
6080                 case wordend:
6081                         DEBUG_PRINT1("EXECUTING wordend.\n");
6082                         if (AT_STRINGS_BEG(d))
6083                                 goto fail;
6084                         {
6085                                 /* XEmacs: this originally read:
6086
6087                                    if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
6088                                    && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
6089                                    break;
6090
6091                                    The or condition is incorrect (reversed).
6092                                  */
6093                                 re_char *dtmp;
6094                                 Emchar emch;
6095 #ifdef emacs
6096                                 int charpos =
6097                                     SYNTAX_CACHE_BYTE_TO_CHAR(PTR_TO_OFFSET(d))
6098                                     - 1;
6099                                 UPDATE_SYNTAX_CACHE(charpos);
6100 #endif
6101                                 dtmp = POS_BEFORE_GAP_UNSAFE(d);
6102                                 DEC_CHARPTR(dtmp);
6103                                 emch = charptr_emchar(dtmp);
6104                                 if (SYNTAX_FROM_CACHE
6105                                     (XCHAR_TABLE
6106                                      (regex_emacs_buffer->mirror_syntax_table),
6107                                      emch) != Sword)
6108                                         goto fail;
6109                                 if (AT_STRINGS_END(d))
6110                                         break;
6111                                 dtmp = POS_AFTER_GAP_UNSAFE(d);
6112                                 emch = charptr_emchar(dtmp);
6113 #ifdef emacs
6114                                 UPDATE_SYNTAX_CACHE_FORWARD(charpos + 1);
6115 #endif
6116                                 if (SYNTAX_FROM_CACHE
6117                                     (XCHAR_TABLE
6118                                      (regex_emacs_buffer->mirror_syntax_table),
6119                                      emch) != Sword)
6120                                         break;
6121                                 goto fail;
6122                         }
6123
6124 #ifdef emacs
6125                 case before_dot:
6126                         DEBUG_PRINT1("EXECUTING before_dot.\n");
6127                         if (!
6128                             (NILP(regex_match_object)
6129                              || BUFFERP(regex_match_object))
6130                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6131                                 >= BUF_PT(regex_emacs_buffer)))
6132                                 goto fail;
6133                         break;
6134
6135                 case at_dot:
6136                         DEBUG_PRINT1("EXECUTING at_dot.\n");
6137                         if (!
6138                             (NILP(regex_match_object)
6139                              || BUFFERP(regex_match_object))
6140                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6141                                 != BUF_PT(regex_emacs_buffer)))
6142                                 goto fail;
6143                         break;
6144
6145                 case after_dot:
6146                         DEBUG_PRINT1("EXECUTING after_dot.\n");
6147                         if (!
6148                             (NILP(regex_match_object)
6149                              || BUFFERP(regex_match_object))
6150                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6151                                 <= BUF_PT(regex_emacs_buffer)))
6152                                 goto fail;
6153                         break;
6154 #if 0                           /* not emacs19 */
6155                 case at_dot:
6156                         DEBUG_PRINT1("EXECUTING at_dot.\n");
6157                         if (BUF_PTR_BYTE_POS
6158                             (regex_emacs_buffer,
6159                              (unsigned char *)d) + 1 !=
6160                             BUF_PT(regex_emacs_buffer))
6161                                 goto fail;
6162                         break;
6163 #endif                          /* not emacs19 */
6164
6165                 case syntaxspec:
6166                         DEBUG_PRINT2("EXECUTING syntaxspec %d.\n", mcnt);
6167                         mcnt = *p++;
6168                         goto matchsyntax;
6169
6170                 case wordchar:
6171                         DEBUG_PRINT1("EXECUTING Emacs wordchar.\n");
6172                         mcnt = (int)Sword;
6173                       matchsyntax:
6174                         should_succeed = 1;
6175                       matchornotsyntax:
6176                         {
6177                                 int matches;
6178                                 Emchar emch;
6179
6180                                 REGEX_PREFETCH();
6181 #ifdef emacs
6182                                 {
6183                                         int charpos =
6184                                             SYNTAX_CACHE_BYTE_TO_CHAR
6185                                             (PTR_TO_OFFSET(d));
6186                                         UPDATE_SYNTAX_CACHE(charpos);
6187                                 }
6188 #endif
6189
6190                                 emch = charptr_emchar((const Bufbyte *)d);
6191                                 matches =
6192                                     (SYNTAX_FROM_CACHE
6193                                      (XCHAR_TABLE
6194                                       (regex_emacs_buffer->mirror_syntax_table),
6195                                       emch) == (enum syntaxcode)mcnt);
6196                                 INC_CHARPTR(d);
6197                                 if (matches != should_succeed)
6198                                         goto fail;
6199                                 SET_REGS_MATCHED();
6200                         }
6201                         break;
6202
6203                 case notsyntaxspec:
6204                         DEBUG_PRINT2("EXECUTING notsyntaxspec %d.\n", mcnt);
6205                         mcnt = *p++;
6206                         goto matchnotsyntax;
6207
6208                 case notwordchar:
6209                         DEBUG_PRINT1("EXECUTING Emacs notwordchar.\n");
6210                         mcnt = (int)Sword;
6211                       matchnotsyntax:
6212                         should_succeed = 0;
6213                         goto matchornotsyntax;
6214
6215 #ifdef MULE
6216 /* 97/2/17 jhod Mule category code patch */
6217                 case categoryspec:
6218                         should_succeed = 1;
6219                       matchornotcategory:
6220                         {
6221                                 Emchar emch;
6222
6223                                 mcnt = *p++;
6224                                 REGEX_PREFETCH();
6225                                 emch = charptr_emchar((const Bufbyte *)d);
6226                                 INC_CHARPTR(d);
6227                                 if (check_category_char
6228                                     (emch, regex_emacs_buffer->category_table,
6229                                      mcnt, should_succeed))
6230                                         goto fail;
6231                                 SET_REGS_MATCHED();
6232                         }
6233                         break;
6234
6235                 case notcategoryspec:
6236                         should_succeed = 0;
6237                         goto matchornotcategory;
6238 /* end of category patch */
6239 #endif                          /* MULE */
6240 #else                           /* not emacs */
6241                 case wordchar:
6242                         DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n");
6243                         REGEX_PREFETCH();
6244                         if (!WORDCHAR_P_UNSAFE((int)(*d)))
6245                                 goto fail;
6246                         SET_REGS_MATCHED();
6247                         d++;
6248                         break;
6249
6250                 case notwordchar:
6251                         DEBUG_PRINT1("EXECUTING non-Emacs notwordchar.\n");
6252                         REGEX_PREFETCH();
6253                         if (!WORDCHAR_P_UNSAFE((int)(*d)))
6254                                 goto fail;
6255                         SET_REGS_MATCHED();
6256                         d++;
6257                         break;
6258 #endif                          /* emacs */
6259
6260                 default:
6261                         abort();
6262                 }
6263                 /* Successfully executed one pattern command; keep going.  */
6264                 continue;
6265
6266                 /* We goto here if a matching operation fails. */
6267         fail:
6268                 if (!FAIL_STACK_EMPTY()) {
6269                         /* A restart point is known.  Restore to that state.  */
6270                         DEBUG_PRINT1("\nFAIL:\n");
6271                         POP_FAILURE_POINT(d, p /* used to be const char* */,
6272                                           lowest_active_reg, highest_active_reg,
6273                                           regstart, regend, reg_info);
6274
6275                         /* If this failure point is a dummy, try the next one.  */
6276                         if (!p)
6277                                 goto fail;
6278
6279                         /* If we failed to the end of the pattern, don't examine
6280                            *p.  */
6281                         assert(p <= pend);
6282                         if (p < pend) {
6283                                 re_bool is_a_jump_n = false;
6284
6285                                 /* If failed to a backwards jump that's part of
6286                                    a repetition loop, need to pop this failure
6287                                    point and use the next one.  */
6288                                 switch ((unsigned int)(re_opcode_t)*p) {
6289                                 case jump_n:
6290                                         is_a_jump_n = true;
6291                                 case maybe_pop_jump:
6292                                 case pop_failure_jump:
6293                                 case jump:
6294                                         p1 = p + 1;
6295                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6296                                         p1 += mcnt;
6297
6298                                         if ((is_a_jump_n
6299                                              && (re_opcode_t) * p1 == succeed_n)
6300                                             || (!is_a_jump_n
6301                                                 && (re_opcode_t) * p1 ==
6302                                                 on_failure_jump))
6303                                                 goto fail;
6304                                         break;
6305                                 default:
6306                                         /* do nothing */ ;
6307                                 }
6308                         }
6309
6310                         if (d >= string1 && d <= end1)
6311                                 dend = end_match_1;
6312                 } else
6313                         break;  /* Matching at this starting point really fails.  */
6314         }                       /* for (;;) */
6315
6316         if (best_regs_set)
6317                 goto restore_best_regs;
6318
6319         FREE_VARIABLES();
6320
6321         return -1;              /* Failure to match.  */
6322 }                               /* re_match_2 */
6323 \f
6324 /* Subroutine definitions for re_match_2.  */
6325
6326 /* We are passed P pointing to a register number after a start_memory.
6327
6328    Return true if the pattern up to the corresponding stop_memory can
6329    match the empty string, and false otherwise.
6330
6331    If we find the matching stop_memory, sets P to point to one past its number.
6332    Otherwise, sets P to an undefined byte less than or equal to END.
6333
6334    We don't handle duplicates properly (yet).  */
6335
6336 static re_bool
6337 group_match_null_string_p(unsigned char **p, unsigned char *end,
6338                           register_info_type * register_info)
6339 {
6340         int mcnt;
6341         /* Point to after the args to the start_memory.  */
6342         unsigned char *p1 = *p + 2;
6343
6344         while (p1 < end) {
6345                 /* Skip over opcodes that can match nothing, and return true or
6346                    false, as appropriate, when we get to one that can't, or to the
6347                    matching stop_memory.  */
6348
6349                 switch ((unsigned int)(re_opcode_t)*p1) {
6350                         /* Could be either a loop or a series of alternatives.  */
6351                 case on_failure_jump:
6352                         p1++;
6353                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6354
6355                         /* If the next operation is not a jump backwards in the
6356                            pattern.  */
6357
6358                         if (mcnt >= 0) {
6359                                 /* Go through the on_failure_jumps of the alternatives,
6360                                    seeing if any of the alternatives cannot match nothing.
6361                                    The last alternative starts with only a jump,
6362                                    whereas the rest start with on_failure_jump and end
6363                                    with a jump, e.g., here is the pattern for `a|b|c':
6364
6365                                    /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
6366                                    /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
6367                                    /exactn/1/c
6368
6369                                    So, we have to first go through the first (n-1)
6370                                    alternatives and then deal with the last one separately.  */
6371
6372                                 /* Deal with the first (n-1) alternatives, which start
6373                                    with an on_failure_jump (see above) that jumps to right
6374                                    past a jump_past_alt.  */
6375
6376                                 while ((re_opcode_t) p1[mcnt - 3] ==
6377                                        jump_past_alt) {
6378                                         /* `mcnt' holds how many bytes long the alternative
6379                                            is, including the ending `jump_past_alt' and
6380                                            its number.  */
6381
6382                                         if (!alt_match_null_string_p
6383                                             (p1, p1 + mcnt - 3, register_info))
6384                                                 return false;
6385
6386                                         /* Move to right after this alternative, including the
6387                                            jump_past_alt.  */
6388                                         p1 += mcnt;
6389
6390                                         /* Break if it's the beginning of an n-th alternative
6391                                            that doesn't begin with an on_failure_jump.  */
6392                                         if ((re_opcode_t) * p1 !=
6393                                             on_failure_jump)
6394                                                 break;
6395
6396                                         /* Still have to check that it's not an n-th
6397                                            alternative that starts with an on_failure_jump.  */
6398                                         p1++;
6399                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6400                                         if ((re_opcode_t) p1[mcnt - 3] !=
6401                                             jump_past_alt) {
6402                                                 /* Get to the beginning of the n-th alternative.  */
6403                                                 p1 -= 3;
6404                                                 break;
6405                                         }
6406                                 }
6407
6408                                 /* Deal with the last alternative: go back and get number
6409                                    of the `jump_past_alt' just before it.  `mcnt' contains
6410                                    the length of the alternative.  */
6411                                 EXTRACT_NUMBER(mcnt, p1 - 2);
6412
6413                                 if (!alt_match_null_string_p
6414                                     (p1, p1 + mcnt, register_info))
6415                                         return false;
6416
6417                                 p1 += mcnt;     /* Get past the n-th alternative.  */
6418                         }       /* if mcnt > 0 */
6419                         break;
6420
6421                 case stop_memory:
6422                         assert(p1[1] == **p);
6423                         *p = p1 + 2;
6424                         return true;
6425
6426                 default:
6427                         if (!common_op_match_null_string_p(&p1, end, register_info))
6428                                 return false;
6429                 }
6430         }                       /* while p1 < end */
6431
6432         return false;
6433 }                               /* group_match_null_string_p */
6434
6435 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
6436    It expects P to be the first byte of a single alternative and END one
6437    byte past the last. The alternative can contain groups.  */
6438
6439 static re_bool
6440 alt_match_null_string_p(unsigned char *p, unsigned char *end,
6441                         register_info_type * register_info)
6442 {
6443         int mcnt;
6444         unsigned char *p1 = p;
6445
6446         while (p1 < end) {
6447                 /* Skip over opcodes that can match nothing, and break when we get
6448                    to one that can't.  */
6449
6450                 switch ((unsigned int)(re_opcode_t)*p1) {
6451                         /* It's a loop.  */
6452                 case on_failure_jump:
6453                         p1++;
6454                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6455                         p1 += mcnt;
6456                         break;
6457
6458                 default:
6459                         if (!common_op_match_null_string_p(&p1, end, register_info))
6460                                 return false;
6461                 }
6462         }                       /* while p1 < end */
6463
6464         return true;
6465 }                               /* alt_match_null_string_p */
6466
6467 /* Deals with the ops common to group_match_null_string_p and
6468    alt_match_null_string_p.
6469
6470    Sets P to one after the op and its arguments, if any.  */
6471
6472 static re_bool
6473 common_op_match_null_string_p(unsigned char **p, unsigned char *end,
6474                               register_info_type * register_info)
6475 {
6476         int mcnt;
6477         re_bool ret;
6478         int reg_no;
6479         unsigned char *p1 = *p;
6480
6481         switch ((unsigned int)(re_opcode_t)*p1++) {
6482         case no_op:
6483         case begline:
6484         case endline:
6485         case begbuf:
6486         case endbuf:
6487         case wordbeg:
6488         case wordend:
6489         case wordbound:
6490         case notwordbound:
6491 #ifdef emacs
6492         case before_dot:
6493         case at_dot:
6494         case after_dot:
6495 #endif
6496                 break;
6497
6498         case start_memory:
6499                 reg_no = *p1;
6500                 assert(reg_no > 0 && reg_no <= MAX_REGNUM);
6501                 ret = group_match_null_string_p(&p1, end, register_info);
6502
6503                 /* Have to set this here in case we're checking a group which
6504                    contains a group and a back reference to it.  */
6505
6506                 if (REG_MATCH_NULL_STRING_P(register_info[reg_no]) ==
6507                     MATCH_NULL_UNSET_VALUE)
6508                         REG_MATCH_NULL_STRING_P(register_info[reg_no]) = ret;
6509
6510                 if (!ret)
6511                         return false;
6512                 break;
6513
6514                 /* If this is an optimized succeed_n for zero times, make the jump.  */
6515         case jump:
6516                 EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6517                 if (mcnt >= 0)
6518                         p1 += mcnt;
6519                 else
6520                         return false;
6521                 break;
6522
6523         case succeed_n:
6524                 /* Get to the number of times to succeed.  */
6525                 p1 += 2;
6526                 EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6527
6528                 if (mcnt == 0) {
6529                         p1 -= 4;
6530                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6531                         p1 += mcnt;
6532                 } else
6533                         return false;
6534                 break;
6535
6536         case duplicate:
6537                 if (!REG_MATCH_NULL_STRING_P(register_info[*p1]))
6538                         return false;
6539                 break;
6540
6541         case set_number_at:
6542                 p1 += 4;
6543
6544         default:
6545                 /* All other opcodes mean we cannot match the empty string.  */
6546                 return false;
6547         }
6548
6549         *p = p1;
6550         return true;
6551 }                               /* common_op_match_null_string_p */
6552
6553 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
6554    bytes; nonzero otherwise.  */
6555
6556 static int
6557 bcmp_translate(re_char *s1, re_char *s2,
6558                REGISTER int len, RE_TRANSLATE_TYPE translate)
6559 {
6560         REGISTER const unsigned char *p1 = s1, *p2 = s2;
6561 #ifdef MULE
6562         const unsigned char *p1_end = s1 + len;
6563         const unsigned char *p2_end = s2 + len;
6564
6565         while (p1 != p1_end && p2 != p2_end) {
6566                 Emchar p1_ch, p2_ch;
6567
6568                 p1_ch = charptr_emchar(p1);
6569                 p2_ch = charptr_emchar(p2);
6570
6571                 if (RE_TRANSLATE(p1_ch)
6572                     != RE_TRANSLATE(p2_ch))
6573                         return 1;
6574                 INC_CHARPTR(p1);
6575                 INC_CHARPTR(p2);
6576         }
6577 #else                           /* not MULE */
6578         while (len) {
6579                 if (RE_TRANSLATE(*p1++) != RE_TRANSLATE(*p2++))
6580                         return 1;
6581                 len--;
6582         }
6583 #endif                          /* MULE */
6584         return 0;
6585 }
6586 \f
6587 /* Entry points for GNU code.  */
6588
6589 /* re_compile_pattern is the GNU regular expression compiler: it
6590    compiles PATTERN (of length SIZE) and puts the result in BUFP.
6591    Returns 0 if the pattern was valid, otherwise an error string.
6592
6593    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6594    are set in BUFP on entry.
6595
6596    We call regex_compile to do the actual compilation.  */
6597
6598 const char *re_compile_pattern(const char *pattern, int length,
6599                                struct re_pattern_buffer *bufp)
6600 {
6601         reg_errcode_t ret;
6602
6603         /* GNU code is written to assume at least RE_NREGS registers will be set
6604            (and at least one extra will be -1).  */
6605         bufp->regs_allocated = REGS_UNALLOCATED;
6606
6607         /* And GNU code determines whether or not to get register information
6608            by passing null for the REGS argument to re_match, etc., not by
6609            setting no_sub.  */
6610         bufp->no_sub = 0;
6611
6612         /* Match anchors at newline.  */
6613         bufp->newline_anchor = 1;
6614
6615         ret = regex_compile((const unsigned char*)pattern,
6616                             length, re_syntax_options, bufp);
6617
6618         if (!ret)
6619                 return NULL;
6620         return gettext(re_error_msgid[(int)ret]);
6621 }
6622 \f
6623 /* Entry points compatible with 4.2 BSD regex library.  We don't define
6624    them unless specifically requested.  */
6625
6626 #ifdef _REGEX_RE_COMP
6627
6628 /* BSD has one and only one pattern buffer.  */
6629 static struct re_pattern_buffer re_comp_buf;
6630
6631 char *re_comp(const char *s)
6632 {
6633         reg_errcode_t ret;
6634
6635         if (!s) {
6636                 if (!re_comp_buf.buffer)
6637                         return gettext("No previous regular expression");
6638                 return 0;
6639         }
6640
6641         if (!re_comp_buf.buffer) {
6642                 re_comp_buf.buffer = (unsigned char *)xmalloc_atomic(200);
6643                 if (re_comp_buf.buffer == NULL)
6644                         return gettext(re_error_msgid[(int)REG_ESPACE]);
6645                 re_comp_buf.allocated = 200;
6646
6647                 re_comp_buf.fastmap = (char *)xmalloc_atomic(1 << BYTEWIDTH);
6648                 if (re_comp_buf.fastmap == NULL)
6649                         return gettext(re_error_msgid[(int)REG_ESPACE]);
6650         }
6651
6652         /* Since `re_exec' always passes NULL for the `regs' argument, we
6653            don't need to initialize the pattern buffer fields which affect it.  */
6654
6655         /* Match anchors at newlines.  */
6656         re_comp_buf.newline_anchor = 1;
6657
6658         ret =
6659             regex_compile((unsigned char *)s, strlen(s), re_syntax_options,
6660                           &re_comp_buf);
6661
6662         if (!ret)
6663                 return NULL;
6664
6665         /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6666         return (char *)gettext(re_error_msgid[(int)ret]);
6667 }
6668
6669 int re_exec(const char *s)
6670 {
6671         const int len = strlen(s);
6672         return
6673             0 <= re_search(&re_comp_buf, s, len, 0, len,
6674                            (struct re_registers *)0);
6675 }
6676 #endif                          /* _REGEX_RE_COMP */
6677 \f
6678 /* POSIX.2 functions.  Don't define these for Emacs.  */
6679
6680 #ifndef emacs
6681
6682 /* regcomp takes a regular expression as a string and compiles it.
6683
6684    PREG is a regex_t *.  We do not expect any fields to be initialized,
6685    since POSIX says we shouldn't.  Thus, we set
6686
6687      `buffer' to the compiled pattern;
6688      `used' to the length of the compiled pattern;
6689      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6690        REG_EXTENDED bit in CFLAGS is set; otherwise, to
6691        RE_SYNTAX_POSIX_BASIC;
6692      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6693      `fastmap' and `fastmap_accurate' to zero;
6694      `re_nsub' to the number of subexpressions in PATTERN.
6695      (non-shy of course.  POSIX probably doesn't know about
6696      shy ones, and in any case they should be invisible.)
6697
6698    PATTERN is the address of the pattern string.
6699
6700    CFLAGS is a series of bits which affect compilation.
6701
6702      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6703      use POSIX basic syntax.
6704
6705      If REG_NEWLINE is set, then . and [^...] don't match newline.
6706      Also, regexec will try a match beginning after every newline.
6707
6708      If REG_ICASE is set, then we considers upper- and lowercase
6709      versions of letters to be equivalent when matching.
6710
6711      If REG_NOSUB is set, then when PREG is passed to regexec, that
6712      routine will report only success or failure, and nothing about the
6713      registers.
6714
6715    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6716    the return codes and their meanings.)  */
6717
6718 int regcomp(regex_t * preg, const char *pattern, int cflags)
6719 {
6720         reg_errcode_t ret;
6721         unsigned syntax
6722             = (cflags & REG_EXTENDED) ?
6723             RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6724
6725         /* regex_compile will allocate the space for the compiled pattern.  */
6726         preg->buffer = 0;
6727         preg->allocated = 0;
6728         preg->used = 0;
6729
6730         /* Don't bother to use a fastmap when searching.  This simplifies the
6731            REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6732            characters after newlines into the fastmap.  This way, we just try
6733            every character.  */
6734         preg->fastmap = 0;
6735
6736         if (cflags & REG_ICASE) {
6737                 int i;
6738
6739                 preg->translate = (char *)xmalloc_atomic(CHAR_SET_SIZE);
6740                 if (preg->translate == NULL)
6741                         return (int)REG_ESPACE;
6742
6743                 /* Map uppercase characters to corresponding lowercase ones.  */
6744                 for (i = 0; i < CHAR_SET_SIZE; i++)
6745                         preg->translate[i] = ISUPPER(i) ? tolower(i) : i;
6746         } else
6747                 preg->translate = NULL;
6748
6749         /* If REG_NEWLINE is set, newlines are treated differently.  */
6750         if (cflags & REG_NEWLINE) {
6751                 /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6752                 syntax &= ~RE_DOT_NEWLINE;
6753                 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6754                 /* It also changes the matching behavior.  */
6755                 preg->newline_anchor = 1;
6756         } else {
6757                 preg->newline_anchor = 0;
6758         }
6759         preg->no_sub = !!(cflags & REG_NOSUB);
6760
6761         /* POSIX says a null character in the pattern terminates it, so we
6762            can use strlen here in compiling the pattern.  */
6763         ret = regex_compile((const unsigned char*)pattern,
6764                             strlen(pattern), syntax, preg);
6765
6766         /* POSIX doesn't distinguish between an unmatched open-group and an
6767            unmatched close-group: both are REG_EPAREN.  */
6768         if (ret == REG_ERPAREN)
6769                 ret = REG_EPAREN;
6770
6771         return (int)ret;
6772 }
6773
6774 /* regexec searches for a given pattern, specified by PREG, in the
6775    string STRING.
6776
6777    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6778    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6779    least NMATCH elements, and we set them to the offsets of the
6780    corresponding matched substrings.
6781
6782    EFLAGS specifies `execution flags' which affect matching: if
6783    REG_NOTBOL is set, then ^ does not match at the beginning of the
6784    string; if REG_NOTEOL is set, then $ does not match at the end.
6785
6786    We return 0 if we find a match and REG_NOMATCH if not.  */
6787
6788 int
6789 regexec(const regex_t * preg, const char *string, Element_count nmatch,
6790         regmatch_t pmatch[], int eflags)
6791 {
6792         int ret;
6793         struct re_registers regs;
6794         regex_t private_preg;
6795         int len = strlen(string);
6796         re_bool want_reg_info = !preg->no_sub && nmatch > 0;
6797
6798         private_preg = *preg;
6799
6800         private_preg.not_bol = !!(eflags & REG_NOTBOL);
6801         private_preg.not_eol = !!(eflags & REG_NOTEOL);
6802
6803         /* The user has told us exactly how many registers to return
6804            information about, via `nmatch'.  We have to pass that on to the
6805            matching routines.  */
6806         private_preg.regs_allocated = REGS_FIXED;
6807
6808         if (want_reg_info) {
6809                 regs.num_regs = nmatch;
6810                 regs.start = TALLOC(nmatch, regoff_t);
6811                 regs.end = TALLOC(nmatch, regoff_t);
6812                 if (regs.start == NULL || regs.end == NULL)
6813                         return (int)REG_NOMATCH;
6814         }
6815
6816         /* Perform the searching operation.  */
6817         ret = re_search(&private_preg, string, len,
6818                         /* start: */ 0, /* range: */ len,
6819                         want_reg_info ? &regs : (struct re_registers *)0);
6820
6821         /* Copy the register information to the POSIX structure.  */
6822         if (want_reg_info) {
6823                 if (ret >= 0) {
6824                         Element_count r;
6825
6826                         for (r = 0; r < nmatch; r++) {
6827                                 pmatch[r].rm_so = regs.start[r];
6828                                 pmatch[r].rm_eo = regs.end[r];
6829                         }
6830                 }
6831
6832                 /* If we needed the temporary register info, free the space now.  */
6833                 xfree(regs.start);
6834                 xfree(regs.end);
6835         }
6836
6837         /* We want zero return to mean success, unlike `re_search'.  */
6838         return ret >= 0 ? (int)REG_NOERROR : (int)REG_NOMATCH;
6839 }
6840
6841 /* Returns a message corresponding to an error code, ERRCODE, returned
6842    from either regcomp or regexec.   We don't use PREG here.  */
6843
6844 Memory_count
6845 regerror(int errcode, const regex_t * preg, char *errbuf,
6846          Memory_count errbuf_size)
6847 {
6848         const char *msg;
6849         Memory_count msg_size;
6850
6851         if (errcode < 0 || (size_t) errcode >= (sizeof(re_error_msgid)
6852                                                 / sizeof(re_error_msgid[0])))
6853                 /* Only error codes returned by the rest of the code should be passed
6854                    to this routine.  If we are given anything else, or if other regex
6855                    code generates an invalid error code, then the program has a bug.
6856                    Dump core so we can fix it.  */
6857                 abort();
6858
6859         msg = gettext(re_error_msgid[errcode]);
6860
6861         msg_size = strlen(msg) + 1;     /* Includes the null.  */
6862
6863         if (errbuf_size != 0) {
6864                 strncpy(errbuf, msg, errbuf_size - 1);
6865                 errbuf[errbuf_size-1]='\0';
6866         }
6867
6868         return msg_size;
6869 }
6870
6871 /* Free dynamically allocated space used by PREG.  */
6872
6873 void regfree(regex_t * preg)
6874 {
6875         if (preg->buffer != NULL) {
6876                 xfree(preg->buffer);
6877         }
6878         preg->buffer = NULL;
6879
6880         preg->allocated = 0;
6881         preg->used = 0;
6882
6883         if (preg->fastmap != NULL) {
6884                 xfree(preg->fastmap);
6885         }
6886         preg->fastmap = NULL;
6887         preg->fastmap_accurate = 0;
6888
6889         if (preg->translate != NULL) {
6890                 xfree(preg->translate);
6891         }
6892         preg->translate = NULL;
6893 }
6894
6895 #endif                          /* not emacs  */
6896 \f
6897 /*
6898 Local variables:
6899 make-backup-files: t
6900 version-control: t
6901 trim-versions-without-asking: nil
6902 End:
6903 */