Minor package-get cleanup + bldchain tweak
[sxemacs] / src / regex.c
1 /* Extended regular expression matching and search library,
2    version 0.12, extended for XEmacs.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5
6    Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
7    Copyright (C) 1995 Sun Microsystems, Inc.
8    Copyright (C) 1995 Ben Wing.
9
10 This file is part of SXEmacs
11
12 SXEmacs is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
16
17 SXEmacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
24
25
26 /* Synched up with: FSF 19.29. */
27
28 /* Changes made for XEmacs:
29
30    (1) the REGEX_BEGLINE_CHECK code from the XEmacs v18 regex routines
31        was added.  This causes a huge speedup in font-locking.
32    (2) Rel-alloc is disabled when the MMAP version of rel-alloc is
33        being used, because it's too slow -- all those calls to mmap()
34        add humongous overhead.
35    (3) Lots and lots of changes for Mule.  They are bracketed by
36        `#ifdef MULE' or with comments that have `XEmacs' in them.
37  */
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #ifndef REGISTER                /* Rigidly enforced as of 20.3 */
44 #define REGISTER
45 #endif
46
47 #ifndef _GNU_SOURCE
48 #define _GNU_SOURCE 1
49 #endif
50
51 #ifdef emacs
52 /* Converts the pointer to the char to BEG-based offset from the start.  */
53 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING                      \
54                           ? (d) - string1 : (d) - (string2 - size1))
55 #else
56 #define PTR_TO_OFFSET(d) 0
57 #endif
58
59 /* We assume non-Mule if emacs isn't defined. */
60 #ifndef emacs
61 #undef MULE
62 #endif
63
64 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
65 #include <sys/types.h>
66
67 /* This is for other GNU distributions with internationalized messages.  */
68 #if defined (I18N3) && (defined (HAVE_LIBINTL_H) || defined (_LIBC))
69 # include <libintl.h>
70 #else
71 # define gettext(msgid) (msgid)
72 #endif
73
74 /* XEmacs: define this to add in a speedup for patterns anchored at
75    the beginning of a line.  Keep the ifdefs so that it's easier to
76    tell where/why this code has diverged from v19. */
77 #define REGEX_BEGLINE_CHECK
78
79 /* XEmacs: the current mmap-based ralloc handles small blocks very
80    poorly, so we disable it here. */
81
82 #if (defined (REL_ALLOC) && defined (HAVE_MMAP)) || defined(DOUG_LEA_MALLOC)
83 # undef REL_ALLOC
84 #endif
85
86 /* The `emacs' switch turns on certain matching commands
87    that make sense only in Emacs. */
88 #ifdef emacs
89
90 #include "lisp.h"
91 #include "buffer.h"
92 #include "syntax.h"
93
94 #ifdef MULE
95
96 Lisp_Object Vthe_lisp_rangetab;
97
98 void complex_vars_of_regex(void)
99 {
100         Vthe_lisp_rangetab = Fmake_range_table();
101         staticpro(&Vthe_lisp_rangetab);
102 }
103
104 #else  /* not MULE */
105
106 void complex_vars_of_regex(void)
107 {
108 }
109
110 #endif  /* MULE */
111
112 #define RE_TRANSLATE(ch) TRT_TABLE_OF (translate, (Emchar) ch)
113 #define TRANSLATE_P(tr) (!NILP (tr) && CHAR_TABLEP(tr))
114
115 /* just massage that xfree thing */
116 #undef xfree
117 #if defined HAVE_BDWGC && defined EF_USE_BDWGC
118 # define xfree          zfree
119 #else  /* !BDWGC */
120 # define xfree          yfree
121 #endif  /* BDWGC */
122
123 #else  /* !emacs */
124
125 /* If we are not linking with Emacs proper,
126    we can't use the relocating allocator
127    even if config.h says that we can.  */
128 #undef REL_ALLOC
129
130 #if defined (STDC_HEADERS) || defined (_LIBC)
131 # include <stdlib.h>
132 # include <stdbool.h>
133 #else
134 char *malloc();
135 char *realloc();
136 #endif
137
138 /* Types normally included via lisp.h */
139 #include <stddef.h>             /* for ptrdiff_t */
140
141 #ifdef REGEX_MALLOC
142 #ifndef DECLARE_NOTHING
143 #define DECLARE_NOTHING struct nosuchstruct
144 #endif
145 #endif
146
147 typedef int Emchar;
148
149 #define charptr_emchar(str)             ((Emchar) (str)[0])
150
151 #define INC_CHARPTR(p) ((p)++)
152 #define DEC_CHARPTR(p) ((p)--)
153
154 #include <string.h>
155
156 /* Define the syntax stuff for \<, \>, etc.  */
157
158 /* This must be nonzero for the wordchar and notwordchar pattern
159    commands in re_match_2.  */
160 #ifndef Sword
161 #define Sword 1
162 #endif
163
164 #ifdef SYNTAX_TABLE
165
166 extern char *re_syntax_table;
167
168 #else                           /* not SYNTAX_TABLE */
169
170 /* How many characters in the character set.  */
171 #define CHAR_SET_SIZE 256
172
173 static char re_syntax_table[CHAR_SET_SIZE];
174
175 static void init_syntax_once(void)
176 {
177         static int done = 0;
178
179         if (!done) {
180                 const char *word_syntax_chars =
181                     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
182
183                 memset(re_syntax_table, 0, sizeof(re_syntax_table));
184
185                 while (*word_syntax_chars)
186                         re_syntax_table[(unsigned int)(*word_syntax_chars++)] =
187                             Sword;
188
189                 done = 1;
190         }
191 }
192
193 #endif                          /* SYNTAX_TABLE */
194
195 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
196 #undef SYNTAX_FROM_CACHE
197 #define SYNTAX_FROM_CACHE SYNTAX_UNSAFE
198
199 #define RE_TRANSLATE(c) translate[(unsigned char) (c)]
200 #define TRANSLATE_P(tr) tr
201
202 #if defined HAVE_BDWGC && defined EF_USE_BDWGC
203 # if defined HAVE_GC_GC_H
204 #  include "gc/gc.h"
205 # elif defined HAVE_GC_H
206 #  include "gc.h"
207 # endif
208 # define xmalloc        GC_MALLOC
209 # define xmalloc_atomic GC_MALLOC_ATOMIC
210 # define xrealloc       GC_REALLOC
211 # define xfree          GC_FREE
212 # define xstrdup        GC_STRDUP
213 #else  /* !HAVE_BDWGC */
214 # define xmalloc        malloc
215 # define xmalloc_atomic malloc
216 # define xrealloc       realloc
217 # define xfree          free
218 # define xstrdup        strdup
219 #endif  /* HAVE_BDWGC */
220
221 #endif  /* emacs */
222
223
224 /* Under XEmacs, this is needed because we don't define it elsewhere. */
225 #ifdef SWITCH_ENUM_BUG
226 #define SWITCH_ENUM_CAST(x) ((int)(x))
227 #else
228 #define SWITCH_ENUM_CAST(x) (x)
229 #endif
230 \f
231 /* Get the interface, including the syntax bits.  */
232 #include "regex.h"
233
234 /* isalpha etc. are used for the character classes.  */
235 #include <ctype.h>
236
237 /* Jim Meyering writes:
238
239    "... Some ctype macros are valid only for character codes that
240    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
241    using /bin/cc or gcc but without giving an ansi option).  So, all
242    ctype uses should be through macros like ISPRINT...  If
243    STDC_HEADERS is defined, then autoconf has verified that the ctype
244    macros don't need to be guarded with references to isascii. ...
245    Defining isascii to 1 should let any compiler worth its salt
246    eliminate the && through constant folding."  */
247
248 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
249 #define ISASCII_1(c) 1
250 #else
251 #define ISASCII_1(c) isascii(c)
252 #endif
253
254 #ifdef MULE
255 /* The IS*() macros can be passed any character, including an extended
256    one.  We need to make sure there are no crashes, which would occur
257    otherwise due to out-of-bounds array references. */
258 #define ISASCII(c) (((EMACS_UINT) (c)) < 0x100 && ISASCII_1 (c))
259 #else
260 #define ISASCII(c) ISASCII_1 (c)
261 #endif                          /* MULE */
262
263 #ifdef isblank
264 #define ISBLANK(c) (ISASCII (c) && isblank (c))
265 #else
266 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
267 #endif
268 #ifdef isgraph
269 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
270 #else
271 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
272 #endif
273
274 /* stolen from FSF/src/charset.h */
275 #define SINGLE_BYTE_CHAR_P(c)   (((unsigned int)(c) & 0xFF) == (c))
276 /* 1 if C is an ASCII character.  */
277 #define IS_REAL_ASCII(c) ((c) < 128)
278 /* 1 if C is a unibyte character.  */
279 #define ISUNIBYTE(c)    (SINGLE_BYTE_CHAR_P(c))
280
281 #define ISPRINT(c) (ISASCII (c) && isprint (c))
282 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
283 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
284 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
285 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
286 #define ISLOWER(c) (ISASCII (c) && islower (c))
287 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
288 #define ISSPACE(c) (ISASCII (c) && isspace (c))
289 #define ISUPPER(c) (ISASCII (c) && isupper (c))
290 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
291
292 #if defined emacs
293 static inline bool
294 ISWORD(int c)
295         __attribute__((always_inline));
296 static inline bool
297 ISWORD(int c)
298 {
299         Lisp_Object ct = regex_emacs_buffer->mirror_syntax_table;
300
301         return (SYNTAX(XCHAR_TABLE(ct), c) == Sword);
302 }
303 #else
304 # define ISWORD(c)      (ISALPHA(c))
305 #endif  /* emacs */
306
307 #ifndef NULL
308 #define NULL (void *)0
309 #endif
310
311 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
312    since ours (we hope) works properly with all combinations of
313    machines, compilers, `char' and `unsigned char' argument types.
314    (Per Bothner suggested the basic approach.)  */
315 #undef SIGN_EXTEND_CHAR
316 #if __STDC__
317 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
318 #else                           /* not __STDC__ */
319 /* As in Harbison and Steele.  */
320 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
321 #endif
322 \f
323 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
324    use `alloca' instead of `malloc'.  This is because using malloc in
325    re_search* or re_match* could cause memory leaks when C-g is used in
326    Emacs; also, malloc is slower and causes storage fragmentation.  On
327    the other hand, malloc is more portable, and easier to debug.
328
329    Because we sometimes use alloca, some routines have to be macros,
330    not functions -- `alloca'-allocated space disappears at the end of the
331    function it is called in.  */
332
333 #ifdef REGEX_MALLOC
334
335 #define REGEX_ALLOCATE  xmalloc
336 #define REGEX_REALLOCATE(source, osize, nsize) xrealloc(source, nsize)
337 #define REGEX_FREE      xfree
338
339 #else  /* !REGEX_MALLOC */
340
341 /* Emacs already defines alloca, sometimes.  */
342 #ifndef alloca
343
344 /* Make alloca work the best possible way.  */
345 #ifdef __GNUC__
346 #define alloca __builtin_alloca
347 #else                           /* not __GNUC__ */
348 #if HAVE_ALLOCA_H
349 #include <alloca.h>
350 #else                           /* not __GNUC__ or HAVE_ALLOCA_H */
351 #ifndef _AIX                    /* Already did AIX, up at the top.  */
352 void *alloca();
353 #endif                          /* not _AIX */
354 #endif                          /* HAVE_ALLOCA_H */
355 #endif                          /* __GNUC__ */
356
357 #endif                          /* not alloca */
358
359 #define REGEX_ALLOCATE alloca
360
361 /* Assumes a `char *destination' variable.  */
362 #define REGEX_REALLOCATE(source, osize, nsize)                          \
363   (destination = (char *) alloca (nsize),                               \
364    memmove (destination, source, osize),                                \
365    destination)
366
367 /* No need to do anything to free, after alloca.  */
368 #define REGEX_FREE(arg) ((void)0)       /* Do nothing!  But inhibit gcc warning.  */
369
370 #endif  /* REGEX_MALLOC */
371
372 /* Define how to allocate the failure stack.  */
373
374 #ifdef REL_ALLOC
375 #error not again
376 #define REGEX_ALLOCATE_STACK(size)                              \
377         r_alloc ((char **) &failure_stack_ptr, (size))
378 #define REGEX_REALLOCATE_STACK(source, osize, nsize)            \
379         r_re_alloc ((char **) &failure_stack_ptr, (nsize))
380 #define REGEX_FREE_STACK(ptr)                                   \
381         r_alloc_free ((void **) &failure_stack_ptr)
382
383 #else  /* !REL_ALLOC */
384
385 #ifdef REGEX_MALLOC
386
387 #define REGEX_ALLOCATE_STACK    xmalloc
388 #define REGEX_REALLOCATE_STACK(source, osize, nsize) xrealloc(source, nsize)
389 #define REGEX_FREE_STACK        xfree
390
391 #else  /* !REGEX_MALLOC */
392
393 #define REGEX_ALLOCATE_STACK alloca
394
395 #define REGEX_REALLOCATE_STACK(source, osize, nsize)                    \
396    REGEX_REALLOCATE (source, osize, nsize)
397 /* No need to explicitly free anything.  */
398 #define REGEX_FREE_STACK(arg)
399
400 #endif  /* REGEX_MALLOC */
401 #endif  /* REL_ALLOC */
402
403 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
404    `string1' or just past its end.  This works if PTR is NULL, which is
405    a good thing.  */
406 #define FIRST_STRING_P(ptr)                                     \
407   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
408
409 /* (Re)Allocate N items of type T using malloc, or fail.  */
410 #define TALLOC(n, t)                            \
411         ((t *)xmalloc((n) * sizeof (t)))
412 #define TALLOC_ATOMIC(n, t)                     \
413         ((t *)xmalloc_atomic((n) * sizeof (t)))
414 #define RETALLOC(addr, n, t)                                    \
415         ((addr) = (t *)xrealloc(addr, (n) * sizeof (t)))
416 #define RETALLOC_IF(addr, n, t)                                 \
417         if (addr) {                                             \
418                 RETALLOC((addr), (n), t);                       \
419         } else                                                  \
420                 (addr) = TALLOC ((n), t)
421 #define REGEX_TALLOC(n, t)                              \
422         ((t *)REGEX_ALLOCATE((n) * sizeof (t)))
423
424 #define BYTEWIDTH 8             /* In bits.  */
425
426 #define STREQ(s1, s2) (strcmp (s1, s2) == 0)
427
428 #undef MAX
429 #undef MIN
430 #define MAX(a, b) ((a) > (b) ? (a) : (b))
431 #define MIN(a, b) ((a) < (b) ? (a) : (b))
432
433 /* Type of source-pattern and string chars.  */
434 typedef const unsigned char re_char;
435
436 typedef char re_bool;
437 #define false 0
438 #define true 1
439 \f
440 /* These are the command codes that appear in compiled regular
441    expressions.  Some opcodes are followed by argument bytes.  A
442    command code can specify any interpretation whatsoever for its
443    arguments.  Zero bytes may appear in the compiled regular expression.  */
444
445 typedef enum {
446         no_op = 0,
447
448         /* Succeed right away--no more backtracking.  */
449         succeed,
450
451         /* Followed by one byte giving n, then by n literal bytes.  */
452         exactn,
453
454         /* Matches any (more or less) character.  */
455         anychar,
456
457         /* Matches any one char belonging to specified set.  First
458            following byte is number of bitmap bytes.  Then come bytes
459            for a bitmap saying which chars are in.  Bits in each byte
460            are ordered low-bit-first.  A character is in the set if its
461            bit is 1.  A character too large to have a bit in the map is
462            automatically not in the set.  */
463         charset,
464
465         /* Same parameters as charset, but match any character that is
466            not one of those specified.  */
467         charset_not,
468
469         /* Start remembering the text that is matched, for storing in a
470            register.  Followed by one byte with the register number, in
471            the range 1 to the pattern buffer's re_ngroups
472            field.  Then followed by one byte with the number of groups
473            inner to this one.  (This last has to be part of the
474            start_memory only because we need it in the on_failure_jump
475            of re_match_2.)  */
476         start_memory,
477
478         /* Stop remembering the text that is matched and store it in a
479            memory register.  Followed by one byte with the register
480            number, in the range 1 to `re_ngroups' in the
481            pattern buffer, and one byte with the number of inner groups,
482            just like `start_memory'.  (We need the number of inner
483            groups here because we don't have any easy way of finding the
484            corresponding start_memory when we're at a stop_memory.)  */
485         stop_memory,
486
487         /* Match a duplicate of something remembered. Followed by one
488            byte containing the register number.  */
489         duplicate,
490
491         /* Fail unless at beginning of line.  */
492         begline,
493
494         /* Fail unless at end of line.  */
495         endline,
496
497         /* Succeeds if at beginning of buffer (if emacs) or at beginning
498            of string to be matched (if not).  */
499         begbuf,
500
501         /* Analogously, for end of buffer/string.  */
502         endbuf,
503
504         /* Followed by two byte relative address to which to jump.  */
505         jump,
506
507         /* Same as jump, but marks the end of an alternative.  */
508         jump_past_alt,
509
510         /* Followed by two-byte relative address of place to resume at
511            in case of failure.  */
512         on_failure_jump,
513
514         /* Like on_failure_jump, but pushes a placeholder instead of the
515            current string position when executed.  */
516         on_failure_keep_string_jump,
517
518         /* Throw away latest failure point and then jump to following
519            two-byte relative address.  */
520         pop_failure_jump,
521
522         /* Change to pop_failure_jump if know won't have to backtrack to
523            match; otherwise change to jump.  This is used to jump
524            back to the beginning of a repeat.  If what follows this jump
525            clearly won't match what the repeat does, such that we can be
526            sure that there is no use backtracking out of repetitions
527            already matched, then we change it to a pop_failure_jump.
528            Followed by two-byte address.  */
529         maybe_pop_jump,
530
531         /* Jump to following two-byte address, and push a dummy failure
532            point. This failure point will be thrown away if an attempt
533            is made to use it for a failure.  A `+' construct makes this
534            before the first repeat.  Also used as an intermediary kind
535            of jump when compiling an alternative.  */
536         dummy_failure_jump,
537
538         /* Push a dummy failure point and continue.  Used at the end of
539            alternatives.  */
540         push_dummy_failure,
541
542         /* Followed by two-byte relative address and two-byte number n.
543            After matching N times, jump to the address upon failure.  */
544         succeed_n,
545
546         /* Followed by two-byte relative address, and two-byte number n.
547            Jump to the address N times, then fail.  */
548         jump_n,
549
550         /* Set the following two-byte relative address to the
551            subsequent two-byte number.  The address *includes* the two
552            bytes of number.  */
553         set_number_at,
554
555         wordchar,               /* Matches any word-constituent character.  */
556         notwordchar,            /* Matches any char that is not a word-constituent.  */
557
558         wordbeg,                /* Succeeds if at word beginning.  */
559         wordend,                /* Succeeds if at word end.  */
560
561         wordbound,              /* Succeeds if at a word boundary.  */
562         notwordbound            /* Succeeds if not at a word boundary.  */
563 #ifdef emacs
564             , before_dot,       /* Succeeds if before point.  */
565         at_dot,                 /* Succeeds if at point.  */
566         after_dot,              /* Succeeds if after point.  */
567
568         /* Matches any character whose syntax is specified.  Followed by
569            a byte which contains a syntax code, e.g., Sword.  */
570         syntaxspec,
571
572         /* Matches any character whose syntax is not that specified.  */
573         notsyntaxspec
574 #endif                          /* emacs */
575 #ifdef MULE
576             /* need extra stuff to be able to properly work with XEmacs/Mule
577                characters (which may take up more than one byte) */
578             , charset_mule,     /* Matches any character belonging to specified set.
579                                    The set is stored in "unified range-table
580                                    format"; see rangetab.c.  Unlike the `charset'
581                                    opcode, this can handle arbitrary characters. */
582
583         charset_mule_not        /* Same parameters as charset_mule, but match any
584                                    character that is not one of those specified.  */
585             /* 97/2/17 jhod: The following two were merged back in from the Mule
586                2.3 code to enable some language specific processing */
587             , categoryspec,     /* Matches entries in the character category tables */
588         notcategoryspec         /* The opposite of the above */
589 #endif                          /* MULE */
590 } re_opcode_t;
591 \f
592 /* Common operations on the compiled pattern.  */
593
594 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
595
596 #define STORE_NUMBER(destination, number)                               \
597   do {                                                                  \
598     (destination)[0] = (number) & 0377;                                 \
599     (destination)[1] = (number) >> 8;                                   \
600   } while (0)
601
602 /* Same as STORE_NUMBER, except increment DESTINATION to
603    the byte after where the number is stored.  Therefore, DESTINATION
604    must be an lvalue.  */
605
606 #define STORE_NUMBER_AND_INCR(destination, number)                      \
607   do {                                                                  \
608     STORE_NUMBER (destination, number);                                 \
609     (destination) += 2;                                                 \
610   } while (0)
611
612 /* Put into DESTINATION a number stored in two contiguous bytes starting
613    at SOURCE.  */
614
615 #define EXTRACT_NUMBER(destination, source)                             \
616   do {                                                                  \
617     (destination) = *(source) & 0377;                                   \
618     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
619   } while (0)
620
621 #ifdef REGEX_DEBUG_FLAG
622 static void extract_number(int *dest, re_char * source)
623 {
624         int temp = SIGN_EXTEND_CHAR(*(source + 1));
625         *dest = *source & 0377;
626         *dest += temp << 8;
627 }
628
629 #ifndef EXTRACT_MACROS          /* To debug the macros.  */
630 #undef EXTRACT_NUMBER
631 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
632 #endif                          /* not EXTRACT_MACROS */
633
634 #endif                          /* REGEX_DEBUG_FLAG */
635
636 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
637    SOURCE must be an lvalue.  */
638
639 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
640         do {                                                            \
641                 EXTRACT_NUMBER (destination, source);                   \
642                 (source) += 2;                                          \
643         } while (0)
644
645 #ifdef REGEX_DEBUG_FLAG
646 static void
647 extract_number_and_incr(int *destination, unsigned char **source)
648 {
649         extract_number(destination, *source);
650         *source += 2;
651 }
652
653 #ifndef EXTRACT_MACROS
654 #undef EXTRACT_NUMBER_AND_INCR
655 #define EXTRACT_NUMBER_AND_INCR(dest, src)      \
656         extract_number_and_incr (&dest, &src)
657 #endif                          /* not EXTRACT_MACROS */
658
659 #endif                          /* REGEX_DEBUG_FLAG */
660 \f
661 /* If REGEX_DEBUG_FLAG is defined, Regex prints many voluminous messages about
662    what it is doing (if the variable `debug' is nonzero).  If linked with the
663    main program in `iregex.c', you can enter patterns and strings interactively.
664    And if linked with the main program in `main.c' and the other test files, you
665    can run the already-written tests.  */
666
667 #if defined (REGEX_DEBUG_FLAG)
668
669 /* We use standard I/O for debugging.  */
670 #include <stdio.h>
671
672 #ifndef emacs
673 /* XEmacs provides its own version of assert() */
674 /* It is useful to test things that ``must'' be true when debugging.  */
675 #include <assert.h>
676 #endif
677
678 #if 0
679 /* no point in having this one, since we do not offer any accessor to this */
680 static int debug = 0;
681 #endif
682
683 #define DEBUG_STATEMENT(e)              e
684 #define DEBUG_PRINT1(x)                 printf(x)
685 #define DEBUG_PRINT2(x1, x2)            printf(x1, x2)
686 #define DEBUG_PRINT3(x1, x2, x3)        printf(x1, x2, x3)
687 #define DEBUG_PRINT4(x1, x2, x3, x4)    printf(x1, x2, x3, x4)
688 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
689         print_partial_compiled_pattern(s, e)
690 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
691         print_double_string (w, s1, sz1, s2, sz2)
692
693 /* Print the fastmap in human-readable form.  */
694
695 static void print_fastmap(char *fastmap)
696 {
697         unsigned was_a_range = 0;
698         unsigned i = 0;
699
700         while (i < (1 << BYTEWIDTH)) {
701                 if (fastmap[i++]) {
702                         was_a_range = 0;
703                         putchar(i - 1);
704                         while (i < (1 << BYTEWIDTH) && fastmap[i]) {
705                                 was_a_range = 1;
706                                 i++;
707                         }
708                         if (was_a_range) {
709                                 putchar('-');
710                                 putchar(i - 1);
711                         }
712                 }
713         }
714         putchar('\n');
715 }
716
717 /* Print a compiled pattern string in human-readable form, starting at
718    the START pointer into it and ending just before the pointer END.  */
719
720 /* can't help it ... the ugly mule code changes `start' upon alignment,
721  * so this can't have the const qualifier ... bugger */
722 static void
723 print_partial_compiled_pattern(unsigned char *start, re_char *end)
724 {
725         int mcnt, mcnt2;
726         unsigned char *p = start;
727         re_char *pend = end;
728
729         if (start == NULL) {
730                 puts("(null)");
731                 return;
732         }
733
734         /* Loop over pattern commands.  */
735         while (p < pend) {
736                 printf("%ld:\t", (long)(p - start));
737
738                 switch ((const re_opcode_t)*p++) {
739                 case no_op:
740                         printf("/no_op");
741                         break;
742
743                 case exactn:
744                         mcnt = *p++;
745                         printf("/exactn/%d", mcnt);
746                         do {
747                                 putchar('/');
748                                 putchar(*p++);
749                         }
750                         while (--mcnt);
751                         break;
752
753                 case start_memory:
754                         mcnt = *p++;
755                         printf("/start_memory/%d/%d", mcnt, *p++);
756                         break;
757
758                 case stop_memory:
759                         mcnt = *p++;
760                         printf("/stop_memory/%d/%d", mcnt, *p++);
761                         break;
762
763                 case duplicate:
764                         printf("/duplicate/%d", *p++);
765                         break;
766
767                 case anychar:
768                         printf("/anychar");
769                         break;
770
771                 case charset:
772                 case charset_not: {
773                         REGISTER int c, last = -100;
774                         REGISTER int in_range = 0;
775
776                         printf("/charset [%s",
777                                (re_opcode_t) * (p - 1) == charset_not
778                                ? "^" : "");
779
780                         assert(p + *p < pend);
781
782                         for (c = 0; c < 256; c++)
783                                 if (((unsigned char)(c / 8) < *p)
784                                     && (p[1 + (c / 8)] &
785                                         (1 << (c % 8)))) {
786                                         /* Are we starting a range?  */
787                                         if (last + 1 == c && !in_range) {
788                                                 putchar('-');
789                                                 in_range = 1;
790                                         }
791                                         /* Have we broken a range?  */
792                                         else if (last + 1 != c
793                                                  && in_range) {
794                                                 putchar(last);
795                                                 in_range = 0;
796                                         }
797
798                                         if (!in_range)
799                                                 putchar(c);
800
801                                         last = c;
802                                 }
803
804                         if (in_range)
805                                 putchar(last);
806
807                         putchar(']');
808
809                         p += 1 + *p;
810                         break;
811                 }
812
813 #ifdef MULE
814                 case charset_mule:
815                 case charset_mule_not: {
816                         int nentries, i;
817
818                         printf("/charset_mule [%s",
819                                (re_opcode_t) * (p - 1) == charset_mule_not
820                                ? "^" : "");
821                         /* u_r_t_nentries() CHANGES its arg, why on earth
822                          * is this marked const here then? :O -hrop */
823                         nentries = unified_range_table_nentries(p);
824                         for (i = 0; i < nentries; i++) {
825                                 EMACS_INT first, last;
826                                 Lisp_Object dummy_val;
827
828                                 /* u_r_t_get_range CHANGES its arg ...
829                                  * p can't be const thence */
830                                 unified_range_table_get_range(p, i,
831                                                               &first,
832                                                               &last,
833                                                               &dummy_val);
834                                 if (first < 0x100)
835                                         putchar(first);
836                                 else
837                                         printf("(0x%lx)", (long)first);
838                                 if (first != last) {
839                                         putchar('-');
840                                         if (last < 0x100)
841                                                 putchar(last);
842                                         else
843                                                 printf("(0x%lx)",
844                                                        (long)last);
845                                 }
846                         }
847                         putchar(']');
848                         p += unified_range_table_bytes_used(p);
849                 }
850                         break;
851 #endif
852
853                 case begline:
854                         printf("/begline");
855                         break;
856
857                 case endline:
858                         printf("/endline");
859                         break;
860
861                 case on_failure_jump:
862                         extract_number_and_incr(&mcnt, &p);
863                         printf("/on_failure_jump to %ld",
864                                (long)(p + mcnt - start));
865                         break;
866
867                 case on_failure_keep_string_jump:
868                         extract_number_and_incr(&mcnt, &p);
869                         printf("/on_failure_keep_string_jump to %ld",
870                                (long)(p + mcnt - start));
871                         break;
872
873                 case dummy_failure_jump:
874                         extract_number_and_incr(&mcnt, &p);
875                         printf("/dummy_failure_jump to %ld",
876                                (long)(p + mcnt - start));
877                         break;
878
879                 case push_dummy_failure:
880                         printf("/push_dummy_failure");
881                         break;
882
883                 case maybe_pop_jump:
884                         extract_number_and_incr(&mcnt, &p);
885                         printf("/maybe_pop_jump to %ld",
886                                (long)(p + mcnt - start));
887                         break;
888
889                 case pop_failure_jump:
890                         extract_number_and_incr(&mcnt, &p);
891                         printf("/pop_failure_jump to %ld",
892                                (long)(p + mcnt - start));
893                         break;
894
895                 case jump_past_alt:
896                         extract_number_and_incr(&mcnt, &p);
897                         printf("/jump_past_alt to %ld",
898                                (long)(p + mcnt - start));
899                         break;
900
901                 case jump:
902                         extract_number_and_incr(&mcnt, &p);
903                         printf("/jump to %ld", (long)(p + mcnt - start));
904                         break;
905
906                 case succeed_n:
907                         extract_number_and_incr(&mcnt, &p);
908                         extract_number_and_incr(&mcnt2, &p);
909                         printf("/succeed_n to %ld, %d times",
910                                (long)(p + mcnt - start), mcnt2);
911                         break;
912
913                 case jump_n:
914                         extract_number_and_incr(&mcnt, &p);
915                         extract_number_and_incr(&mcnt2, &p);
916                         printf("/jump_n to %ld, %d times",
917                                (long)(p + mcnt - start), mcnt2);
918                         break;
919
920                 case set_number_at:
921                         extract_number_and_incr(&mcnt, &p);
922                         extract_number_and_incr(&mcnt2, &p);
923                         printf("/set_number_at location %ld to %d",
924                                (long)(p + mcnt - start), mcnt2);
925                         break;
926
927                 case wordbound:
928                         printf("/wordbound");
929                         break;
930
931                 case notwordbound:
932                         printf("/notwordbound");
933                         break;
934
935                 case wordbeg:
936                         printf("/wordbeg");
937                         break;
938
939                 case wordend:
940                         printf("/wordend");
941
942 #ifdef emacs
943                 case before_dot:
944                         printf("/before_dot");
945                         break;
946
947                 case at_dot:
948                         printf("/at_dot");
949                         break;
950
951                 case after_dot:
952                         printf("/after_dot");
953                         break;
954
955                 case syntaxspec:
956                         printf("/syntaxspec");
957                         mcnt = *p++;
958                         printf("/%d", mcnt);
959                         break;
960
961                 case notsyntaxspec:
962                         printf("/notsyntaxspec");
963                         mcnt = *p++;
964                         printf("/%d", mcnt);
965                         break;
966
967 #ifdef MULE
968 /* 97/2/17 jhod Mule category patch */
969                 case categoryspec:
970                         printf("/categoryspec");
971                         mcnt = *p++;
972                         printf("/%d", mcnt);
973                         break;
974
975                 case notcategoryspec:
976                         printf("/notcategoryspec");
977                         mcnt = *p++;
978                         printf("/%d", mcnt);
979                         break;
980 /* end of category patch */
981 #endif                          /* MULE */
982 #endif                          /* emacs */
983
984                 case wordchar:
985                         printf("/wordchar");
986                         break;
987
988                 case notwordchar:
989                         printf("/notwordchar");
990                         break;
991
992                 case begbuf:
993                         printf("/begbuf");
994                         break;
995
996                 case endbuf:
997                         printf("/endbuf");
998                         break;
999
1000                 case succeed:
1001                 default:
1002                         printf("?%d", *(p - 1));
1003                 }
1004
1005                 putchar('\n');
1006         }
1007
1008         printf("%ld:\tend of pattern.\n", (long)(p - start));
1009 }
1010
1011 static void
1012 print_compiled_pattern(struct re_pattern_buffer *bufp)
1013 {
1014 #if 0
1015         /* cant be const */
1016         re_char *buffer = bufp->buffer;
1017 #else
1018         unsigned char *buffer = bufp->buffer;
1019 #endif
1020
1021         print_partial_compiled_pattern(buffer, buffer + bufp->used);
1022         printf("%ld bytes used/%ld bytes allocated.\n", bufp->used,
1023                bufp->allocated);
1024
1025         if (bufp->fastmap_accurate && bufp->fastmap) {
1026                 printf("fastmap: ");
1027                 print_fastmap(bufp->fastmap);
1028         }
1029
1030         printf("re_nsub: %ld\t", (long)bufp->re_nsub);
1031         printf("re_ngroups: %ld\t", (long)bufp->re_ngroups);
1032         printf("regs_alloc: %d\t", bufp->regs_allocated);
1033         printf("can_be_null: %d\t", bufp->can_be_null);
1034         printf("newline_anchor: %d\n", bufp->newline_anchor);
1035         printf("no_sub: %d\t", bufp->no_sub);
1036         printf("not_bol: %d\t", bufp->not_bol);
1037         printf("not_eol: %d\t", bufp->not_eol);
1038         printf("syntax: %d\n", bufp->syntax);
1039         /* Perhaps we should print the translate table?  */
1040         /* and maybe the category table? */
1041
1042         if (bufp->external_to_internal_register) {
1043                 int i;
1044
1045                 printf("external_to_internal_register:\n");
1046                 for (i = 0; i <= bufp->re_nsub; i++) {
1047                         if (i > 0)
1048                                 printf(", ");
1049                         printf("%d -> %d", i,
1050                                bufp->external_to_internal_register[i]);
1051                 }
1052                 printf("\n");
1053         }
1054 }
1055
1056 static void
1057 print_double_string(re_char * where, re_char * string1, int size1,
1058                     re_char * string2, int size2)
1059 {
1060         if (where == NULL)
1061                 printf("(null)");
1062         else {
1063                 Element_count this_char;
1064
1065                 if (FIRST_STRING_P(where)) {
1066                         for (this_char = where - string1; this_char < size1;
1067                              this_char++)
1068                                 putchar(string1[this_char]);
1069
1070                         where = string2;
1071                 }
1072
1073                 for (this_char = where - string2; this_char < size2;
1074                      this_char++)
1075                         putchar(string2[this_char]);
1076         }
1077 }
1078
1079 #else                           /* not REGEX_DEBUG_FLAG */
1080
1081 #undef assert
1082 #define assert(e)
1083
1084 #define DEBUG_STATEMENT(e)
1085 #define DEBUG_PRINT1(x)
1086 #define DEBUG_PRINT2(x1, x2)
1087 #define DEBUG_PRINT3(x1, x2, x3)
1088 #define DEBUG_PRINT4(x1, x2, x3, x4)
1089 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1090 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1091
1092 #endif                          /* REGEX_DEBUG_FLAG */
1093 \f
1094 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1095    also be assigned to arbitrarily: each pattern buffer stores its own
1096    syntax, so it can be changed between regex compilations.  */
1097 /* This has no initializer because initialized variables in Emacs
1098    become read-only after dumping.  */
1099 reg_syntax_t re_syntax_options;
1100
1101 /* Specify the precise syntax of regexps for compilation.  This provides
1102    for compatibility for various utilities which historically have
1103    different, incompatible syntaxes.
1104
1105    The argument SYNTAX is a bit mask comprised of the various bits
1106    defined in regex.h.  We return the old syntax.  */
1107
1108 reg_syntax_t re_set_syntax(reg_syntax_t syntax)
1109 {
1110         reg_syntax_t ret = re_syntax_options;
1111
1112         re_syntax_options = syntax;
1113         return ret;
1114 }
1115 \f
1116 /* This table gives an error message for each of the error codes listed
1117    in regex.h.  Obviously the order here has to be same as there.
1118    POSIX doesn't require that we do anything for REG_NOERROR,
1119    but why not be nice?  */
1120
1121 static const char *re_error_msgid[] = {
1122         "Success",              /* REG_NOERROR */
1123         "No match",             /* REG_NOMATCH */
1124         "Invalid regular expression",   /* REG_BADPAT */
1125         "Invalid collation character",  /* REG_ECOLLATE */
1126         "Invalid character class name", /* REG_ECTYPE */
1127         "Trailing backslash",   /* REG_EESCAPE */
1128         "Invalid back reference",       /* REG_ESUBREG */
1129         "Unmatched [ or [^",    /* REG_EBRACK */
1130         "Unmatched ( or \\(",   /* REG_EPAREN */
1131         "Unmatched \\{",        /* REG_EBRACE */
1132         "Invalid content of \\{\\}",    /* REG_BADBR */
1133         "Invalid range end",    /* REG_ERANGE */
1134         "Memory exhausted",     /* REG_ESPACE */
1135         "Invalid preceding regular expression", /* REG_BADRPT */
1136         "Premature end of regular expression",  /* REG_EEND */
1137         "Regular expression too big",   /* REG_ESIZE */
1138         "Unmatched ) or \\)",   /* REG_ERPAREN */
1139 #ifdef emacs
1140         "Invalid syntax designator",    /* REG_ESYNTAX */
1141 #endif
1142 #ifdef MULE
1143         "Ranges may not span charsets", /* REG_ERANGESPAN */
1144         "Invalid category designator",  /* REG_ECATEGORY */
1145 #endif
1146 };
1147 \f
1148 /* Avoiding alloca during matching, to placate r_alloc.  */
1149
1150 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1151    searching and matching functions should not call alloca.  On some
1152    systems, alloca is implemented in terms of malloc, and if we're
1153    using the relocating allocator routines, then malloc could cause a
1154    relocation, which might (if the strings being searched are in the
1155    ralloc heap) shift the data out from underneath the regexp
1156    routines.
1157
1158    Here's another reason to avoid allocation: Emacs
1159    processes input from X in a signal handler; processing X input may
1160    call malloc; if input arrives while a matching routine is calling
1161    malloc, then we're scrod.  But Emacs can't just block input while
1162    calling matching routines; then we don't notice interrupts when
1163    they come in.  So, Emacs blocks input around all regexp calls
1164    except the matching calls, which it leaves unprotected, in the
1165    faith that they will not malloc.  */
1166
1167 /* Normally, this is fine.  */
1168 #define MATCH_MAY_ALLOCATE
1169
1170 /* When using GNU C, we are not REALLY using the C alloca, no matter
1171    what config.h may say.  So don't take precautions for it.  */
1172 #ifdef __GNUC__
1173 #undef C_ALLOCA
1174 #endif
1175
1176 /* The match routines may not allocate if (1) they would do it with malloc
1177    and (2) it's not safe for them to use malloc.
1178    Note that if REL_ALLOC is defined, matching would not use malloc for the
1179    failure stack, but we would still use it for the register vectors;
1180    so REL_ALLOC should not affect this.  */
1181 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1182 #undef MATCH_MAY_ALLOCATE
1183 #endif
1184 \f
1185 /* Failure stack declarations and macros; both re_compile_fastmap and
1186    re_match_2 use a failure stack.  These have to be macros because of
1187    REGEX_ALLOCATE_STACK.  */
1188
1189 /* Number of failure points for which to initially allocate space
1190    when matching.  If this number is exceeded, we allocate more
1191    space, so it is not a hard limit.  */
1192 #ifndef INIT_FAILURE_ALLOC
1193 #define INIT_FAILURE_ALLOC 20
1194 #endif
1195
1196 /* Roughly the maximum number of failure points on the stack.  Would be
1197    exactly that if always used MAX_FAILURE_SPACE each time we failed.
1198    This is a variable only so users of regex can assign to it; we never
1199    change it ourselves.  */
1200 #if defined (MATCH_MAY_ALLOCATE) || defined (REGEX_MALLOC)
1201 /* 4400 was enough to cause a crash on Alpha OSF/1,
1202    whose default stack limit is 2mb.  */
1203 int re_max_failures = 50000;
1204 #else
1205 int re_max_failures = 2000;
1206 #endif
1207
1208 union fail_stack_elt {
1209         const unsigned char *pointer;
1210         int integer;
1211 };
1212
1213 typedef union fail_stack_elt fail_stack_elt_t;
1214
1215 typedef struct {
1216         fail_stack_elt_t *stack;
1217         Element_count size;
1218         Element_count avail;    /* Offset of next open position.  */
1219 } fail_stack_type;
1220
1221 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1222 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1223 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1224
1225 /* Define macros to initialize and free the failure stack.
1226    Do `return -2' if the alloc fails.  */
1227
1228 #ifdef MATCH_MAY_ALLOCATE
1229 #define INIT_FAIL_STACK()                                               \
1230         do {                                                            \
1231                 fail_stack.stack = (fail_stack_elt_t *)                 \
1232                         REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC *      \
1233                                               sizeof (fail_stack_elt_t)); \
1234                                                                         \
1235                 if (fail_stack.stack == NULL)                           \
1236                         return -2;                                      \
1237                                                                         \
1238                 fail_stack.size = INIT_FAILURE_ALLOC;                   \
1239                 fail_stack.avail = 0;                                   \
1240         } while (0)
1241
1242 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1243 #else
1244 #define INIT_FAIL_STACK()                       \
1245         do {                                    \
1246                 fail_stack.avail = 0;           \
1247         } while (0)
1248
1249 #define RESET_FAIL_STACK()
1250 #endif
1251
1252 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1253
1254    Return 1 if succeeds, and 0 if either ran out of memory
1255    allocating space for it or it was already too large.
1256
1257    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1258
1259 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
1260   ((int) (fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS        \
1261    ? 0                                                                  \
1262    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
1263         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
1264           (fail_stack).size * sizeof (fail_stack_elt_t),                \
1265           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
1266                                                                         \
1267       (fail_stack).stack == NULL                                        \
1268       ? 0                                                               \
1269       : ((fail_stack).size <<= 1,                                       \
1270          1)))
1271
1272 /* Push pointer POINTER on FAIL_STACK.
1273    Return 1 if was able to do so and 0 if ran out of memory allocating
1274    space to do so.  */
1275 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
1276   ((FAIL_STACK_FULL ()                                                  \
1277     && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
1278    ? 0                                                                  \
1279    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
1280       1))
1281
1282 /* Push a pointer value onto the failure stack.
1283    Assumes the variable `fail_stack'.  Probably should only
1284    be called from within `PUSH_FAILURE_POINT'.  */
1285 #define PUSH_FAILURE_POINTER(item)                                      \
1286         fail_stack.stack[fail_stack.avail++].pointer =                  \
1287                 (const unsigned char*)(item)
1288
1289 /* This pushes an integer-valued item onto the failure stack.
1290    Assumes the variable `fail_stack'.  Probably should only
1291    be called from within `PUSH_FAILURE_POINT'.  */
1292 #define PUSH_FAILURE_INT(item)                                  \
1293         fail_stack.stack[fail_stack.avail++].integer = (item)
1294
1295 /* Push a fail_stack_elt_t value onto the failure stack.
1296    Assumes the variable `fail_stack'.  Probably should only
1297    be called from within `PUSH_FAILURE_POINT'.  */
1298 #define PUSH_FAILURE_ELT(item)                                  \
1299         fail_stack.stack[fail_stack.avail++] =  (item)
1300
1301 /* These three POP... operations complement the three PUSH... operations.
1302    All assume that `fail_stack' is nonempty.  */
1303 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1304 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1305 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1306
1307 /* Used to omit pushing failure point id's when we're not debugging.  */
1308 #ifdef REGEX_DEBUG_FLAG
1309 #define DEBUG_PUSH PUSH_FAILURE_INT
1310 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1311 #else
1312 #define DEBUG_PUSH(item)
1313 #define DEBUG_POP(item_addr)
1314 #endif
1315
1316 /* Push the information about the state we will need
1317    if we ever fail back to it.
1318
1319    Requires variables fail_stack, regstart, regend, reg_info, and
1320    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1321    declared.
1322
1323    Does `return FAILURE_CODE' if runs out of memory.  */
1324
1325 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
1326 #define DECLARE_DESTINATION char *destination
1327 #else
1328 #define DECLARE_DESTINATION DECLARE_NOTHING
1329 #endif
1330
1331 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1332         do {                                                            \
1333                 DECLARE_DESTINATION;                                    \
1334                 /* Must be int, so when we don't save any registers, the\
1335                    arithmetic \ of 0 + -1 isn't done as unsigned.  */   \
1336                 int this_reg;                                           \
1337                                                                         \
1338                 DEBUG_STATEMENT (failure_id++);                         \
1339                 DEBUG_STATEMENT (nfailure_points_pushed++);             \
1340                 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1341                 DEBUG_PRINT2 ("  Before push, next avail: %lu\n",       \
1342                               (long unsigned int)(fail_stack).avail);   \
1343                 DEBUG_PRINT2 ("                     size: %lu\n",       \
1344                               (long unsigned int)(fail_stack).size);    \
1345                                                                         \
1346                 DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS); \
1347                 DEBUG_PRINT2 ("     available: %ld\n",                  \
1348                               (long) REMAINING_AVAIL_SLOTS);            \
1349                                                                         \
1350                 /* Ensure we have enough space allocated for what       \
1351                  * we will push.  */                                    \
1352                 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) {     \
1353                         if (!DOUBLE_FAIL_STACK (fail_stack))            \
1354                                 return failure_code;                    \
1355                                                                         \
1356                         DEBUG_PRINT2 ("\n  Doubled stack; size now: %lu\n", \
1357                                       (unsigned long) (fail_stack).size); \
1358                         DEBUG_PRINT2 ("  slots available: %ld\n",       \
1359                                       (long) REMAINING_AVAIL_SLOTS);    \
1360                 }                                                       \
1361                                                                         \
1362                 /* Push the info, starting with the registers.  */      \
1363                 DEBUG_PRINT1 ("\n");                                    \
1364                                                                         \
1365                 for (this_reg = lowest_active_reg;                      \
1366                      this_reg <= highest_active_reg;                    \
1367                      this_reg++) {                                      \
1368                         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg); \
1369                         DEBUG_STATEMENT (num_regs_pushed++);            \
1370                                                                         \
1371                         DEBUG_PRINT2("    start: %p\n",                 \
1372                                      regstart[this_reg]);               \
1373                         PUSH_FAILURE_POINTER(regstart[this_reg]);       \
1374                                                                         \
1375                         DEBUG_PRINT2 ("    end: %p\n",                  \
1376                                       regend[this_reg]);                \
1377                         PUSH_FAILURE_POINTER(regend[this_reg]); \
1378                                                                         \
1379                         DEBUG_PRINT2 ("    info: 0x%p\n      ",         \
1380                                       &reg_info[this_reg]);             \
1381                         DEBUG_PRINT2 (" match_null=%d",                 \
1382                                       REG_MATCH_NULL_STRING_P           \
1383                                       (reg_info[this_reg]));            \
1384                         DEBUG_PRINT2 (" active=%d",                     \
1385                                       IS_ACTIVE (reg_info[this_reg]));  \
1386                         DEBUG_PRINT2 (" matched_something=%d",          \
1387                                       MATCHED_SOMETHING                 \
1388                                       (reg_info[this_reg]));            \
1389                         DEBUG_PRINT2 (" ever_matched_something=%d",     \
1390                                       EVER_MATCHED_SOMETHING            \
1391                                       (reg_info[this_reg]));            \
1392                         DEBUG_PRINT1 ("\n");                            \
1393                         PUSH_FAILURE_ELT(reg_info[this_reg].word);      \
1394                 }                                                       \
1395                                                                         \
1396                 DEBUG_PRINT2 ("  Pushing  low active reg: %d\n",        \
1397                               lowest_active_reg);                       \
1398                 PUSH_FAILURE_INT (lowest_active_reg);                   \
1399                                                                         \
1400                 DEBUG_PRINT2 ("  Pushing high active reg: %d\n",        \
1401                               highest_active_reg);                      \
1402                 PUSH_FAILURE_INT (highest_active_reg);                  \
1403                                                                         \
1404                 DEBUG_PRINT2 ("  Pushing pattern 0x%lx: \n",            \
1405                               (long) pattern_place);                    \
1406                 DEBUG_PRINT_COMPILED_PATTERN(bufp, pattern_place, pend); \
1407                 PUSH_FAILURE_POINTER (pattern_place);                   \
1408                                                                         \
1409                 DEBUG_PRINT2("  Pushing string %p: `",                  \
1410                              string_place);                             \
1411                 DEBUG_PRINT_DOUBLE_STRING(string_place,                 \
1412                                           string1, size1,               \
1413                                           string2, size2);              \
1414                 DEBUG_PRINT1("'\n");                                    \
1415                 PUSH_FAILURE_POINTER (string_place);                    \
1416                                                                         \
1417                 DEBUG_PRINT2("  Pushing failure id: %u\n", failure_id); \
1418                 DEBUG_PUSH(failure_id);                                 \
1419         } while (0)
1420
1421 /* This is the number of items that are pushed and popped on the stack
1422    for each register.  */
1423 #define NUM_REG_ITEMS  3
1424
1425 /* Individual items aside from the registers.  */
1426 #ifdef DEBUG
1427 #define NUM_NONREG_ITEMS 5      /* Includes failure point id.  */
1428 #else
1429 #define NUM_NONREG_ITEMS 4
1430 #endif
1431
1432 /* We push at most this many items on the stack.  */
1433 /* We used to use (num_regs - 1), which is the number of registers
1434    this regexp will save; but that was changed to 5
1435    to avoid stack overflow for a regexp with lots of parens.  */
1436 #define MAX_FAILURE_ITEMS (5 * (NUM_REG_ITEMS + NUM_NONREG_ITEMS))
1437
1438 /* We actually push this many items.  */
1439 #define NUM_FAILURE_ITEMS                                               \
1440   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
1441     + NUM_NONREG_ITEMS)
1442
1443 /* How many items can still be added to the stack without overflowing it.  */
1444 #define REMAINING_AVAIL_SLOTS ((int) ((fail_stack).size - (fail_stack).avail))
1445
1446 /* Pops what PUSH_FAIL_STACK pushes.
1447
1448    We restore into the parameters, all of which should be lvalues:
1449      STR -- the saved data position.
1450      PAT -- the saved pattern position.
1451      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1452      REGSTART, REGEND -- arrays of string positions.
1453      REG_INFO -- array of information about each subexpression.
1454
1455    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1456    `pend', `string1', `size1', `string2', and `size2'.  */
1457
1458 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg,                  \
1459                           regstart, regend, reg_info)                   \
1460         do {                                                            \
1461                 int this_reg;                                           \
1462                 const unsigned char *string_temp;                       \
1463                 DEBUG_STATEMENT (fail_stack_elt_t ffailure_id);         \
1464                                                                         \
1465                 assert (!FAIL_STACK_EMPTY ());                          \
1466                                                                         \
1467                 /* Remove failure points and point to                   \
1468                  * how many regs pushed.  */                            \
1469                 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                  \
1470                 DEBUG_PRINT2 ("  Before pop, next avail: %lu\n",        \
1471                               (unsigned long) fail_stack.avail);        \
1472                 DEBUG_PRINT2 ("                    size: %lu\n",        \
1473                               (unsigned long) fail_stack.size);         \
1474                                                                         \
1475                 assert (fail_stack.avail >= NUM_NONREG_ITEMS);          \
1476                                                                         \
1477                 DEBUG_POP (&ffailure_id.integer);                       \
1478                 DEBUG_PRINT2 ("  Popping failure id: %u\n",             \
1479                               * (unsigned int *) &ffailure_id);         \
1480                                                                         \
1481                 /* If the saved string location is NULL, it             \
1482                    came from an on_failure_keep_string_jump opcode,     \
1483                    and we want to throw away the saved NULL, thus       \
1484                    retaining our current position in the string.  */    \
1485                 string_temp = (const unsigned char *)                   \
1486                         POP_FAILURE_POINTER ();                         \
1487                 if (string_temp != NULL)                                \
1488                         str = string_temp;                              \
1489                                                                         \
1490                 DEBUG_PRINT2("  Popping string %p: `", str);            \
1491                 DEBUG_PRINT_DOUBLE_STRING(str,                          \
1492                                           string1, size1,               \
1493                                           string2, size2);              \
1494                 DEBUG_PRINT1("'\n");                                    \
1495                                                                         \
1496                 pat = (unsigned char *) POP_FAILURE_POINTER();          \
1497                 DEBUG_PRINT2 ("  Popping pattern %p: ", pat);           \
1498                 DEBUG_PRINT_COMPILED_PATTERN(bufp, pat, pend);          \
1499                                                                         \
1500                 /* Restore register info.  */                           \
1501                 high_reg = (unsigned) POP_FAILURE_INT ();               \
1502                 DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg); \
1503                                                                         \
1504                 low_reg = (unsigned) POP_FAILURE_INT ();                \
1505                 DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg); \
1506                                                                         \
1507                 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) { \
1508                         DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg); \
1509                                                                         \
1510                         reg_info[this_reg].word = POP_FAILURE_ELT ();   \
1511                         DEBUG_PRINT2 ("      info: %p\n",               \
1512                                       &reg_info[this_reg]);             \
1513                                                                         \
1514                         regend[this_reg] = POP_FAILURE_POINTER ();      \
1515                         DEBUG_PRINT2 ("      end: %p\n",                \
1516                                       regend[this_reg]);                \
1517                                                                         \
1518                         regstart[this_reg] = POP_FAILURE_POINTER ();    \
1519                         DEBUG_PRINT2 ("      start: %p\n",              \
1520                                       regstart[this_reg]);              \
1521                 }                                                       \
1522                                                                         \
1523                 set_regs_matched_done = 0;                              \
1524                 DEBUG_STATEMENT (nfailure_points_popped++);             \
1525         } while (0)                     /* POP_FAILURE_POINT */
1526 \f
1527 /* Structure for per-register (a.k.a. per-group) information.
1528    Other register information, such as the
1529    starting and ending positions (which are addresses), and the list of
1530    inner groups (which is a bits list) are maintained in separate
1531    variables.
1532
1533    We are making a (strictly speaking) nonportable assumption here: that
1534    the compiler will pack our bit fields into something that fits into
1535    the type of `word', i.e., is something that fits into one item on the
1536    failure stack.  */
1537
1538 typedef union {
1539         fail_stack_elt_t word;
1540         struct {
1541                 /* This field is one if this group can match the empty string,
1542                    zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1543 #define MATCH_NULL_UNSET_VALUE 3
1544                 unsigned match_null_string_p:2;
1545                 unsigned is_active:1;
1546                 unsigned matched_something:1;
1547                 unsigned ever_matched_something:1;
1548         } bits;
1549 } register_info_type;
1550
1551 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1552 #define IS_ACTIVE(R)  ((R).bits.is_active)
1553 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1554 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1555
1556 /* Call this when have matched a real character; it sets `matched' flags
1557    for the subexpressions which we are currently inside.  Also records
1558    that those subexprs have matched.  */
1559 #define SET_REGS_MATCHED()                                              \
1560   do                                                                    \
1561     {                                                                   \
1562       if (!set_regs_matched_done)                                       \
1563         {                                                               \
1564           Element_count r;                                              \
1565           set_regs_matched_done = 1;                                    \
1566           for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
1567             {                                                           \
1568               MATCHED_SOMETHING (reg_info[r])                           \
1569                 = EVER_MATCHED_SOMETHING (reg_info[r])                  \
1570                 = 1;                                                    \
1571             }                                                           \
1572         }                                                               \
1573     }                                                                   \
1574   while (0)
1575
1576 /* Registers are set to a sentinel when they haven't yet matched.  */
1577 static unsigned char reg_unset_dummy;
1578 #define REG_UNSET_VALUE (&reg_unset_dummy)
1579 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1580 \f
1581 /* Subroutine declarations and macros for regex_compile.  */
1582
1583 /* Fetch the next character in the uncompiled pattern---translating it
1584    if necessary.  Also cast from a signed character in the constant
1585    string passed to us by the user to an unsigned char that we can use
1586    as an array index (in, e.g., `translate').  */
1587 #define PATFETCH(c)                                                     \
1588   do {                                                                  \
1589     PATFETCH_RAW (c);                                                   \
1590     c = TRANSLATE (c);                                                  \
1591   } while (0)
1592
1593 /* Fetch the next character in the uncompiled pattern, with no
1594    translation.  */
1595 #define PATFETCH_RAW(c)                                                 \
1596   do {if (p == pend) return REG_EEND;                                   \
1597     assert (p < pend);                                                  \
1598     c = charptr_emchar (p);                                             \
1599     INC_CHARPTR (p);                                                    \
1600   } while (0)
1601
1602 /* Go backwards one character in the pattern.  */
1603 #define PATUNFETCH DEC_CHARPTR (p)
1604
1605 #ifdef MULE
1606
1607 #define PATFETCH_EXTENDED(emch)                                         \
1608   do {if (p == pend) return REG_EEND;                                   \
1609     assert (p < pend);                                                  \
1610     emch = charptr_emchar ((const Bufbyte *) p);                        \
1611     INC_CHARPTR (p);                                                    \
1612     if (TRANSLATE_P (translate) && emch < 0x80)                         \
1613       emch = (Emchar) (unsigned char) RE_TRANSLATE (emch);              \
1614   } while (0)
1615
1616 #define PATFETCH_RAW_EXTENDED(emch)                                     \
1617   do {if (p == pend) return REG_EEND;                                   \
1618     assert (p < pend);                                                  \
1619     emch = charptr_emchar ((const Bufbyte *) p);                        \
1620     INC_CHARPTR (p);                                                    \
1621   } while (0)
1622
1623 #define PATUNFETCH_EXTENDED DEC_CHARPTR (p)
1624
1625 #define PATFETCH_EITHER(emch)                   \
1626   do {                                          \
1627     if (has_extended_chars)                     \
1628       PATFETCH_EXTENDED (emch);                 \
1629     else                                        \
1630       PATFETCH (emch);                          \
1631   } while (0)
1632
1633 #define PATFETCH_RAW_EITHER(emch)               \
1634   do {                                          \
1635     if (has_extended_chars)                     \
1636       PATFETCH_RAW_EXTENDED (emch);             \
1637     else                                        \
1638       PATFETCH_RAW (emch);                      \
1639   } while (0)
1640
1641 #define PATUNFETCH_EITHER                       \
1642   do {                                          \
1643     if (has_extended_chars)                     \
1644       PATUNFETCH_EXTENDED (emch);               \
1645     else                                        \
1646       PATUNFETCH (emch);                        \
1647   } while (0)
1648
1649 #else                           /* not MULE */
1650
1651 #define PATFETCH_EITHER(emch) PATFETCH (emch)
1652 #define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch)
1653 #define PATUNFETCH_EITHER PATUNFETCH
1654
1655 #endif                          /* MULE */
1656
1657 /* If `translate' is non-null, return translate[D], else just D.  We
1658    cast the subscript to translate because some data is declared as
1659    `char *', to avoid warnings when a string constant is passed.  But
1660    when we use a character as a subscript we must make it unsigned.  */
1661 #define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d))
1662
1663 /* Macros for outputting the compiled pattern into `buffer'.  */
1664
1665 /* If the buffer isn't allocated when it comes in, use this.  */
1666 #define INIT_BUF_SIZE  32
1667
1668 /* Make sure we have at least N more bytes of space in buffer.  */
1669 #define GET_BUFFER_SPACE(n)                                             \
1670     while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated)  \
1671       EXTEND_BUFFER ()
1672
1673 /* Make sure we have one more byte of buffer space and then add C to it.  */
1674 #define BUF_PUSH(c)                                                     \
1675   do {                                                                  \
1676     GET_BUFFER_SPACE (1);                                               \
1677     *buf_end++ = (unsigned char) (c);                                   \
1678   } while (0)
1679
1680 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1681 #define BUF_PUSH_2(c1, c2)                                              \
1682   do {                                                                  \
1683     GET_BUFFER_SPACE (2);                                               \
1684     *buf_end++ = (unsigned char) (c1);                                  \
1685     *buf_end++ = (unsigned char) (c2);                                  \
1686   } while (0)
1687
1688 /* As with BUF_PUSH_2, except for three bytes.  */
1689 #define BUF_PUSH_3(c1, c2, c3)                                          \
1690   do {                                                                  \
1691     GET_BUFFER_SPACE (3);                                               \
1692     *buf_end++ = (unsigned char) (c1);                                  \
1693     *buf_end++ = (unsigned char) (c2);                                  \
1694     *buf_end++ = (unsigned char) (c3);                                  \
1695   } while (0)
1696
1697 /* Store a jump with opcode OP at LOC to location TO.  We store a
1698    relative address offset by the three bytes the jump itself occupies.  */
1699 #define STORE_JUMP(op, loc, to) \
1700   store_op1 (op, loc, (to) - (loc) - 3)
1701
1702 /* Likewise, for a two-argument jump.  */
1703 #define STORE_JUMP2(op, loc, to, arg) \
1704   store_op2 (op, loc, (to) - (loc) - 3, arg)
1705
1706 /* Like `STORE_JUMP', but for inserting.  Assume `buf_end' is the
1707    buffer end.  */
1708 #define INSERT_JUMP(op, loc, to) \
1709   insert_op1 (op, loc, (to) - (loc) - 3, buf_end)
1710
1711 /* Like `STORE_JUMP2', but for inserting.  Assume `buf_end' is the
1712    buffer end.  */
1713 #define INSERT_JUMP2(op, loc, to, arg) \
1714   insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end)
1715
1716 /* This is not an arbitrary limit: the arguments which represent offsets
1717    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1718    be too small, many things would have to change.  */
1719 #define MAX_BUF_SIZE (1L << 16)
1720
1721 /* Extend the buffer by twice its current size via realloc and
1722    reset the pointers that pointed into the old block to point to the
1723    correct places in the new one.  If extending the buffer results in it
1724    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1725 #define EXTEND_BUFFER()                                                 \
1726         do {                                                            \
1727                 re_char *old_buffer = bufp->buffer;                     \
1728                 if (bufp->allocated == MAX_BUF_SIZE)                    \
1729                         return REG_ESIZE;                               \
1730                 bufp->allocated <<= 1;                                  \
1731                 if (bufp->allocated > MAX_BUF_SIZE)                     \
1732                         bufp->allocated = MAX_BUF_SIZE;                 \
1733                 bufp->buffer = (unsigned char *)xrealloc(               \
1734                         bufp->buffer, bufp->allocated);                 \
1735                 if (bufp->buffer == NULL) {                             \
1736                         return REG_ESPACE;                              \
1737                 }                                                       \
1738                 /* If the buffer moved, move all the pointers into it.  */ \
1739                 if (old_buffer != bufp->buffer) {                       \
1740                         buf_end = (buf_end - old_buffer) + bufp->buffer; \
1741                         begalt = (begalt - old_buffer) + bufp->buffer;  \
1742                         if (fixup_alt_jump) {                           \
1743                                 fixup_alt_jump =                        \
1744                                         (fixup_alt_jump - old_buffer) + \
1745                                         bufp->buffer;                   \
1746                         }                                               \
1747                         if (laststart) {                                \
1748                                 laststart = (laststart - old_buffer) +  \
1749                                         bufp->buffer;                   \
1750                         }                                               \
1751                         if (pending_exact) {                            \
1752                                 pending_exact =                         \
1753                                         (pending_exact - old_buffer) +  \
1754                                         bufp->buffer;                   \
1755                         }                                               \
1756                 }                                                       \
1757         } while (0)
1758
1759 /* Since we have one byte reserved for the register number argument to
1760    {start,stop}_memory, the maximum number of groups we can report
1761    things about is what fits in that byte.  */
1762 #define MAX_REGNUM 255
1763
1764 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1765    ignore the excess.  */
1766 typedef unsigned regnum_t;
1767
1768 #define INIT_REG_TRANSLATE_SIZE 5
1769
1770 /* Macros for the compile stack.  */
1771
1772 /* Since offsets can go either forwards or backwards, this type needs to
1773    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1774 typedef int pattern_offset_t;
1775
1776 typedef struct {
1777         pattern_offset_t begalt_offset;
1778         pattern_offset_t fixup_alt_jump;
1779         pattern_offset_t inner_group_offset;
1780         pattern_offset_t laststart_offset;
1781         regnum_t regnum;
1782 } compile_stack_elt_t;
1783
1784 typedef struct {
1785         compile_stack_elt_t *stack;
1786         unsigned size;
1787         unsigned avail;         /* Offset of next open position.  */
1788 } compile_stack_type;
1789
1790 #define INIT_COMPILE_STACK_SIZE 32
1791
1792 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1793 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1794
1795 /* The next available element.  */
1796 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1797
1798 /* Set the bit for character C in a bit vector.  */
1799 #define SET_LIST_BIT(c)                         \
1800   (buf_end[((unsigned char) (c)) / BYTEWIDTH]   \
1801    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1802
1803 #ifdef MULE
1804
1805 /* Set the "bit" for character C in a range table. */
1806 #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt)
1807
1808 /* Set the "bit" for character c in the appropriate table. */
1809 #define SET_EITHER_BIT(c)                       \
1810         do {                                    \
1811                 if (has_extended_chars)         \
1812                         SET_RANGETAB_BIT (c);   \
1813                 else                            \
1814                         SET_LIST_BIT (c);       \
1815         } while (0)
1816
1817 #else                           /* not MULE */
1818
1819 #define SET_EITHER_BIT(c) SET_LIST_BIT (c)
1820
1821 #endif
1822
1823 /* Get the next unsigned number in the uncompiled pattern.  */
1824 #define GET_UNSIGNED_NUMBER(num)                                        \
1825   { if (p != pend)                                                      \
1826      {                                                                  \
1827        PATFETCH (c);                                                    \
1828        while (ISDIGIT (c))                                              \
1829          {                                                              \
1830            if (num < 0)                                                 \
1831               num = 0;                                                  \
1832            num = num * 10 + c - '0';                                    \
1833            if (p == pend)                                               \
1834               break;                                                    \
1835            PATFETCH (c);                                                \
1836          }                                                              \
1837        }                                                                \
1838     }
1839
1840 #define CHAR_CLASS_MAX_LENGTH  9        /* Namely, `multibyte'.  */
1841
1842 \f
1843 static void store_op1(re_opcode_t op, unsigned char *loc, int arg);
1844 static void store_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2);
1845 static void insert_op1(re_opcode_t op, unsigned char *loc, int arg,
1846                        unsigned char *end);
1847 static void insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2,
1848                        unsigned char *end);
1849 static re_bool at_begline_loc_p(re_char*, re_char*, reg_syntax_t);
1850 static re_bool at_endline_loc_p(re_char*, re_char*, int syntax);
1851 static re_bool group_in_compile_stack(compile_stack_type compile_stack,
1852                                       regnum_t regnum);
1853 static reg_errcode_t compile_range(re_char ** p_ptr, re_char * pend,
1854                                    RE_TRANSLATE_TYPE translate,
1855                                    reg_syntax_t syntax, unsigned char *b);
1856 #ifdef MULE
1857 static reg_errcode_t compile_extended_range(re_char ** p_ptr,
1858                                             re_char * pend,
1859                                             RE_TRANSLATE_TYPE translate,
1860                                             reg_syntax_t syntax,
1861                                             Lisp_Object rtab);
1862 #endif                          /* MULE */
1863 static re_bool group_match_null_string_p(unsigned char **p,
1864                                          unsigned char *end,
1865                                          register_info_type * reg_info);
1866 static re_bool alt_match_null_string_p(unsigned char *p, unsigned char *end,
1867                                        register_info_type * reg_info);
1868 static re_bool common_op_match_null_string_p(unsigned char **p,
1869                                              unsigned char *end,
1870                                              register_info_type * reg_info);
1871 static int bcmp_translate(const unsigned char *s1, const unsigned char *s2,
1872                           REGISTER int len, RE_TRANSLATE_TYPE translate);
1873 static int re_match_2_internal(struct re_pattern_buffer *bufp,
1874                                re_char * string1, int size1,
1875                                re_char * string2, int size2, int pos,
1876                                struct re_registers *regs, int stop);
1877 \f
1878 #ifndef MATCH_MAY_ALLOCATE
1879
1880 /* If we cannot allocate large objects within re_match_2_internal,
1881    we make the fail stack and register vectors global.
1882    The fail stack, we grow to the maximum size when a regexp
1883    is compiled.
1884    The register vectors, we adjust in size each time we
1885    compile a regexp, according to the number of registers it needs.  */
1886
1887 static fail_stack_type fail_stack;
1888
1889 /* Size with which the following vectors are currently allocated.
1890    That is so we can make them bigger as needed,
1891    but never make them smaller.  */
1892 static int regs_allocated_size;
1893
1894 static re_char **regstart, **regend;
1895 static re_char **old_regstart, **old_regend;
1896 static re_char **best_regstart, **best_regend;
1897 static register_info_type *reg_info;
1898 static re_char **reg_dummy;
1899 static register_info_type *reg_info_dummy;
1900
1901 /* Make the register vectors big enough for NUM_REGS registers,
1902    but don't make them smaller.  */
1903
1904 static void regex_grow_registers(int num_regs)
1905 {
1906         if (num_regs > regs_allocated_size) {
1907                 RETALLOC_IF(regstart, num_regs, re_char *);
1908                 RETALLOC_IF(regend, num_regs, re_char *);
1909                 RETALLOC_IF(old_regstart, num_regs, re_char *);
1910                 RETALLOC_IF(old_regend, num_regs, re_char *);
1911                 RETALLOC_IF(best_regstart, num_regs, re_char *);
1912                 RETALLOC_IF(best_regend, num_regs, re_char *);
1913                 RETALLOC_IF(reg_info, num_regs, register_info_type);
1914                 RETALLOC_IF(reg_dummy, num_regs, re_char *);
1915                 RETALLOC_IF(reg_info_dummy, num_regs, register_info_type);
1916
1917                 regs_allocated_size = num_regs;
1918         }
1919 }
1920
1921 #endif                          /* not MATCH_MAY_ALLOCATE */
1922
1923 /* auxiliary stuff, FSF theft */
1924 /* Character classes.  */
1925 typedef enum {
1926         RECC_ERROR = 0,
1927         RECC_ALNUM, RECC_ALPHA, RECC_WORD,
1928         RECC_GRAPH, RECC_PRINT,
1929         RECC_LOWER, RECC_UPPER,
1930         RECC_PUNCT, RECC_CNTRL,
1931         RECC_DIGIT, RECC_XDIGIT,
1932         RECC_BLANK, RECC_SPACE,
1933         RECC_MULTIBYTE, RECC_NONASCII,
1934         RECC_ASCII, RECC_UNIBYTE
1935 } re_wctype_t;
1936
1937 /* Map a string to the char class it names (if any).  */
1938 static inline re_wctype_t
1939 re_wctype(re_char *str)
1940 {
1941         const char *string = (const char*)str;
1942
1943         if (STREQ (string, "alnum")) {
1944                 return RECC_ALNUM;
1945         } else if (STREQ (string, "alpha")) {
1946                 return RECC_ALPHA;
1947         } else if (STREQ (string, "digit")) {
1948                 return RECC_DIGIT;
1949         } else if (STREQ (string, "graph")) {
1950                 return RECC_GRAPH;
1951         } else if (STREQ (string, "space")) {
1952                 return RECC_SPACE;
1953         } else if (STREQ (string, "upper")) {
1954                 return RECC_UPPER;
1955         } else if (STREQ (string, "lower")) {
1956                 return RECC_LOWER;
1957         } else if (STREQ (string, "word")) {
1958                 return RECC_WORD;
1959         } else if (STREQ (string, "print")) {
1960                 return RECC_PRINT;
1961         } else if (STREQ (string, "punct")) {
1962                 return RECC_PUNCT;
1963         } else if (STREQ (string, "xdigit")) {
1964                 return RECC_XDIGIT;
1965         } else if (STREQ (string, "cntrl")) {
1966                 return RECC_CNTRL;
1967         } else if (STREQ (string, "blank")) {
1968                 return RECC_BLANK;
1969         } else if (STREQ (string, "ascii")) {
1970                 return RECC_ASCII;
1971         } else if (STREQ (string, "nonascii")) {
1972                 return RECC_NONASCII;
1973         } else if (STREQ (string, "unibyte")) {
1974                 return RECC_UNIBYTE;
1975         } else if (STREQ (string, "multibyte")) {
1976                 return RECC_MULTIBYTE;
1977         } else {
1978                 return RECC_ERROR;
1979         }
1980 }
1981
1982 /* True if CH is in the char class CC.  */
1983 static inline bool
1984 re_iswctype(int ch, re_wctype_t cc)
1985 {
1986         switch (cc) {
1987         case RECC_ALNUM:
1988                 return ISALNUM (ch);
1989         case RECC_ALPHA:
1990                 return ISALPHA (ch);
1991         case RECC_BLANK:
1992                 return ISBLANK (ch);
1993         case RECC_CNTRL:
1994                 return ISCNTRL (ch);
1995         case RECC_DIGIT:
1996                 return ISDIGIT (ch);
1997         case RECC_GRAPH:
1998                 return ISGRAPH (ch);
1999         case RECC_LOWER:
2000                 return ISLOWER (ch);
2001         case RECC_PRINT:
2002                 return ISPRINT (ch);
2003         case RECC_PUNCT:
2004                 return ISPUNCT (ch);
2005         case RECC_SPACE:
2006                 return ISSPACE (ch);
2007         case RECC_UPPER:
2008                 return ISUPPER (ch);
2009         case RECC_XDIGIT:
2010                 return ISXDIGIT (ch);
2011         case RECC_ASCII:
2012                 return IS_REAL_ASCII (ch);
2013         case RECC_NONASCII:
2014                 return !IS_REAL_ASCII (ch);
2015         case RECC_WORD:
2016                 return ISWORD(ch);
2017         case RECC_UNIBYTE:
2018                 return ISUNIBYTE((unsigned int)ch);
2019         case RECC_MULTIBYTE:
2020                 return !ISUNIBYTE((unsigned int)ch);
2021         case RECC_ERROR:
2022                 return false;
2023         default:
2024                 abort();
2025         }
2026         return false;
2027 }
2028
2029 \f
2030 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2031    Returns one of error codes defined in `regex.h', or zero for success.
2032
2033    Assumes the `allocated' (and perhaps `buffer') and `translate'
2034    fields are set in BUFP on entry.
2035
2036    If it succeeds, results are put in BUFP (if it returns an error, the
2037    contents of BUFP are undefined):
2038      `buffer' is the compiled pattern;
2039      `syntax' is set to SYNTAX;
2040      `used' is set to the length of the compiled pattern;
2041      `fastmap_accurate' is zero;
2042      `re_ngroups' is the number of groups/subexpressions (including shy
2043         groups) in PATTERN;
2044      `re_nsub' is the number of non-shy groups in PATTERN;
2045      `not_bol' and `not_eol' are zero;
2046
2047    The `fastmap' and `newline_anchor' fields are neither
2048    examined nor set.  */
2049
2050 static reg_errcode_t
2051 regex_compile(re_char * pattern, int size, reg_syntax_t syntax,
2052               struct re_pattern_buffer *bufp)
2053 {
2054         /* We fetch characters from PATTERN here.  We declare these as int
2055            (or possibly long) so that chars above 127 can be used as
2056            array indices.  The macros that fetch a character from the pattern
2057            make sure to coerce to unsigned char before assigning, so we won't
2058            get bitten by negative numbers here. */
2059         /* XEmacs change: used to be unsigned char. */
2060         REGISTER EMACS_INT c, c1;
2061
2062         /* A random temporary spot in PATTERN.  */
2063         re_char *p1;
2064
2065         /* Points to the end of the buffer, where we should append.  */
2066         REGISTER unsigned char *buf_end;
2067
2068         /* Keeps track of unclosed groups.  */
2069         compile_stack_type compile_stack;
2070
2071         /* Points to the current (ending) position in the pattern.  */
2072         re_char *p = pattern;
2073         re_char *pend = pattern + size;
2074
2075         /* How to translate the characters in the pattern.  */
2076         RE_TRANSLATE_TYPE translate = bufp->translate;
2077
2078         /* Address of the count-byte of the most recently inserted `exactn'
2079            command.  This makes it possible to tell if a new exact-match
2080            character can be added to that command or if the character requires
2081            a new `exactn' command.  */
2082         unsigned char *pending_exact = 0;
2083
2084         /* Address of start of the most recently finished expression.
2085            This tells, e.g., postfix * where to find the start of its
2086            operand.  Reset at the beginning of groups and alternatives.  */
2087         unsigned char *laststart = 0;
2088
2089         /* Address of beginning of regexp, or inside of last group.  */
2090         unsigned char *begalt;
2091
2092         /* Place in the uncompiled pattern (i.e., the {) to
2093            which to go back if the interval is invalid.  */
2094         re_char *beg_interval;
2095
2096         /* Address of the place where a forward jump should go to the end of
2097            the containing expression.  Each alternative of an `or' -- except the
2098            last -- ends with a forward jump of this sort.  */
2099         unsigned char *fixup_alt_jump = 0;
2100
2101         /* Counts open-groups as they are encountered.  Remembered for the
2102            matching close-group on the compile stack, so the same register
2103            number is put in the stop_memory as the start_memory.  */
2104         regnum_t regnum = 0;
2105
2106         /* we need to close over compile_stack */
2107 #       define free_stack(args...)      xfree(compile_stack.stack)
2108
2109 #ifdef REGEX_DEBUG_FLAG
2110         DEBUG_PRINT1("\nCompiling pattern: ");
2111         if (debug) {
2112                 int debug_count;
2113
2114                 for (debug_count = 0; debug_count < size; debug_count++)
2115                         putchar(pattern[debug_count]);
2116                 putchar('\n');
2117         }
2118 #endif                          /* DEBUG */
2119
2120         /* Initialize the compile stack.  */
2121         compile_stack.stack =
2122                 TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2123         if (compile_stack.stack == NULL) {
2124                 return REG_ESPACE;
2125         }
2126         compile_stack.size = INIT_COMPILE_STACK_SIZE;
2127         compile_stack.avail = 0;
2128
2129         /* Initialize the pattern buffer.  */
2130         bufp->syntax = syntax;
2131         bufp->fastmap_accurate = 0;
2132         bufp->not_bol = bufp->not_eol = 0;
2133
2134         /* Set `used' to zero, so that if we return an error, the pattern
2135            printer (for debugging) will think there's no pattern.  We reset it
2136            at the end.  */
2137         bufp->used = 0;
2138
2139         /* Always count groups, whether or not bufp->no_sub is set.  */
2140         bufp->re_nsub = 0;
2141         bufp->re_ngroups = 0;
2142
2143         /* Allocate index translation array if needed. */
2144         if (bufp->external_to_internal_register == 0) {
2145                 bufp->external_to_internal_register_size =
2146                     INIT_REG_TRANSLATE_SIZE;
2147                 RETALLOC(bufp->external_to_internal_register,
2148                          bufp->external_to_internal_register_size, int);
2149         }
2150         /* Initialize translations to impossible value to aid debugging. */
2151         {
2152                 int i;
2153
2154                 bufp->external_to_internal_register[0] = 0;
2155                 for (i = 1; i < bufp->external_to_internal_register_size; i++)
2156                         bufp->external_to_internal_register[i] =
2157                             (int)0xDEADBEEF;
2158         }
2159
2160 #if !defined (emacs) && !defined (SYNTAX_TABLE)
2161         /* Initialize the syntax table.  */
2162         init_syntax_once();
2163 #endif
2164
2165         if (bufp->allocated == 0) {
2166                 if (bufp->buffer) {
2167                         /* If zero allocated, but buffer is non-null, try to
2168                            realloc enough space.  This loses if buffer's address
2169                            is bogus, but that is the user's responsibility.  */
2170                         RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
2171                 } else {
2172                         /* Caller did not allocate a buffer.  Do it for them. */
2173                         bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
2174                 }
2175                 if (!bufp->buffer) {
2176                         free_stack();
2177                         return REG_ESPACE;
2178                 }
2179
2180                 bufp->allocated = INIT_BUF_SIZE;
2181         }
2182
2183         begalt = buf_end = bufp->buffer;
2184
2185         /* Loop through the uncompiled pattern until we're at the end.  */
2186         while (p != pend) {
2187                 PATFETCH(c);
2188
2189                 switch (c) {
2190                 case '^': {
2191                         if (    /* If at start of pattern, it's an operator.  */
2192                                 p == pattern + 1
2193                                 /* If context independent, it's an operator.  */
2194                                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2195                                 /* Otherwise, depends on what's come before.  */
2196                                 || at_begline_loc_p(pattern, p, syntax))
2197                                 BUF_PUSH(begline);
2198                         else
2199                                 goto normal_char;
2200                 }
2201                         break;
2202
2203                 case '$': {
2204                         if (    /* If at end of pattern, it's an
2205                                    operator.  */
2206                                 p == pend
2207                                 /* If context independent, it's an
2208                                    operator.  */
2209                                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2210                                 /* Otherwise, depends on what's
2211                                    next.  */
2212                                 || at_endline_loc_p(p, pend, syntax))
2213                                 BUF_PUSH(endline);
2214                         else
2215                                 goto normal_char;
2216                 }
2217                         break;
2218
2219                 case '+':
2220                 case '?':
2221                         if ((syntax & RE_BK_PLUS_QM) ||
2222                             (syntax & RE_LIMITED_OPS)) {
2223                                 goto normal_char;
2224                         }
2225                         /* pardon? */
2226                 handle_plus:
2227                 case '*': {
2228                         re_bool zero_times_ok;
2229                         re_bool many_times_ok;
2230                         re_bool minimal;
2231
2232                         /* If there is no previous pattern... */
2233                         if (!laststart) {
2234                                 if (syntax & RE_CONTEXT_INVALID_OPS) {
2235                                         free_stack();
2236                                         return REG_BADRPT;
2237                                 } else if (!(syntax & RE_CONTEXT_INDEP_OPS)) {
2238                                         goto normal_char;
2239                                 }
2240                         }
2241
2242                         /* true means zero/many matches are allowed. */
2243                         zero_times_ok = c != '+';
2244                         many_times_ok = c != '?';
2245
2246                         /* true means match shortest string possible. */
2247                         minimal = false;
2248
2249                         /* If there is a sequence of repetition chars, collapse it
2250                            down to just one (the right one).  We can't combine
2251                            interval operators with these because of, e.g., `a{2}*',
2252                            which should only match an even number of `a's.  */
2253                         while (p != pend) {
2254                                 PATFETCH(c);
2255
2256                                 if (c == '*'
2257                                     || (!(syntax & RE_BK_PLUS_QM)
2258                                         && (c == '+' || c == '?'))) ;
2259
2260                                 else if (syntax & RE_BK_PLUS_QM
2261                                          && c == '\\') {
2262                                         if (p == pend) {
2263                                                 free_stack();
2264                                                 return REG_EESCAPE;
2265                                         }
2266
2267                                         PATFETCH(c1);
2268                                         if (!(c1 == '+' || c1 == '?')) {
2269                                                 PATUNFETCH;
2270                                                 PATUNFETCH;
2271                                                 break;
2272                                         }
2273
2274                                         c = c1;
2275                                 } else {
2276                                         PATUNFETCH;
2277                                         break;
2278                                 }
2279
2280                                 /* If we get here, we found another repeat
2281                                    character.  */
2282                                 if (!(syntax & RE_NO_MINIMAL_MATCHING)) {
2283                                         /* "*?" and "+?" and "??" are okay (and
2284                                            mean match minimally), but other
2285                                            sequences (such as "*??" and "+++")
2286                                            are rejected (reserved for future
2287                                            use). */
2288                                         if (minimal || c != '?') {
2289                                                 free_stack();
2290                                                 return REG_BADRPT;
2291                                         }
2292                                         minimal = true;
2293                                 } else {
2294                                         zero_times_ok |= c != '+';
2295                                         many_times_ok |= c != '?';
2296                                 }
2297                         }
2298
2299                         /* Star, etc. applied to an empty pattern is equivalent
2300                            to an empty pattern.  */
2301                         if (!laststart) {
2302                                 break;
2303                         }
2304
2305                         /* Now we know whether zero matches is allowed
2306                            and whether two or more matches is allowed
2307                            and whether we want minimal or maximal matching. */
2308                         if (minimal) {
2309                                 if (!many_times_ok) {
2310                                         /* "a??" becomes:
2311                                            0: /on_failure_jump to 6
2312                                            3: /jump to 9
2313                                            6: /exactn/1/A
2314                                            9: end of pattern.
2315                                         */
2316                                         GET_BUFFER_SPACE(6);
2317                                         INSERT_JUMP(jump, laststart,
2318                                                     buf_end + 3);
2319                                         buf_end += 3;
2320                                         INSERT_JUMP(on_failure_jump,
2321                                                     laststart,
2322                                                     laststart + 6);
2323                                         buf_end += 3;
2324                                 } else if (zero_times_ok) {
2325                                         /* "a*?" becomes:
2326                                            0: /jump to 6
2327                                            3: /exactn/1/A
2328                                            6: /on_failure_jump to 3
2329                                            9: end of pattern.
2330                                         */
2331                                         GET_BUFFER_SPACE(6);
2332                                         INSERT_JUMP(jump, laststart,
2333                                                     buf_end + 3);
2334                                         buf_end += 3;
2335                                         STORE_JUMP(on_failure_jump,
2336                                                    buf_end,
2337                                                    laststart + 3);
2338                                         buf_end += 3;
2339                                 } else {
2340                                         /* "a+?" becomes:
2341                                            0: /exactn/1/A
2342                                            3: /on_failure_jump to 0
2343                                            6: end of pattern.
2344                                         */
2345                                         GET_BUFFER_SPACE(3);
2346                                         STORE_JUMP(on_failure_jump,
2347                                                    buf_end, laststart);
2348                                         buf_end += 3;
2349                                 }
2350                         } else {
2351                                 /* Are we optimizing this jump?  */
2352                                 re_bool keep_string_p = false;
2353
2354                                 if (many_times_ok) {
2355                                         /* More than one repetition is allowed,
2356                                            so put in at the end a backward
2357                                            relative jump from `buf_end' to
2358                                            before the next jump we're going to
2359                                            put in below (which jumps from
2360                                            laststart to after this jump).
2361
2362                                            But if we are at the `*' in the exact
2363                                            sequence `.*\n', insert an
2364                                            unconditional jump backwards to the
2365                                            ., instead of the beginning of the
2366                                            loop.  This way we only push a
2367                                            failure point once, instead of every
2368                                            time through the loop.  */
2369                                         assert(p - 1 > pattern);
2370
2371                                         /* Allocate the space for the jump.  */
2372                                         GET_BUFFER_SPACE(3);
2373
2374                                         /* We know we are not at the first
2375                                            character of the pattern, because
2376                                            laststart was nonzero.  And we've
2377                                            already incremented `p', by the way,
2378                                            to be the character after the `*'.
2379                                            Do we have to do something analogous
2380                                            here for null bytes, because of
2381                                            RE_DOT_NOT_NULL? */
2382                                         if (*(p - 2) == '.' &&
2383                                             zero_times_ok &&
2384                                             p < pend &&
2385                                             *p == '\n' &&
2386                                             !(syntax & RE_DOT_NEWLINE)) {
2387                                                 /* We have .*\n.  */
2388                                                 STORE_JUMP(jump,
2389                                                            buf_end,
2390                                                            laststart);
2391                                                 keep_string_p = true;
2392                                         } else {
2393                                                 /* Anything else.  */
2394                                                 STORE_JUMP
2395                                                         (maybe_pop_jump,
2396                                                          buf_end,
2397                                                          laststart - 3);
2398                                         }
2399                                         /* We've added more stuff to the
2400                                            buffer.  */
2401                                         buf_end += 3;
2402                                 }
2403
2404                                 /* On failure, jump from laststart to buf_end +
2405                                    3, which will be the end of the buffer after
2406                                    this jump is inserted.  */
2407                                 GET_BUFFER_SPACE(3);
2408                                 INSERT_JUMP(keep_string_p ?
2409                                             on_failure_keep_string_jump
2410                                             : on_failure_jump,
2411                                             laststart, buf_end + 3);
2412                                 buf_end += 3;
2413
2414                                 if (!zero_times_ok) {
2415                                         /* At least one repetition is required,
2416                                            so insert a `dummy_failure_jump'
2417                                            before the initial `on_failure_jump'
2418                                            instruction of the loop. This effects
2419                                            a skip over that instruction the
2420                                            first time we hit that loop.  */
2421                                         GET_BUFFER_SPACE(3);
2422                                         INSERT_JUMP(dummy_failure_jump,
2423                                                     laststart,
2424                                                     laststart + 6);
2425                                         buf_end += 3;
2426                                 }
2427                         }
2428                         pending_exact = 0;
2429                 }
2430                         break;
2431
2432                 case '.':
2433                         laststart = buf_end;
2434                         BUF_PUSH(anychar);
2435                         break;
2436
2437                 case '[': {
2438                         /* XEmacs change: this whole section */
2439                         re_bool had_char_class = false;
2440 #ifdef MULE
2441                         re_bool has_extended_chars = false;
2442                         REGISTER Lisp_Object rtab = Qnil;
2443 #endif
2444
2445                         if (p == pend) {
2446                                 free_stack();
2447                                 return REG_EBRACK;
2448                         }
2449
2450                         /* Ensure that we have enough space to push a charset:
2451                            the opcode, the length count, and the bitset; 34
2452                            bytes in all.  */
2453                         GET_BUFFER_SPACE(34);
2454
2455                         laststart = buf_end;
2456
2457                         /* We test `*p == '^' twice, instead of using an if
2458                            statement, so we only need one BUF_PUSH.  */
2459                         BUF_PUSH(*p == '^' ? charset_not : charset);
2460                         if (*p == '^')
2461                                 p++;
2462
2463                         /* Remember the first position in the bracket
2464                            expression.  */
2465                         p1 = p;
2466
2467                         /* Push the number of bytes in the bitmap.  */
2468                         BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
2469
2470                         /* Clear the whole map.  */
2471                         memset(buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
2472
2473                         /* charset_not matches newline according to a syntax
2474                            bit.  */
2475                         if ((re_opcode_t) buf_end[-2] == charset_not
2476                             && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2477                                 SET_LIST_BIT('\n');
2478
2479 #ifdef MULE
2480                         start_over_with_extended:
2481                         if (has_extended_chars) {
2482                                 /* There are extended chars here, which means we
2483                                    need to start over and shift to unified
2484                                    range-table format. */
2485                                 if (buf_end[-2] == charset)
2486                                         buf_end[-2] = charset_mule;
2487                                 else
2488                                         buf_end[-2] = charset_mule_not;
2489                                 buf_end--;
2490                                 p = p1;
2491                                 /* go back to the beginning of the charset,
2492                                    after a possible ^. */
2493                                 rtab = Vthe_lisp_rangetab;
2494                                 Fclear_range_table(rtab);
2495
2496                                 /* charset_not matches newline according to a
2497                                    syntax bit.  */
2498                                 if ((re_opcode_t) buf_end[-1] ==
2499                                     charset_mule_not
2500                                     && (syntax &
2501                                         RE_HAT_LISTS_NOT_NEWLINE))
2502                                         SET_EITHER_BIT('\n');
2503                         }
2504 #endif                          /* MULE */
2505
2506                                 /* Read in characters and ranges, setting map bits.  */
2507                         for (;;) {
2508                                 if (p == pend) {
2509                                         free_stack();
2510                                         return REG_EBRACK;
2511                                 }
2512                                 PATFETCH(c);
2513
2514 #ifdef MULE
2515                                 if (c >= 0x80 && !has_extended_chars) {
2516                                         has_extended_chars = 1;
2517                                         /* Frumble-bumble, we've found some
2518                                            extended chars.  Need to start over,
2519                                            process everything using the general
2520                                            extended-char mechanism, and need to
2521                                            use charset_mule and charset_mule_not
2522                                            instead of charset and
2523                                            charset_not. */
2524                                         goto start_over_with_extended;
2525                                 }
2526 #endif                          /* MULE */
2527                                 /* \ might escape characters inside [...] and
2528                                    [^...].  */
2529                                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) &&
2530                                     c == '\\') {
2531                                         if (p == pend) {
2532                                                 free_stack();
2533                                                 return REG_EESCAPE;
2534                                         }
2535
2536                                         PATFETCH(c1);
2537 #ifdef MULE
2538                                         if (c1 >= 0x80
2539                                             && !has_extended_chars) {
2540                                                 has_extended_chars = 1;
2541                                                 goto start_over_with_extended;
2542                                         }
2543 #endif                          /* MULE */
2544                                         SET_EITHER_BIT(c1);
2545                                         continue;
2546                                 }
2547
2548                                 /* Could be the end of the bracket
2549                                    expression.  If it's not (i.e., when
2550                                    the bracket expression is `[]' so
2551                                    far), the ']' character bit gets set
2552                                    way below.  */
2553                                 if (c == ']' && p != p1 + 1)
2554                                         break;
2555
2556                                 /* Look ahead to see if it's a range
2557                                    when the last thing was a character
2558                                    class.  */
2559                                 if (had_char_class && c == '-'
2560                                     && *p != ']') {
2561                                         free_stack();
2562                                         return REG_ERANGE;
2563                                 }
2564
2565                                 /* Look ahead to see if it's a range
2566                                    when the last thing was a character:
2567                                    if this is a hyphen not at the
2568                                    beginning or the end of a list, then
2569                                    it's the range operator.  */
2570                                 if (c == '-'
2571                                     && !(p - 2 >= pattern
2572                                          && p[-2] == '[')
2573                                     && !(p - 3 >= pattern
2574                                          && p[-3] == '['
2575                                          && p[-2] == '^')
2576                                     && *p != ']') {
2577                                         reg_errcode_t ret;
2578
2579 #ifdef MULE
2580                                         if (*(const unsigned char *)p >= 0x80
2581                                             && !has_extended_chars) {
2582                                                 has_extended_chars = 1;
2583                                                 goto start_over_with_extended;
2584                                         }
2585                                         if (has_extended_chars)
2586                                                 ret = compile_extended_range
2587                                                         (&p, pend, translate,
2588                                                          syntax, rtab);
2589                                         else
2590 #endif                          /* MULE */
2591                                                 ret = compile_range
2592                                                         (&p, pend, translate,
2593                                                          syntax, buf_end);
2594                                         if (ret != REG_NOERROR) {
2595                                                 free_stack();
2596                                                 return ret;
2597                                         }
2598                                 }
2599
2600                                 else if (p[0] == '-' && p[1] != ']') {
2601                                         /* This handles ranges made up of
2602                                            characters only.  */
2603                                         reg_errcode_t ret;
2604
2605                                         /* Move past the `-'.  */
2606                                         PATFETCH(c1);
2607
2608 #ifdef MULE
2609                                         if (*(const unsigned char*)p >= 0x80
2610                                             && !has_extended_chars) {
2611                                                 has_extended_chars = 1;
2612                                                 goto start_over_with_extended;
2613                                         }
2614                                         if (has_extended_chars)
2615                                                 ret = compile_extended_range(
2616                                                         &p, pend, translate,
2617                                                         syntax, rtab);
2618                                         else
2619 #endif                          /* MULE */
2620                                                 ret = compile_range(
2621                                                         &p, pend, translate,
2622                                                         syntax, buf_end);
2623                                         if (ret != REG_NOERROR) {
2624                                                 free_stack();
2625                                                 return ret;
2626                                         }
2627                                 }
2628
2629                                 /* See if we're at the beginning of a possible
2630                                    character class.  */
2631
2632                                 else if (syntax & RE_CHAR_CLASSES &&
2633                                          c == '[' && *p == ':') {
2634                                         /* Leave room for the null.  */
2635                                         char str[CHAR_CLASS_MAX_LENGTH + 1];
2636
2637                                         PATFETCH(c);
2638                                         c1 = 0;
2639
2640                                         /* If pattern is `[[:'.  */
2641                                         if (p == pend) {
2642                                                 free_stack();
2643                                                 return REG_EBRACK;
2644                                         }
2645                                         for (;;) {
2646                                                 /* #### This code is unused.
2647                                                    Correctness is not checked
2648                                                    after TRT table change.  */
2649                                                 PATFETCH(c);
2650                                                 if (c == ':' || c == ']'
2651                                                     || p == pend
2652                                                     || c1 ==
2653                                                     CHAR_CLASS_MAX_LENGTH)
2654                                                         break;
2655                                                 str[c1++] = (char)c;
2656                                         }
2657                                         str[c1] = '\0';
2658
2659                                         /* If isn't a word bracketed by
2660                                            `[:' and `:]': undo the
2661                                            ending character, the
2662                                            letters, and leave the
2663                                            leading `:' and `[' (but set
2664                                            bits for them).  */
2665                                         if (c == ':' && *p == ']') {
2666                                                 re_wctype_t wct =
2667                                                         re_wctype(
2668                                                                 (unsigned char*)
2669                                                                 str);
2670
2671                                                 if (wct == RECC_ERROR) {
2672                                                         free_stack();
2673                                                         return REG_ECTYPE;
2674                                                 }
2675
2676                                                 /* Throw away the ] at
2677                                                    the end of the
2678                                                    character class.  */
2679                                                 PATFETCH(c);
2680
2681                                                 if (p == pend) {
2682                                                         free_stack();
2683                                                         return REG_EBRACK;
2684                                                 }
2685
2686                                                 for (int ch = 0;
2687                                                      ch < 1<<BYTEWIDTH;
2688                                                      ch++) {
2689                                                         /* rmsmacs has
2690                                                            this */
2691 #if 0
2692                                                         int translated =
2693                                                                 TRANSLATE(ch);
2694                                                         if (translated <
2695                                                             (1 << BYTEWIDTH) &&
2696                                                             re_iswctype(ch, wct)) {
2697                                                                 SET_EITHER_BIT(ch);
2698                                                         }
2699 #else
2700                                                         if (re_iswctype(ch, wct)) {
2701                                                                 SET_EITHER_BIT(ch);
2702                                                         }
2703 #endif
2704                                                 }
2705                                                 had_char_class = true;
2706                                         } else {
2707                                                 c1++;
2708                                                 while (c1--) {
2709                                                         PATUNFETCH;
2710                                                 }
2711                                                 SET_EITHER_BIT('[');
2712                                                 SET_EITHER_BIT(':');
2713                                                 had_char_class = false;
2714                                         }
2715                                 } else {
2716                                         had_char_class = false;
2717                                         SET_EITHER_BIT(c);
2718                                 }
2719                         }
2720
2721 #ifdef MULE
2722                         if (has_extended_chars) {
2723                                 /* We have a range table, not a bit vector. */
2724                                 int bytes_needed =
2725                                         unified_range_table_bytes_needed
2726                                         (rtab);
2727                                 GET_BUFFER_SPACE(bytes_needed);
2728                                 unified_range_table_copy_data(rtab,
2729                                                               buf_end);
2730                                 buf_end +=
2731                                         unified_range_table_bytes_used
2732                                         (buf_end);
2733                                 break;
2734                         }
2735 #endif                          /* MULE */
2736                                 /* Discard any (non)matching list bytes that are
2737                                    all 0 at the end of the map.  Decrease the
2738                                    map-length byte too.  */
2739                         while ((int)buf_end[-1] > 0
2740                                && buf_end[buf_end[-1] - 1] == 0)
2741                                 buf_end[-1]--;
2742                         buf_end += buf_end[-1];
2743                 }
2744                         break;
2745
2746                 case '(':
2747                         if (syntax & RE_NO_BK_PARENS)
2748                                 goto handle_open;
2749                         else
2750                                 goto normal_char;
2751
2752                 case ')':
2753                         if (syntax & RE_NO_BK_PARENS)
2754                                 goto handle_close;
2755                         else
2756                                 goto normal_char;
2757
2758                 case '\n':
2759                         if (syntax & RE_NEWLINE_ALT)
2760                                 goto handle_alt;
2761                         else
2762                                 goto normal_char;
2763
2764                 case '|':
2765                         if (syntax & RE_NO_BK_VBAR)
2766                                 goto handle_alt;
2767                         else
2768                                 goto normal_char;
2769
2770                 case '{':
2771                         if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2772                                 goto handle_interval;
2773                         else
2774                                 goto normal_char;
2775
2776                 case '\\':
2777                         if (p == pend) {
2778                                 free_stack();
2779                                 return REG_EESCAPE;
2780                         }
2781
2782                         /* Do not translate the character after the \, so that we can
2783                            distinguish, e.g., \B from \b, even if we normally would
2784                            translate, e.g., B to b.  */
2785                         PATFETCH_RAW(c);
2786
2787                         switch (c) {
2788                         case '(':
2789                                 if (syntax & RE_NO_BK_PARENS)
2790                                         goto normal_backslash;
2791
2792                               handle_open:
2793                                 {
2794                                         regnum_t r;
2795                                         int shy = 0;
2796
2797                                         if (!(syntax & RE_NO_SHY_GROUPS)
2798                                             && p != pend && *p == '?') {
2799                                                 p++;
2800                                                 PATFETCH(c);
2801                                                 switch (c) {
2802                                                 case ':':
2803                                                         /* shy groups */
2804                                                         shy = 1;
2805                                                         break;
2806
2807                                                         /* All others are
2808                                                            reserved for future
2809                                                            constructs. */
2810                                                 default:
2811                                                         free_stack();
2812                                                         return REG_BADPAT;
2813                                                 }
2814                                         }
2815
2816                                         r = ++regnum;
2817                                         bufp->re_ngroups++;
2818                                         if (!shy) {
2819                                                 /* Record the translation from capturing group index to
2820                                                    register number, reallocating table as needed. */
2821                                                 bufp->re_nsub++;
2822                                                 while (bufp->
2823                                                        external_to_internal_register_size
2824                                                        <= bufp->re_nsub) {
2825                                                         int i;
2826                                                         int old_size =
2827                                                             bufp->
2828                                                             external_to_internal_register_size;
2829                                                         bufp->
2830                                                             external_to_internal_register_size
2831                                                             += 5;
2832                                                         RETALLOC(bufp->
2833                                                                  external_to_internal_register,
2834                                                                  bufp->
2835                                                                  external_to_internal_register_size,
2836                                                                  int);
2837                                                         /* debugging */
2838                                                         for (i = old_size;
2839                                                              i <
2840                                                              bufp->
2841                                                              external_to_internal_register_size;
2842                                                              i++)
2843                                                                 bufp->
2844                                                                     external_to_internal_register
2845                                                                     [i] =
2846                                                                     (int)
2847                                                                     0xDEADBEEF;
2848                                                 }
2849
2850                                                 bufp->
2851                                                     external_to_internal_register
2852                                                     [bufp->re_nsub] =
2853                                                     bufp->re_ngroups;
2854                                         }
2855
2856                                         if (COMPILE_STACK_FULL) {
2857                                                 RETALLOC(compile_stack.stack,
2858                                                          compile_stack.
2859                                                          size << 1,
2860                                                          compile_stack_elt_t);
2861                                                 if (compile_stack.stack == NULL)
2862                                                         return REG_ESPACE;
2863
2864                                                 compile_stack.size <<= 1;
2865                                         }
2866
2867                                         /* These are the values to restore when
2868                                            we hit end of this group.  They are
2869                                            all relative offsets, so that if the
2870                                            whole pattern moves because of
2871                                            realloc, they will still be
2872                                            valid.  */
2873                                         COMPILE_STACK_TOP.begalt_offset =
2874                                                 begalt - bufp->buffer;
2875                                         COMPILE_STACK_TOP.fixup_alt_jump =
2876                                                 fixup_alt_jump
2877                                                 ? fixup_alt_jump -
2878                                                 bufp->buffer + 1
2879                                                 : 0;
2880                                         COMPILE_STACK_TOP.laststart_offset =
2881                                                 buf_end - bufp->buffer;
2882                                         COMPILE_STACK_TOP.regnum = r;
2883
2884                                         /* We will eventually replace the 0 with
2885                                            the number of groups inner to this
2886                                            one.  But do not push a start_memory
2887                                            for groups beyond the last one we can
2888                                            represent in the compiled pattern.
2889                                            #### bad bad bad.  this will fail in
2890                                            lots of ways, if we ever have to
2891                                            backtrack for these groups.
2892                                          */
2893                                         if (r <= MAX_REGNUM) {
2894                                                 COMPILE_STACK_TOP.
2895                                                         inner_group_offset =
2896                                                         buf_end - bufp->buffer +
2897                                                         2;
2898                                                 BUF_PUSH_3(start_memory, r, 0);
2899                                         }
2900
2901                                         compile_stack.avail++;
2902
2903                                         fixup_alt_jump = 0;
2904                                         laststart = 0;
2905                                         begalt = buf_end;
2906                                         /* If we've reached MAX_REGNUM groups,
2907                                            then this open won't actually
2908                                            generate any code, so we'll have to
2909                                            clear pending_exact explicitly.  */
2910                                         pending_exact = 0;
2911                                 }
2912                                 break;
2913
2914                         case ')':
2915                                 if (syntax & RE_NO_BK_PARENS)
2916                                         goto normal_backslash;
2917
2918                                 if (COMPILE_STACK_EMPTY) {
2919                                         if (syntax &
2920                                             RE_UNMATCHED_RIGHT_PAREN_ORD) {
2921                                                 goto normal_backslash;
2922                                         } else {
2923                                                 free_stack();
2924                                                 return REG_ERPAREN;
2925                                         }
2926                                 }
2927
2928                               handle_close:
2929                                 if (fixup_alt_jump) {
2930                                         /* Push a dummy failure point at the end
2931                                            of the alternative for a possible
2932                                            future `pop_failure_jump' to pop.
2933                                            See comments at `push_dummy_failure'
2934                                            in `re_match_2'. */
2935                                         BUF_PUSH(push_dummy_failure);
2936
2937                                         /* We allocated space for this jump when
2938                                            we assigned to `fixup_alt_jump', in
2939                                            the `handle_alt' case below.  */
2940                                         STORE_JUMP(jump_past_alt,
2941                                                    fixup_alt_jump, buf_end - 1);
2942                                 }
2943
2944                                 /* See similar code for backslashed left paren
2945                                    above.  */
2946                                 if (COMPILE_STACK_EMPTY) {
2947                                         if (syntax &
2948                                             RE_UNMATCHED_RIGHT_PAREN_ORD) {
2949                                                 goto normal_char;
2950                                         } else {
2951                                                 free_stack();
2952                                                 return REG_EPAREN;
2953                                         }
2954                                 }
2955
2956                                 /* Since we just checked for an empty stack
2957                                    above, this ``can't happen''.  */
2958                                 assert(compile_stack.avail != 0);
2959                                 {
2960                                         /* We don't just want to restore into
2961                                            `regnum', because later groups should
2962                                            continue to be numbered higher, as in
2963                                            `(ab)c(de)' -- the second group is
2964                                            #2.  */
2965                                         regnum_t this_group_regnum;
2966
2967                                         compile_stack.avail--;
2968                                         begalt =
2969                                                 bufp->buffer +
2970                                                 COMPILE_STACK_TOP.begalt_offset;
2971                                         fixup_alt_jump =
2972                                                 COMPILE_STACK_TOP.
2973                                                 fixup_alt_jump
2974                                                 ? bufp->buffer +
2975                                                 COMPILE_STACK_TOP.
2976                                                 fixup_alt_jump - 1
2977                                                 : 0;
2978                                         laststart =
2979                                                 bufp->buffer +
2980                                                 COMPILE_STACK_TOP.
2981                                                 laststart_offset;
2982                                         this_group_regnum =
2983                                                 COMPILE_STACK_TOP.regnum;
2984                                         /* If we've reached MAX_REGNUM groups,
2985                                            then this open won't actually
2986                                            generate any code, so we'll have to
2987                                            clear pending_exact explicitly.  */
2988                                         pending_exact = 0;
2989
2990                                         /* We're at the end of the group, so now
2991                                            we know how many groups were inside
2992                                            this one.  */
2993                                         if (this_group_regnum <= MAX_REGNUM) {
2994                                                 unsigned char *inner_group_loc
2995                                                         =
2996                                                         bufp->buffer +
2997                                                         COMPILE_STACK_TOP.
2998                                                         inner_group_offset;
2999
3000                                                 *inner_group_loc =
3001                                                         regnum -
3002                                                         this_group_regnum;
3003                                                 BUF_PUSH_3(stop_memory,
3004                                                            this_group_regnum,
3005                                                            regnum -
3006                                                            this_group_regnum);
3007                                         }
3008                                 }
3009                                 break;
3010
3011                         case '|':       /* `\|'.  */
3012                                 if (syntax & RE_LIMITED_OPS
3013                                     || syntax & RE_NO_BK_VBAR) {
3014                                         goto normal_backslash;
3015                                 }
3016                         handle_alt:
3017                                 if (syntax & RE_LIMITED_OPS) {
3018                                         goto normal_char;
3019                                 }
3020
3021                                 /* Insert before the previous alternative a jump
3022                                    which jumps to this alternative if the former
3023                                    fails.  */
3024                                 GET_BUFFER_SPACE(3);
3025                                 INSERT_JUMP(on_failure_jump, begalt,
3026                                             buf_end + 6);
3027                                 pending_exact = 0;
3028                                 buf_end += 3;
3029
3030                                 /* The alternative before this one has a jump after it
3031                                    which gets executed if it gets matched.  Adjust that
3032                                    jump so it will jump to this alternative's analogous
3033                                    jump (put in below, which in turn will jump to the next
3034                                    (if any) alternative's such jump, etc.).  The last such
3035                                    jump jumps to the correct final destination.  A picture:
3036                                    _____ _____
3037                                    |   | |   |
3038                                    |   v |   v
3039                                    a | b   | c
3040
3041                                    If we are at `b', then fixup_alt_jump right now points to a
3042                                    three-byte space after `a'.  We'll put in the jump, set
3043                                    fixup_alt_jump to right after `b', and leave behind three
3044                                    bytes which we'll fill in when we get to after `c'.  */
3045
3046                                 if (fixup_alt_jump)
3047                                         STORE_JUMP(jump_past_alt,
3048                                                    fixup_alt_jump, buf_end);
3049
3050                                 /* Mark and leave space for a jump after this alternative,
3051                                    to be filled in later either by next alternative or
3052                                    when know we're at the end of a series of alternatives.  */
3053                                 fixup_alt_jump = buf_end;
3054                                 GET_BUFFER_SPACE(3);
3055                                 buf_end += 3;
3056
3057                                 laststart = 0;
3058                                 begalt = buf_end;
3059                                 break;
3060
3061                         case '{':
3062                                 /* If \{ is a literal.  */
3063                                 if (!(syntax & RE_INTERVALS)
3064                                     /* If we're at `\{' and it's not the open-interval
3065                                        operator.  */
3066                                     || ((syntax & RE_INTERVALS)
3067                                         && (syntax & RE_NO_BK_BRACES))
3068                                     || (p - 2 == pattern && p == pend))
3069                                         goto normal_backslash;
3070
3071                               handle_interval:
3072                                 {
3073                                         /* If got here, then the syntax allows
3074                                            intervals.  */
3075
3076                                         /* At least (most) this many matches
3077                                            must be made.  */
3078                                         int lower_bound = -1, upper_bound = -1;
3079
3080                                         beg_interval = p - 1;
3081
3082                                         if (p == pend) {
3083                                                 if (syntax & RE_NO_BK_BRACES) {
3084                                                         goto unfetch_interval;
3085                                                 } else {
3086                                                         free_stack();
3087                                                         return REG_EBRACE;
3088                                                 }
3089                                         }
3090
3091                                         GET_UNSIGNED_NUMBER(lower_bound);
3092
3093                                         if (c == ',') {
3094                                                 GET_UNSIGNED_NUMBER(
3095                                                         upper_bound);
3096                                                 if (upper_bound < 0) {
3097                                                         upper_bound =
3098                                                                 RE_DUP_MAX;
3099                                                 }
3100                                         } else {
3101                                                 /* Interval such as `{1}' =>
3102                                                    match exactly once. */
3103                                                 upper_bound = lower_bound;
3104                                         }
3105
3106                                         if (lower_bound < 0
3107                                             || upper_bound > RE_DUP_MAX
3108                                             || lower_bound > upper_bound) {
3109                                                 if (syntax & RE_NO_BK_BRACES) {
3110                                                         goto unfetch_interval;
3111                                                 } else {
3112                                                         free_stack();
3113                                                         return REG_BADBR;
3114                                                 }
3115                                         }
3116
3117                                         if (!(syntax & RE_NO_BK_BRACES)) {
3118                                                 if (c != '\\') {
3119                                                         free_stack();
3120                                                         return REG_EBRACE;
3121                                                 }
3122                                                 PATFETCH(c);
3123                                         }
3124
3125                                         if (c != '}') {
3126                                                 if (syntax & RE_NO_BK_BRACES) {
3127                                                         goto unfetch_interval;
3128                                                 } else {
3129                                                         free_stack();
3130                                                         return REG_BADBR;
3131                                                 }
3132                                         }
3133
3134                                         /* We just parsed a valid interval.  */
3135
3136                                         /* If it's invalid to have no preceding
3137                                            re.  */
3138                                         if (!laststart) {
3139                                                 if (syntax &
3140                                                     RE_CONTEXT_INVALID_OPS) {
3141                                                         free_stack();
3142                                                         return REG_BADRPT;
3143                                                 } else if (
3144                                                         syntax &
3145                                                         RE_CONTEXT_INDEP_OPS) {
3146                                                         laststart = buf_end;
3147                                                 } else {
3148                                                         goto unfetch_interval;
3149                                                 }
3150                                         }
3151
3152                                         /* If the upper bound is zero, don't
3153                                            want to succeed at all; jump from
3154                                            `laststart' to `b + 3', which will be
3155                                            the end of the buffer after we insert
3156                                            the jump.  */
3157                                         if (upper_bound == 0) {
3158                                                 GET_BUFFER_SPACE(3);
3159                                                 INSERT_JUMP(jump, laststart,
3160                                                             buf_end + 3);
3161                                                 buf_end += 3;
3162                                         }
3163
3164                                         /* Otherwise, we have a nontrivial interval.  When
3165                                            we're all done, the pattern will look like:
3166                                            set_number_at <jump count> <upper bound>
3167                                            set_number_at <succeed_n count> <lower bound>
3168                                            succeed_n <after jump addr> <succeed_n count>
3169                                            <body of loop>
3170                                            jump_n <succeed_n addr> <jump count>
3171                                            (The upper bound and `jump_n' are omitted if
3172                                            `upper_bound' is 1, though.)  */
3173                                         else {  /* If the upper bound is > 1, we need to insert
3174                                                    more at the end of the loop.  */
3175                                                 Memory_count nbytes =
3176                                                     10 + (upper_bound > 1) * 10;
3177
3178                                                 GET_BUFFER_SPACE(nbytes);
3179
3180                                                 /* Initialize lower bound of the `succeed_n', even
3181                                                    though it will be set during matching by its
3182                                                    attendant `set_number_at' (inserted next),
3183                                                    because `re_compile_fastmap' needs to know.
3184                                                    Jump to the `jump_n' we might insert below.  */
3185                                                 INSERT_JUMP2(succeed_n,
3186                                                              laststart,
3187                                                              buf_end + 5 +
3188                                                              (upper_bound >
3189                                                               1) * 5,
3190                                                              lower_bound);
3191                                                 buf_end += 5;
3192
3193                                                 /* Code to initialize the lower bound.  Insert
3194                                                    before the `succeed_n'.  The `5' is the last two
3195                                                    bytes of this `set_number_at', plus 3 bytes of
3196                                                    the following `succeed_n'.  */
3197                                                 insert_op2(set_number_at,
3198                                                            laststart, 5,
3199                                                            lower_bound,
3200                                                            buf_end);
3201                                                 buf_end += 5;
3202
3203                                                 if (upper_bound > 1) {  /* More than one repetition is allowed, so
3204                                                                            append a backward jump to the `succeed_n'
3205                                                                            that starts this interval.
3206
3207                                                                            When we've reached this during matching,
3208                                                                            we'll have matched the interval once, so
3209                                                                            jump back only `upper_bound - 1' times.  */
3210                                                         STORE_JUMP2(jump_n,
3211                                                                     buf_end,
3212                                                                     laststart +
3213                                                                     5,
3214                                                                     upper_bound
3215                                                                     - 1);
3216                                                         buf_end += 5;
3217
3218                                                         /* The location we want to set is the second
3219                                                            parameter of the `jump_n'; that is `b-2' as
3220                                                            an absolute address.  `laststart' will be
3221                                                            the `set_number_at' we're about to insert;
3222                                                            `laststart+3' the number to set, the source
3223                                                            for the relative address.  But we are
3224                                                            inserting into the middle of the pattern --
3225                                                            so everything is getting moved up by 5.
3226                                                            Conclusion: (b - 2) - (laststart + 3) + 5,
3227                                                            i.e., b - laststart.
3228
3229                                                            We insert this at the beginning of the loop
3230                                                            so that if we fail during matching, we'll
3231                                                            reinitialize the bounds.  */
3232                                                         insert_op2
3233                                                             (set_number_at,
3234                                                              laststart,
3235                                                              buf_end -
3236                                                              laststart,
3237                                                              upper_bound - 1,
3238                                                              buf_end);
3239                                                         buf_end += 5;
3240                                                 }
3241                                         }
3242                                         pending_exact = 0;
3243                                         beg_interval = NULL;
3244                                 }
3245                                 break;
3246
3247                               unfetch_interval:
3248                                 /* If an invalid interval, match the characters as literals.  */
3249                                 assert(beg_interval);
3250                                 p = beg_interval;
3251                                 beg_interval = NULL;
3252
3253                                 /* normal_char and normal_backslash need `c'.  */
3254                                 PATFETCH(c);
3255
3256                                 if (!(syntax & RE_NO_BK_BRACES)) {
3257                                         if (p > pattern && p[-1] == '\\')
3258                                                 goto normal_backslash;
3259                                 }
3260                                 goto normal_char;
3261
3262 #ifdef emacs
3263                                 /* There is no way to specify the before_dot and after_dot
3264                                    operators.  rms says this is ok.  --karl  */
3265                         case '=':
3266                                 BUF_PUSH(at_dot);
3267                                 break;
3268
3269                         case 's':
3270                                 laststart = buf_end;
3271                                 PATFETCH(c);
3272                                 /* XEmacs addition */
3273                                 if (c >= 0x80 || syntax_spec_code[c] == 0377) {
3274                                         free_stack();
3275                                         return REG_ESYNTAX;
3276                                 }
3277                                 BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
3278                                 break;
3279
3280                         case 'S':
3281                                 laststart = buf_end;
3282                                 PATFETCH(c);
3283                                 /* XEmacs addition */
3284                                 if (c >= 0x80 || syntax_spec_code[c] == 0377) {
3285                                         free_stack();
3286                                         return REG_ESYNTAX;
3287                                 }
3288                                 BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
3289                                 break;
3290
3291 #ifdef MULE
3292 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */
3293                         case 'c':
3294                                 laststart = buf_end;
3295                                 PATFETCH_RAW(c);
3296                                 if (c < 32 || c > 127) {
3297                                         free_stack();
3298                                         return REG_ECATEGORY;
3299                                 }
3300                                 BUF_PUSH_2(categoryspec, c);
3301                                 break;
3302
3303                         case 'C':
3304                                 laststart = buf_end;
3305                                 PATFETCH_RAW(c);
3306                                 if (c < 32 || c > 127) {
3307                                         free_stack();
3308                                         return REG_ECATEGORY;
3309                                 }
3310                                 BUF_PUSH_2(notcategoryspec, c);
3311                                 break;
3312 /* end of category patch */
3313 #endif                          /* MULE */
3314 #endif                          /* emacs */
3315
3316                         case 'w':
3317                                 laststart = buf_end;
3318                                 BUF_PUSH(wordchar);
3319                                 break;
3320
3321                         case 'W':
3322                                 laststart = buf_end;
3323                                 BUF_PUSH(notwordchar);
3324                                 break;
3325
3326                         case '<':
3327                                 BUF_PUSH(wordbeg);
3328                                 break;
3329
3330                         case '>':
3331                                 BUF_PUSH(wordend);
3332                                 break;
3333
3334                         case 'b':
3335                                 BUF_PUSH(wordbound);
3336                                 break;
3337
3338                         case 'B':
3339                                 BUF_PUSH(notwordbound);
3340                                 break;
3341
3342                         case '`':
3343                                 BUF_PUSH(begbuf);
3344                                 break;
3345
3346                         case '\'':
3347                                 BUF_PUSH(endbuf);
3348                                 break;
3349
3350                         case '1':
3351                         case '2':
3352                         case '3':
3353                         case '4':
3354                         case '5':
3355                         case '6':
3356                         case '7':
3357                         case '8':
3358                         case '9': {
3359                                 int reg;
3360
3361                                 if (syntax & RE_NO_BK_REFS) {
3362                                         goto normal_char;
3363                                 }
3364                                 /* External register indexing. */
3365                                 reg = c - '0';
3366
3367                                 if (reg > bufp->re_nsub) {
3368                                         free_stack();
3369                                         return REG_ESUBREG;
3370                                 }
3371
3372                                 /* Convert external to internal as soon
3373                                    as possible. */
3374                                 reg = bufp->external_to_internal_register[reg];
3375
3376                                 /* Can't back reference to a
3377                                    subexpression if inside it. */
3378                                 if (group_in_compile_stack(
3379                                             compile_stack, reg)) {
3380                                         goto normal_char;
3381                                 }
3382                                 laststart = buf_end;
3383                                 BUF_PUSH_2(duplicate, reg);
3384                         }
3385                                 break;
3386
3387                         case '+':
3388                         case '?':
3389                                 if (syntax & RE_BK_PLUS_QM) {
3390                                         goto handle_plus;
3391                                 } else {
3392                                         goto normal_backslash;
3393                                 }
3394                         default:
3395                         normal_backslash:
3396                                 /* You might think it would be useful for \ to
3397                                    mean not to translate; but if we don't
3398                                    translate it, it will never match
3399                                    anything.  */
3400                                 c = TRANSLATE(c);
3401                                 goto normal_char;
3402                         }
3403                         break;
3404
3405                         /* Expects the character in `c'.  */
3406                         /* `p' points to the location after where `c' came
3407                            from. */
3408                 normal_char:
3409                 default: {
3410                         /* XEmacs: modifications here for Mule. */
3411                         /* `q' points to the beginning of the next char. */
3412                         re_char *q = p;
3413
3414                         /* If no exactn currently being built.  */
3415                         if (!pending_exact
3416                             /* If last exactn not at current position.  */
3417                             || pending_exact + *pending_exact + 1 !=
3418                             buf_end
3419                             /* We have only one byte following the exactn for
3420                                the count. */
3421                             || ((unsigned int)(*pending_exact + (q - p)) >=
3422                                 ((unsigned int)(1 << BYTEWIDTH) - 1))
3423
3424                             /* If followed by a repetition operator.  */
3425                             || *q == '*' || *q == '^'
3426                             || ((syntax & RE_BK_PLUS_QM)
3427                                 ? *q == '\\' && (q[1] == '+'
3428                                                  || q[1] == '?')
3429                                 : (*q == '+' || *q == '?'))
3430                             || ((syntax & RE_INTERVALS)
3431                                 && ((syntax & RE_NO_BK_BRACES)
3432                                     ? *q == '{'
3433                                     : (q[0] == '\\' && q[1] == '{')))) {
3434                                 /* Start building a new exactn.  */
3435
3436                                 laststart = buf_end;
3437
3438                                 BUF_PUSH_2(exactn, 0);
3439                                 pending_exact = buf_end - 1;
3440                         }
3441 #ifndef MULE
3442                         BUF_PUSH(c);
3443                         (*pending_exact)++;
3444 #else
3445                         {
3446                                 Bytecount bt_count;
3447                                 Bufbyte tmp_buf[MAX_EMCHAR_LEN];
3448                                 int i;
3449
3450                                 bt_count =
3451                                         set_charptr_emchar(tmp_buf, c);
3452
3453                                 for (i = 0; i < bt_count; i++) {
3454                                         BUF_PUSH(tmp_buf[i]);
3455                                         (*pending_exact)++;
3456                                 }
3457                         }
3458 #endif
3459                         break;
3460                 }
3461                 }               /* switch (c) */
3462         }                       /* while p != pend */
3463
3464         /* Through the pattern now.  */
3465
3466         if (fixup_alt_jump) {
3467                 STORE_JUMP(jump_past_alt, fixup_alt_jump, buf_end);
3468         }
3469         if (!COMPILE_STACK_EMPTY) {
3470                 free_stack();
3471                 return REG_EPAREN;
3472         }
3473         /* If we don't want backtracking, force success
3474            the first time we reach the end of the compiled pattern.  */
3475         if (syntax & RE_NO_POSIX_BACKTRACKING) {
3476                 BUF_PUSH(succeed);
3477         }
3478         xfree(compile_stack.stack);
3479
3480         /* We have succeeded; set the length of the buffer.  */
3481         bufp->used = buf_end - bufp->buffer;
3482
3483 #ifdef REGEX_DEBUG_FLAG
3484         if (debug) {
3485                 DEBUG_PRINT1("\nCompiled pattern: \n");
3486                 print_compiled_pattern(bufp);
3487         }
3488 #endif                          /* DEBUG */
3489
3490 #ifndef MATCH_MAY_ALLOCATE
3491         /* Initialize the failure stack to the largest possible stack.  This
3492            isn't necessary unless we're trying to avoid calling alloca in
3493            the search and match routines.  */
3494         {
3495                 int num_regs = bufp->re_ngroups + 1;
3496
3497                 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
3498                    is strictly greater than re_max_failures, the largest possible stack
3499                    is 2 * re_max_failures failure points.  */
3500                 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) {
3501                         fail_stack.size =
3502                             (2 * re_max_failures * MAX_FAILURE_ITEMS);
3503
3504                         if (!fail_stack.stack) {
3505                                 fail_stack.stack =
3506                                         (fail_stack_elt_t *)
3507                                         xmalloc_atomic(
3508                                                 fail_stack.size *
3509                                                 sizeof(fail_stack_elt_t));
3510                         } else {
3511                                 fail_stack.stack =
3512                                         (fail_stack_elt_t *)
3513                                         xrealloc(fail_stack.stack,
3514                                                  (fail_stack.size *
3515                                                   sizeof(fail_stack_elt_t)));
3516                         }
3517                 }
3518
3519                 regex_grow_registers(num_regs);
3520         }
3521 #endif  /* not MATCH_MAY_ALLOCATE */
3522
3523         return REG_NOERROR;
3524 }
3525 /* regex_compile */
3526 \f
3527 /* Subroutines for `regex_compile'.  */
3528
3529 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
3530
3531 static void store_op1(re_opcode_t op, unsigned char *loc, int arg)
3532 {
3533         *loc = (unsigned char)op;
3534         STORE_NUMBER(loc + 1, arg);
3535 }
3536
3537 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3538
3539 static void store_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2)
3540 {
3541         *loc = (unsigned char)op;
3542         STORE_NUMBER(loc + 1, arg1);
3543         STORE_NUMBER(loc + 3, arg2);
3544 }
3545
3546 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3547    for OP followed by two-byte integer parameter ARG.  */
3548
3549 static void
3550 insert_op1(re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
3551 {
3552         REGISTER unsigned char *pfrom = end;
3553         REGISTER unsigned char *pto = end + 3;
3554
3555         while (pfrom != loc)
3556                 *--pto = *--pfrom;
3557
3558         store_op1(op, loc, arg);
3559 }
3560
3561 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3562
3563 static void
3564 insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2,
3565            unsigned char *end)
3566 {
3567         REGISTER unsigned char *pfrom = end;
3568         REGISTER unsigned char *pto = end + 5;
3569
3570         while (pfrom != loc)
3571                 *--pto = *--pfrom;
3572
3573         store_op2(op, loc, arg1, arg2);
3574 }
3575
3576 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3577    after an alternative or a begin-subexpression.  We assume there is at
3578    least one character before the ^.  */
3579
3580 static re_bool
3581 at_begline_loc_p(re_char *pattern, re_char *p, reg_syntax_t syntax)
3582 {
3583         re_char *prev = p - 2;
3584         re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3585
3586         return
3587             /* After a subexpression?  */
3588             (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3589             /* After an alternative?  */
3590             || (*prev == '|'
3591                 && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3592 }
3593
3594 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3595    at least one character after the $, i.e., `P < PEND'.  */
3596
3597 static re_bool
3598 at_endline_loc_p(re_char *p, re_char *pend, int syntax)
3599 {
3600         re_char *next = p;
3601         re_bool next_backslash = *next == '\\';
3602         re_char *next_next = p + 1 < pend ? p + 1 : 0;
3603
3604         return
3605             /* Before a subexpression?  */
3606             (syntax & RE_NO_BK_PARENS ? *next == ')'
3607              : next_backslash && next_next && *next_next == ')')
3608             /* Before an alternative?  */
3609             || (syntax & RE_NO_BK_VBAR ? *next == '|'
3610                 : next_backslash && next_next && *next_next == '|');
3611 }
3612
3613 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3614    false if it's not.  */
3615
3616 static re_bool
3617 group_in_compile_stack(compile_stack_type compile_stack, regnum_t regnum)
3618 {
3619         int this_element;
3620
3621         for (this_element = compile_stack.avail - 1;
3622              this_element >= 0; this_element--)
3623                 if (compile_stack.stack[this_element].regnum == regnum) {
3624                         return true;
3625                 }
3626         return false;
3627 }
3628
3629 /* Read the ending character of a range (in a bracket expression) from the
3630    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3631    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3632    Then we set the translation of all bits between the starting and
3633    ending characters (inclusive) in the compiled pattern B.
3634
3635    Return an error code.
3636
3637    We use these short variable names so we can use the same macros as
3638    `regex_compile' itself.  */
3639
3640 static reg_errcode_t
3641 compile_range(re_char ** p_ptr, re_char * pend, RE_TRANSLATE_TYPE translate,
3642               reg_syntax_t syntax, unsigned char *buf_end)
3643 {
3644         Element_count this_char;
3645
3646         re_char *p = *p_ptr;
3647         int range_start, range_end;
3648
3649         if (p == pend)
3650                 return REG_ERANGE;
3651
3652         /* Even though the pattern is a signed `char *', we need to fetch
3653            with unsigned char *'s; if the high bit of the pattern character
3654            is set, the range endpoints will be negative if we fetch using a
3655            signed char *.
3656
3657            We also want to fetch the endpoints without translating them; the
3658            appropriate translation is done in the bit-setting loop below.  */
3659         /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3660         range_start = ((const unsigned char *)p)[-2];
3661         range_end = ((const unsigned char *)p)[0];
3662
3663         /* Have to increment the pointer into the pattern string, so the
3664            caller isn't still at the ending character.  */
3665         (*p_ptr)++;
3666
3667         /* If the start is after the end, the range is empty.  */
3668         if (range_start > range_end)
3669                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3670
3671         /* Here we see why `this_char' has to be larger than an `unsigned
3672            char' -- the range is inclusive, so if `range_end' == 0xff
3673            (assuming 8-bit characters), we would otherwise go into an infinite
3674            loop, since all characters <= 0xff.  */
3675         for (this_char = range_start; this_char <= range_end; this_char++) {
3676                 SET_LIST_BIT(TRANSLATE(this_char));
3677         }
3678
3679         return REG_NOERROR;
3680 }
3681
3682 #ifdef MULE
3683
3684 static reg_errcode_t
3685 compile_extended_range(re_char ** p_ptr, re_char * pend,
3686                        RE_TRANSLATE_TYPE translate,
3687                        reg_syntax_t syntax, Lisp_Object rtab)
3688 {
3689         Emchar this_char, range_start, range_end;
3690         const Bufbyte *p;
3691
3692         if (*p_ptr == pend)
3693                 return REG_ERANGE;
3694
3695         p = (const Bufbyte *)*p_ptr;
3696         range_end = charptr_emchar(p);
3697         p--;                    /* back to '-' */
3698         DEC_CHARPTR(p);         /* back to start of range */
3699         /* We also want to fetch the endpoints without translating them; the
3700            appropriate translation is done in the bit-setting loop below.  */
3701         range_start = charptr_emchar(p);
3702         INC_CHARPTR(*p_ptr);
3703
3704         /* If the start is after the end, the range is empty.  */
3705         if (range_start > range_end)
3706                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3707
3708         /* Can't have ranges spanning different charsets, except maybe for
3709            ranges entirely within the first 256 chars. */
3710
3711         if ((range_start >= 0x100 || range_end >= 0x100)
3712             && CHAR_LEADING_BYTE(range_start) != CHAR_LEADING_BYTE(range_end))
3713                 return REG_ERANGESPAN;
3714
3715         /* As advertised, translations only work over the 0 - 0x7F range.
3716            Making this kind of stuff work generally is much harder.
3717            Iterating over the whole range like this would be way efficient
3718            if the range encompasses 10,000 chars or something.  You'd have
3719            to do something like this:
3720
3721            range_table a;
3722            range_table b;
3723            map over translation table in [range_start, range_end] of
3724            (put the mapped range in a;
3725            put the translation in b)
3726            invert the range in a and truncate to [range_start, range_end]
3727            compute the union of a, b
3728            union the result into rtab
3729          */
3730         for (this_char = range_start;
3731              this_char <= range_end && this_char < 0x80; this_char++) {
3732                 SET_RANGETAB_BIT(TRANSLATE(this_char));
3733         }
3734
3735         if (this_char <= range_end)
3736                 put_range_table(rtab, this_char, range_end, Qt);
3737
3738         return REG_NOERROR;
3739 }
3740
3741 #endif                          /* MULE */
3742 \f
3743 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3744    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3745    characters can start a string that matches the pattern.  This fastmap
3746    is used by re_search to skip quickly over impossible starting points.
3747
3748    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3749    area as BUFP->fastmap.
3750
3751    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3752    the pattern buffer.
3753
3754    Returns 0 if we succeed, -2 if an internal error.   */
3755
3756 int re_compile_fastmap(struct re_pattern_buffer *bufp)
3757 {
3758         int j, k;
3759 #ifdef MATCH_MAY_ALLOCATE
3760         fail_stack_type fail_stack;
3761 #endif
3762         DECLARE_DESTINATION;
3763         /* We don't push any register information onto the failure stack.  */
3764
3765         REGISTER char *fastmap = bufp->fastmap;
3766         unsigned char *pattern = bufp->buffer;
3767         unsigned long size = bufp->used;
3768         /* actually p supposed to carry the const qualifier, however, some
3769            silly mule code below CHANGES p and hence we cant go with the const
3770            qualifier here, leaving an `unfixable' warning on the way */
3771 #ifdef MULE
3772         unsigned char *p = pattern;
3773 #else
3774         re_char *p = pattern;
3775 #endif
3776         REGISTER unsigned char *pend = pattern + size;
3777
3778 #ifdef REL_ALLOC
3779         /* This holds the pointer to the failure stack, when
3780            it is allocated relocatably.  */
3781         fail_stack_elt_t *failure_stack_ptr;
3782 #endif
3783
3784         /* Assume that each path through the pattern can be null until
3785            proven otherwise.  We set this false at the bottom of switch
3786            statement, to which we get only if a particular path doesn't
3787            match the empty string.  */
3788         re_bool path_can_be_null = true;
3789
3790         /* We aren't doing a `succeed_n' to begin with.  */
3791         re_bool succeed_n_p = false;
3792
3793         assert(fastmap != NULL && p != NULL);
3794
3795         INIT_FAIL_STACK();
3796         memset(fastmap, 0, 1 << BYTEWIDTH);     /* Assume nothing's valid.  */
3797         bufp->fastmap_accurate = 1;     /* It will be when we're done.  */
3798         bufp->can_be_null = 0;
3799
3800         while (1) {
3801                 if (p == pend || *p == succeed) {
3802                         /* We have reached the (effective) end of pattern.  */
3803                         if (!FAIL_STACK_EMPTY()) {
3804                                 bufp->can_be_null |= path_can_be_null;
3805
3806                                 /* Reset for next path.  */
3807                                 path_can_be_null = true;
3808
3809                                 /* fuck, p isnt const thanks to that
3810                                  * unified range table function below */
3811 #ifdef MULE
3812                                 p = (unsigned char*)fail_stack.
3813                                     stack[--fail_stack.avail].pointer;
3814 #else
3815                                 p = fail_stack.stack[--fail_stack.avail]
3816                                         .pointer;
3817 #endif
3818
3819                                 continue;
3820                         } else {
3821                                 break;
3822                         }
3823                 }
3824
3825                 /* We should never be about to go beyond the end of the pattern.  */
3826                 assert(p < pend);
3827
3828                 switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
3829
3830                         /* I guess the idea here is to simply not bother with a
3831                            fastmap if a backreference is used, since it's too
3832                            hard to figure out the fastmap for the corresponding
3833                            group.  Setting `can_be_null' stops `re_search_2'
3834                            from using the fastmap, so that is all we do.  */
3835                 case duplicate:
3836                         bufp->can_be_null = 1;
3837                         goto done;
3838
3839                         /* Following are the cases which match a character.
3840                            These end with `break'.  */
3841
3842                 case exactn:
3843                         fastmap[p[1]] = 1;
3844                         break;
3845
3846                 case charset:
3847                         /* XEmacs: Under Mule, these bit vectors will
3848                            only contain values for characters below 0x80. */
3849                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3850                                 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3851                                         fastmap[j] = 1;
3852                         break;
3853
3854                 case charset_not:
3855                         /* Chars beyond end of map must be allowed.  */
3856 #ifdef MULE
3857                         for (j = *p * BYTEWIDTH; j < 0x80; j++)
3858                                 fastmap[j] = 1;
3859                         /* And all extended characters must be allowed, too. */
3860                         for (j = 0x80; j < 0xA0; j++)
3861                                 fastmap[j] = 1;
3862 #else                           /* not MULE */
3863                         for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3864                                 fastmap[j] = 1;
3865 #endif                          /* MULE */
3866
3867                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3868                                 if (!
3869                                     (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3870                                         fastmap[j] = 1;
3871                         break;
3872
3873 #ifdef MULE
3874                 case charset_mule: {
3875                         int nentries;
3876                         int i;
3877
3878                         nentries = unified_range_table_nentries(p);
3879                         for (i = 0; i < nentries; i++) {
3880                                 EMACS_INT first, last;
3881                                 Lisp_Object dummy_val;
3882                                 int jj;
3883                                 Bufbyte strr[MAX_EMCHAR_LEN];
3884
3885                                 unified_range_table_get_range(p, i,
3886                                                               &first,
3887                                                               &last,
3888                                                               &dummy_val);
3889                                 for (jj = first;
3890                                      jj <= last && jj < 0x80; jj++)
3891                                         fastmap[jj] = 1;
3892                                 /* Ranges below 0x100 can span charsets, but there
3893                                    are only two (Control-1 and Latin-1), and
3894                                    either first or last has to be in them. */
3895                                 set_charptr_emchar(strr, first);
3896                                 fastmap[*strr] = 1;
3897                                 if (last < 0x100) {
3898                                         set_charptr_emchar(strr, last);
3899                                         fastmap[*strr] = 1;
3900                                 }
3901                         }
3902                 }
3903                         break;
3904
3905                 case charset_mule_not: {
3906                         int nentries;
3907                         int i;
3908
3909                         nentries = unified_range_table_nentries(p);
3910                         for (i = 0; i < nentries; i++) {
3911                                 EMACS_INT first, last;
3912                                 Lisp_Object dummy_val;
3913                                 int jj;
3914                                 int smallest_prev = 0;
3915
3916                                 unified_range_table_get_range(p, i,
3917                                                               &first,
3918                                                               &last,
3919                                                               &dummy_val);
3920                                 for (jj = smallest_prev;
3921                                      jj < first && jj < 0x80; jj++)
3922                                         fastmap[jj] = 1;
3923                                 smallest_prev = last + 1;
3924                                 if (smallest_prev >= 0x80)
3925                                         break;
3926                         }
3927                         /* Calculating which leading bytes are actually allowed
3928                            here is rather difficult, so we just punt and allow
3929                            all of them. */
3930                         for (i = 0x80; i < 0xA0; i++)
3931                                 fastmap[i] = 1;
3932                 }
3933                         break;
3934 #endif                          /* MULE */
3935
3936                 case wordchar:
3937 #ifdef emacs
3938                         k = (int)Sword;
3939                         goto matchsyntax;
3940 #else
3941                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3942                                 if (SYNTAX_UNSAFE
3943                                     (XCHAR_TABLE
3944                                      (regex_emacs_buffer->mirror_syntax_table),
3945                                      j) == Sword)
3946                                         fastmap[j] = 1;
3947                         break;
3948 #endif
3949
3950                 case notwordchar:
3951 #ifdef emacs
3952                         k = (int)Sword;
3953                         goto matchnotsyntax;
3954 #else
3955                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3956                                 if (SYNTAX_UNSAFE
3957                                     (XCHAR_TABLE
3958                                      (regex_emacs_buffer->mirror_syntax_table),
3959                                      j) != Sword)
3960                                         fastmap[j] = 1;
3961                         break;
3962 #endif
3963
3964                 case anychar: {
3965                         int fastmap_newline = fastmap['\n'];
3966
3967                         /* `.' matches anything ...  */
3968 #ifdef MULE
3969                         /* "anything" only includes bytes that can be the
3970                            first byte of a character. */
3971                         for (j = 0; j < 0xA0; j++)
3972                                 fastmap[j] = 1;
3973 #else
3974                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3975                                 fastmap[j] = 1;
3976 #endif
3977
3978                         /* ... except perhaps newline.  */
3979                         if (!(bufp->syntax & RE_DOT_NEWLINE))
3980                                 fastmap['\n'] = fastmap_newline;
3981
3982                         /* Return if we have already set `can_be_null'; if we
3983                            have, then the fastmap is irrelevant.  Something's
3984                            wrong here.  */
3985                         else if (bufp->can_be_null)
3986                                 goto done;
3987
3988                         /* Otherwise, have to check alternative paths.  */
3989                         break;
3990                 }
3991
3992 #ifdef emacs
3993                 case wordbound:
3994                 case notwordbound:
3995                 case wordbeg:
3996                 case wordend:
3997                 case notsyntaxspec:
3998                 case syntaxspec:
3999                         /* This match depends on text properties.  These end with
4000                            aborting optimizations.  */
4001                         bufp->can_be_null = 1;
4002                         goto done;
4003
4004 #ifdef emacs
4005                 matchsyntax:
4006 #ifdef MULE
4007                         for (j = 0; j < 0x80; j++)
4008                                 if (SYNTAX_UNSAFE
4009                                     (XCHAR_TABLE
4010                                      (regex_emacs_buffer->mirror_syntax_table),
4011                                      j) == (enum syntaxcode)k)
4012                                         fastmap[j] = 1;
4013                         for (j = 0x80; j < 0xA0; j++) {
4014                                 if (LEADING_BYTE_PREFIX_P(j))
4015                                         /* too complicated to calculate this right */
4016                                         fastmap[j] = 1;
4017                                 else {
4018                                         int multi_p;
4019                                         Lisp_Object cset;
4020
4021                                         cset = CHARSET_BY_LEADING_BYTE(j);
4022                                         if (CHARSETP(cset)) {
4023                                                 if (charset_syntax
4024                                                     (regex_emacs_buffer, cset,
4025                                                      &multi_p)
4026                                                     == Sword || multi_p)
4027                                                         fastmap[j] = 1;
4028                                         }
4029                                 }
4030                         }
4031 #else                           /* not MULE */
4032                         for (j = 0; j < (1 << BYTEWIDTH); j++)
4033                                 if (SYNTAX_UNSAFE
4034                                     (XCHAR_TABLE
4035                                      (regex_emacs_buffer->mirror_syntax_table),
4036                                      j) == (enum syntaxcode)k)
4037                                         fastmap[j] = 1;
4038 #endif                          /* MULE */
4039                         break;
4040
4041                 matchnotsyntax:
4042 #ifdef MULE
4043                         for (j = 0; j < 0x80; j++)
4044                                 if (SYNTAX_UNSAFE
4045                                     (XCHAR_TABLE
4046                                      (regex_emacs_buffer->mirror_syntax_table),
4047                                      j) != (enum syntaxcode)k)
4048                                         fastmap[j] = 1;
4049                         for (j = 0x80; j < 0xA0; j++) {
4050                                 if (LEADING_BYTE_PREFIX_P(j))
4051                                         /* too complicated to calculate this right */
4052                                         fastmap[j] = 1;
4053                                 else {
4054                                         int multi_p;
4055                                         Lisp_Object cset;
4056
4057                                         cset = CHARSET_BY_LEADING_BYTE(j);
4058                                         if (CHARSETP(cset)) {
4059                                                 if (charset_syntax
4060                                                     (regex_emacs_buffer, cset,
4061                                                      &multi_p)
4062                                                     != Sword || multi_p)
4063                                                         fastmap[j] = 1;
4064                                         }
4065                                 }
4066                         }
4067 #else                           /* not MULE */
4068                         for (j = 0; j < (1 << BYTEWIDTH); j++)
4069                                 if (SYNTAX_UNSAFE
4070                                     (XCHAR_TABLE
4071                                      (regex_emacs_buffer->mirror_syntax_table),
4072                                      j) != (enum syntaxcode)k)
4073                                         fastmap[j] = 1;
4074 #endif                          /* MULE */
4075                         break;
4076 #endif                          /* emacs */
4077
4078 #ifdef MULE
4079 /* 97/2/17 jhod category patch */
4080                 case categoryspec:
4081                 case notcategoryspec:
4082                         bufp->can_be_null = 1;
4083                         return 0;
4084 /* end if category patch */
4085 #endif                          /* MULE */
4086
4087                         /* All cases after this match the empty string.  These
4088                            end with `continue'.  */
4089
4090                 case before_dot:
4091                 case at_dot:
4092                 case after_dot:
4093                         continue;
4094 #endif                          /* emacs */
4095
4096                 case no_op:
4097                 case begline:
4098                 case endline:
4099                 case begbuf:
4100                 case endbuf:
4101 #ifndef emacs
4102                 case wordbound:
4103                 case notwordbound:
4104                 case wordbeg:
4105                 case wordend:
4106 #endif
4107                 case push_dummy_failure:
4108                         continue;
4109
4110                 case jump_n:
4111                 case pop_failure_jump:
4112                 case maybe_pop_jump:
4113                 case jump:
4114                 case jump_past_alt:
4115                 case dummy_failure_jump:
4116                         EXTRACT_NUMBER_AND_INCR(j, p);
4117                         p += j;
4118                         if (j > 0)
4119                                 continue;
4120
4121                         /* Jump backward implies we just went through the body
4122                            of a loop and matched nothing.  Opcode jumped to
4123                            should be `on_failure_jump' or `succeed_n'.  Just
4124                            treat it like an ordinary jump.  For a * loop, it has
4125                            pushed its failure point already; if so, discard that
4126                            as redundant.  */
4127                         if ((re_opcode_t) * p != on_failure_jump
4128                             && (re_opcode_t) * p != succeed_n)
4129                                 continue;
4130
4131                         p++;
4132                         EXTRACT_NUMBER_AND_INCR(j, p);
4133                         p += j;
4134
4135                         /* If what's on the stack is where we are now, pop
4136                            it.  */
4137                         if (!FAIL_STACK_EMPTY()
4138                             && fail_stack.stack[fail_stack.avail - 1].pointer ==
4139                             p)
4140                                 fail_stack.avail--;
4141
4142                         continue;
4143
4144                 case on_failure_jump:
4145                 case on_failure_keep_string_jump:
4146                       handle_on_failure_jump:
4147                         EXTRACT_NUMBER_AND_INCR(j, p);
4148
4149                         /* For some patterns, e.g., `(a?)?', `p+j' here points
4150                            to the end of the pattern.  We don't want to push
4151                            such a point, since when we restore it above,
4152                            entering the switch will increment `p' past the end
4153                            of the pattern.  We don't need to push such a point
4154                            since we obviously won't find any more fastmap
4155                            entries beyond `pend'.  Such a pattern can match the
4156                            null string, though.  */
4157                         if (p + j < pend) {
4158                                 if (!PUSH_PATTERN_OP(p + j, fail_stack)) {
4159                                         RESET_FAIL_STACK();
4160                                         return -2;
4161                                 }
4162                         } else
4163                                 bufp->can_be_null = 1;
4164
4165                         if (succeed_n_p) {
4166                                 EXTRACT_NUMBER_AND_INCR(k, p);  /* Skip the n.  */
4167                                 succeed_n_p = false;
4168                         }
4169
4170                         continue;
4171
4172                 case succeed_n:
4173                         /* Get to the number of times to succeed.  */
4174                         p += 2;
4175
4176                         /* Increment p past the n for when k != 0.  */
4177                         EXTRACT_NUMBER_AND_INCR(k, p);
4178                         if (k == 0) {
4179                                 p -= 4;
4180                                 succeed_n_p = true;     /* Spaghetti code alert.  */
4181                                 goto handle_on_failure_jump;
4182                         }
4183                         continue;
4184
4185                 case set_number_at:
4186                         p += 4;
4187                         continue;
4188
4189                 case start_memory:
4190                 case stop_memory:
4191                         p += 2;
4192                         continue;
4193
4194                 case succeed:
4195                 default:
4196                         abort();        /* We have listed all the cases.  */
4197                 }               /* switch *p++ */
4198
4199                 /* Getting here means we have found the possible starting
4200                    characters for one path of the pattern -- and that the empty
4201                    string does not match.  We need not follow this path further.
4202                    Instead, look at the next alternative (remembered on the
4203                    stack), or quit if no more.  The test at the top of the loop
4204                    does these things.  */
4205                 path_can_be_null = false;
4206                 p = pend;
4207         }                       /* while p */
4208
4209         /* Set `can_be_null' for the last path (also the first path, if the
4210            pattern is empty).  */
4211         bufp->can_be_null |= path_can_be_null;
4212
4213 done:
4214         RESET_FAIL_STACK();
4215         return 0;
4216 }       /* re_compile_fastmap */
4217 \f
4218 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4219    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4220    this memory for recording register information.  STARTS and ENDS
4221    must be allocated using the malloc library routine, and must each
4222    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4223
4224    If NUM_REGS == 0, then subsequent matches should allocate their own
4225    register data.
4226
4227    Unless this function is called, the first search or match using
4228    PATTERN_BUFFER will allocate its own register data, without
4229    freeing the old data.  */
4230
4231 void
4232 re_set_registers(struct re_pattern_buffer *bufp, struct re_registers *regs,
4233                  unsigned num_regs, regoff_t * starts, regoff_t * ends)
4234 {
4235         if (num_regs) {
4236                 bufp->regs_allocated = REGS_REALLOCATE;
4237                 regs->num_regs = num_regs;
4238                 regs->start = starts;
4239                 regs->end = ends;
4240         } else {
4241                 bufp->regs_allocated = REGS_UNALLOCATED;
4242                 regs->num_regs = 0;
4243                 regs->start = regs->end = (regoff_t *) 0;
4244         }
4245 }
4246 \f
4247 /* Searching routines.  */
4248
4249 /* Like re_search_2, below, but only one string is specified, and
4250    doesn't let you say where to stop matching. */
4251
4252 int
4253 re_search(struct re_pattern_buffer *bufp, const char *string, int size,
4254           int startpos, int range, struct re_registers *regs)
4255 {
4256         return re_search_2(bufp, NULL, 0, string, size, startpos, range,
4257                            regs, size);
4258 }
4259
4260 #ifndef emacs
4261 /* Snarfed from src/lisp.h, needed for compiling [ce]tags. */
4262 # define bytecount_to_charcount(ptr, len) (len)
4263 # define charcount_to_bytecount(ptr, len) (len)
4264 typedef int Charcount;
4265 #endif
4266
4267 /* Using the compiled pattern in BUFP->buffer, first tries to match the
4268    virtual concatenation of STRING1 and STRING2, starting first at index
4269    STARTPOS, then at STARTPOS + 1, and so on.
4270
4271    With MULE, STARTPOS is a byte position, not a char position.  And the
4272    search will increment STARTPOS by the width of the current leading
4273    character.
4274
4275    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
4276
4277    RANGE is how far to scan while trying to match.  RANGE = 0 means try
4278    only at STARTPOS; in general, the last start tried is STARTPOS +
4279    RANGE.
4280
4281    With MULE, RANGE is a byte position, not a char position.  The last
4282    start tried is the character starting <= STARTPOS + RANGE.
4283
4284    In REGS, return the indices of the virtual concatenation of STRING1
4285    and STRING2 that matched the entire BUFP->buffer and its contained
4286    subexpressions.
4287
4288    Do not consider matching one past the index STOP in the virtual
4289    concatenation of STRING1 and STRING2.
4290
4291    We return either the position in the strings at which the match was
4292    found, -1 if no match, or -2 if error (such as failure
4293    stack overflow).  */
4294
4295 int
4296 re_search_2(struct re_pattern_buffer *bufp, const char *str1,
4297             int size1, const char *str2, int size2, int startpos,
4298             int range, struct re_registers *regs, int stop)
4299 {
4300         int val;
4301         re_char *string1 = (re_char *) str1;
4302         re_char *string2 = (re_char *) str2;
4303         REGISTER char *fastmap = bufp->fastmap;
4304         REGISTER RE_TRANSLATE_TYPE translate = bufp->translate;
4305         int total_size = size1 + size2;
4306         int endpos = startpos + range;
4307 #ifdef REGEX_BEGLINE_CHECK
4308         int anchored_at_begline = 0;
4309 #endif
4310         re_char *d;
4311         Charcount d_size;
4312
4313         /* Check for out-of-range STARTPOS.  */
4314         if (startpos < 0 || startpos > total_size)
4315                 return -1;
4316
4317         /* Fix up RANGE if it might eventually take us outside
4318            the virtual concatenation of STRING1 and STRING2.  */
4319         if (endpos < 0)
4320                 range = 0 - startpos;
4321         else if (endpos > total_size)
4322                 range = total_size - startpos;
4323
4324         /* If the search isn't to be a backwards one, don't waste time in a
4325            search for a pattern that must be anchored.  */
4326         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
4327             && range > 0) {
4328                 if (startpos > 0)
4329                         return -1;
4330                 else {
4331                         d = ((const unsigned char *)
4332                              (startpos >=
4333                               size1 ? string2 - size1 : string1) + startpos);
4334                         range = charcount_to_bytecount(d, 1);
4335                 }
4336         }
4337 #ifdef emacs
4338         /* In a forward search for something that starts with \=.
4339            don't keep searching past point.  */
4340         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot
4341             && range > 0) {
4342                 range =
4343                     BUF_PT(regex_emacs_buffer) - BUF_BEGV(regex_emacs_buffer)
4344                     - startpos;
4345                 if (range < 0)
4346                         return -1;
4347         }
4348 #endif                          /* emacs */
4349
4350         /* Update the fastmap now if not correct already.  */
4351         if (fastmap && !bufp->fastmap_accurate)
4352                 if (re_compile_fastmap(bufp) == -2)
4353                         return -2;
4354
4355 #ifdef REGEX_BEGLINE_CHECK
4356         {
4357                 unsigned long i = 0;
4358
4359                 while (i < bufp->used) {
4360                         if (bufp->buffer[i] == start_memory ||
4361                             bufp->buffer[i] == stop_memory)
4362                                 i += 2;
4363                         else
4364                                 break;
4365                 }
4366                 anchored_at_begline = i < bufp->used
4367                     && bufp->buffer[i] == begline;
4368         }
4369 #endif
4370
4371 #ifdef emacs
4372         SETUP_SYNTAX_CACHE_FOR_OBJECT(regex_match_object,
4373                                       regex_emacs_buffer,
4374                                       SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR
4375                                       (regex_match_object, regex_emacs_buffer,
4376                                        startpos), 1);
4377 #endif
4378
4379         /* Loop through the string, looking for a place to start matching.  */
4380         for (;;) {
4381 #ifdef REGEX_BEGLINE_CHECK
4382                 /* If the regex is anchored at the beginning of a line (i.e. with a ^),
4383                    then we can speed things up by skipping to the next beginning-of-
4384                    line. */
4385                 if (anchored_at_begline && startpos > 0 && startpos != size1 &&
4386                     range > 0) {
4387                         /* whose stupid idea was it anyway to make this
4388                            function take two strings to match?? */
4389                         int lim = 0;
4390                         int irange = range;
4391
4392                         if (startpos < size1 && startpos + range >= size1)
4393                                 lim = range - (size1 - startpos);
4394
4395                         d = ((const unsigned char *)
4396                              (startpos >=
4397                               size1 ? string2 - size1 : string1) + startpos);
4398                         DEC_CHARPTR(d); /* Ok, since startpos != size1. */
4399                         d_size = charcount_to_bytecount(d, 1);
4400
4401                         if (TRANSLATE_P(translate))
4402                                 while (range > lim && *d != '\n') {
4403                                         d += d_size;    /* Speedier INC_CHARPTR(d) */
4404                                         d_size = charcount_to_bytecount(d, 1);
4405                                         range -= d_size;
4406                         } else
4407                                 while (range > lim && *d != '\n') {
4408                                         d += d_size;    /* Speedier INC_CHARPTR(d) */
4409                                         d_size = charcount_to_bytecount(d, 1);
4410                                         range -= d_size;
4411                                 }
4412
4413                         startpos += irange - range;
4414                 }
4415 #endif                          /* REGEX_BEGLINE_CHECK */
4416
4417                 /* If a fastmap is supplied, skip quickly over characters that
4418                    cannot be the start of a match.  If the pattern can match the
4419                    null string, however, we don't need to skip characters; we want
4420                    the first null string.  */
4421                 if (fastmap && startpos < total_size && !bufp->can_be_null) {
4422                         if (range > 0) {        /* Searching forwards.  */
4423                                 int lim = 0;
4424                                 int irange = range;
4425
4426                                 if (startpos < size1
4427                                     && startpos + range >= size1)
4428                                         lim = range - (size1 - startpos);
4429
4430                                 d = ((const unsigned char *)
4431                                      (startpos >=
4432                                       size1 ? string2 - size1 : string1) +
4433                                      startpos);
4434
4435                                 /* Written out as an if-else to avoid testing `translate'
4436                                    inside the loop.  */
4437                                 if (TRANSLATE_P(translate))
4438                                         while (range > lim) {
4439 #ifdef MULE
4440                                                 Emchar buf_ch;
4441                                                 Bufbyte str[MAX_EMCHAR_LEN];
4442
4443                                                 buf_ch = charptr_emchar(d);
4444                                                 buf_ch = RE_TRANSLATE(buf_ch);
4445                                                 set_charptr_emchar(str, buf_ch);
4446                                                 if (buf_ch >= 0200
4447                                                     || fastmap[(unsigned char)
4448                                                                *str])
4449                                                         break;
4450 #else
4451                                                 if (fastmap
4452                                                     [(unsigned char)
4453                                                      RE_TRANSLATE(*d)])
4454                                                         break;
4455 #endif                          /* MULE */
4456                                                 d_size =
4457                                                     charcount_to_bytecount(d,
4458                                                                            1);
4459                                                 range -= d_size;
4460                                                 d += d_size;    /* Speedier INC_CHARPTR(d) */
4461                                 } else
4462                                         while (range > lim && !fastmap[*d]) {
4463                                                 d_size =
4464                                                     charcount_to_bytecount(d,
4465                                                                            1);
4466                                                 range -= d_size;
4467                                                 d += d_size;    /* Speedier INC_CHARPTR(d) */
4468                                         }
4469
4470                                 startpos += irange - range;
4471                         } else {        /* Searching backwards.  */
4472
4473                                 Emchar c = (size1 == 0 || startpos >= size1
4474                                             ? charptr_emchar(string2 +
4475                                                              startpos - size1)
4476                                             : charptr_emchar(string1 +
4477                                                              startpos));
4478                                 c = TRANSLATE(c);
4479 #ifdef MULE
4480                                 if (!(c >= 0200 || fastmap[(unsigned char)c]))
4481                                         goto advance;
4482 #else
4483                                 if (!fastmap[(unsigned char)c])
4484                                         goto advance;
4485 #endif
4486                         }
4487                 }
4488
4489                 /* If can't match the null string, and that's all we have left, fail.  */
4490                 if (range >= 0 && startpos == total_size && fastmap
4491                     && !bufp->can_be_null)
4492                         return -1;
4493
4494 #ifdef emacs                    /* XEmacs added, w/removal of immediate_quit */
4495                 if (!no_quit_in_re_search)
4496                         QUIT;
4497 #endif
4498                 val = re_match_2_internal(bufp, string1, size1, string2, size2,
4499                                           startpos, regs, stop);
4500 #ifndef REGEX_MALLOC
4501 #ifdef C_ALLOCA
4502                 alloca(0);
4503 #endif
4504 #endif
4505
4506                 if (val >= 0)
4507                         return startpos;
4508
4509                 if (val == -2)
4510                         return -2;
4511
4512               advance:
4513                 if (!range)
4514                         break;
4515                 else if (range > 0) {
4516                         d = ((const unsigned char *)
4517                              (startpos >=
4518                               size1 ? string2 - size1 : string1) + startpos);
4519                         d_size = charcount_to_bytecount(d, 1);
4520                         range -= d_size;
4521                         startpos += d_size;
4522                 } else {
4523                         /* Note startpos > size1 not >=.  If we are on the
4524                            string1/string2 boundary, we want to backup into string1. */
4525                         d = ((const unsigned char *)
4526                              (startpos >
4527                               size1 ? string2 - size1 : string1) + startpos);
4528                         DEC_CHARPTR(d);
4529                         d_size = charcount_to_bytecount(d, 1);
4530                         range += d_size;
4531                         startpos -= d_size;
4532                 }
4533         }
4534         return -1;
4535 }                               /* re_search_2 */
4536 \f
4537 /* Declarations and macros for re_match_2.  */
4538
4539 /* This converts PTR, a pointer into one of the search strings `string1'
4540    and `string2' into an offset from the beginning of that string.  */
4541 #define POINTER_TO_OFFSET(ptr)                  \
4542   (FIRST_STRING_P (ptr)                         \
4543    ? ((regoff_t) ((ptr) - string1))             \
4544    : ((regoff_t) ((ptr) - string2 + size1)))
4545
4546 /* Macros for dealing with the split strings in re_match_2.  */
4547
4548 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
4549
4550 /* Call before fetching a character with *d.  This switches over to
4551    string2 if necessary.  */
4552 #define REGEX_PREFETCH()                                                        \
4553   while (d == dend)                                                     \
4554     {                                                                   \
4555       /* End of string2 => fail.  */                                    \
4556       if (dend == end_match_2)                                          \
4557         goto fail;                                                      \
4558       /* End of string1 => advance to string2.  */                      \
4559       d = string2;                                                      \
4560       dend = end_match_2;                                               \
4561     }
4562
4563 /* Test if at very beginning or at very end of the virtual concatenation
4564    of `string1' and `string2'.  If only one string, it's `string2'.  */
4565 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4566 #define AT_STRINGS_END(d) ((d) == end2)
4567
4568 /* XEmacs change:
4569    If the given position straddles the string gap, return the equivalent
4570    position that is before or after the gap, respectively; otherwise,
4571    return the same position. */
4572 #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d))
4573 #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
4574
4575 /* Test if CH is a word-constituent character. (XEmacs change) */
4576 #define WORDCHAR_P_UNSAFE(ch)                                              \
4577   (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),   \
4578                                ch) == Sword)
4579
4580 /* Free everything we malloc.  */
4581 #ifdef MATCH_MAY_ALLOCATE
4582 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
4583 #define FREE_VARIABLES()                                                \
4584   do {                                                                  \
4585     REGEX_FREE_STACK (fail_stack.stack);                                \
4586     FREE_VAR (regstart);                                                \
4587     FREE_VAR (regend);                                                  \
4588     FREE_VAR (old_regstart);                                            \
4589     FREE_VAR (old_regend);                                              \
4590     FREE_VAR (best_regstart);                                           \
4591     FREE_VAR (best_regend);                                             \
4592     FREE_VAR (reg_info);                                                \
4593     FREE_VAR (reg_dummy);                                               \
4594     FREE_VAR (reg_info_dummy);                                          \
4595   } while (0)
4596 #else                           /* not MATCH_MAY_ALLOCATE */
4597 #define FREE_VARIABLES() ((void)0)      /* Do nothing!  But inhibit gcc warning.  */
4598 #endif                          /* MATCH_MAY_ALLOCATE */
4599
4600 /* These values must meet several constraints.  They must not be valid
4601    register values; since we have a limit of 255 registers (because
4602    we use only one byte in the pattern for the register number), we can
4603    use numbers larger than 255.  They must differ by 1, because of
4604    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
4605    be larger than the value for the highest register, so we do not try
4606    to actually save any registers when none are active.  */
4607 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4608 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4609 \f
4610 /* Matching routines.  */
4611
4612 #ifndef emacs                   /* Emacs never uses this.  */
4613 /* re_match is like re_match_2 except it takes only a single string.  */
4614
4615 int
4616 re_match(struct re_pattern_buffer *bufp, const char *string, int size,
4617          int pos, struct re_registers *regs)
4618 {
4619         int result =
4620             re_match_2_internal(bufp, NULL, 0, (re_char *) string, size,
4621                                 pos, regs, size);
4622         alloca(0);
4623         return result;
4624 }
4625 #endif                          /* not emacs */
4626
4627 /* re_match_2 matches the compiled pattern in BUFP against the
4628    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and
4629    SIZE2, respectively).  We start matching at POS, and stop matching
4630    at STOP.
4631
4632    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4633    store offsets for the substring each group matched in REGS.  See the
4634    documentation for exactly how many groups we fill.
4635
4636    We return -1 if no match, -2 if an internal error (such as the
4637    failure stack overflowing).  Otherwise, we return the length of the
4638    matched substring.  */
4639
4640 int
4641 re_match_2(struct re_pattern_buffer *bufp, const char *string1,
4642            int size1, const char *string2, int size2, int pos,
4643            struct re_registers *regs, int stop)
4644 {
4645         int result;
4646
4647 #ifdef emacs
4648         SETUP_SYNTAX_CACHE_FOR_OBJECT(regex_match_object,
4649                                       regex_emacs_buffer,
4650                                       SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR
4651                                       (regex_match_object, regex_emacs_buffer,
4652                                        pos), 1);
4653 #endif
4654
4655         result = re_match_2_internal(bufp, (re_char *) string1, size1,
4656                                      (re_char *) string2, size2,
4657                                      pos, regs, stop);
4658
4659         alloca(0);
4660         return result;
4661 }
4662
4663 /* This is a separate function so that we can force an alloca cleanup
4664    afterwards.  */
4665 static int
4666 re_match_2_internal(struct re_pattern_buffer *bufp, re_char * string1,
4667                     int size1, re_char * string2, int size2, int pos,
4668                     struct re_registers *regs, int stop)
4669 {
4670         /* General temporaries.  */
4671         int mcnt;
4672         unsigned char *p1;
4673         int should_succeed;     /* XEmacs change */
4674
4675         /* Just past the end of the corresponding string.  */
4676         re_char *end1, *end2;
4677
4678         /* Pointers into string1 and string2, just past the last characters in
4679            each to consider matching.  */
4680         re_char *end_match_1, *end_match_2;
4681
4682         /* Where we are in the data, and the end of the current string.  */
4683         const re_char *d, *dend;
4684
4685         /* Where we are in the pattern, and the end of the pattern.  */
4686         unsigned char *p = bufp->buffer;
4687         REGISTER unsigned char *pend = p + bufp->used;
4688
4689         /* Mark the opcode just after a start_memory, so we can test for an
4690            empty subpattern when we get to the stop_memory.  */
4691         re_char *just_past_start_mem = 0;
4692
4693         /* We use this to map every character in the string.  */
4694         RE_TRANSLATE_TYPE translate = bufp->translate;
4695
4696         /* Failure point stack.  Each place that can handle a failure further
4697            down the line pushes a failure point on this stack.  It consists of
4698            restart, regend, and reg_info for all registers corresponding to
4699            the subexpressions we're currently inside, plus the number of such
4700            registers, and, finally, two char *'s.  The first char * is where
4701            to resume scanning the pattern; the second one is where to resume
4702            scanning the strings.  If the latter is zero, the failure point is
4703            a ``dummy''; if a failure happens and the failure point is a dummy,
4704            it gets discarded and the next one is tried.  */
4705 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, this is global.  */
4706         fail_stack_type fail_stack;
4707 #endif
4708 #ifdef REGEX_DEBUG_FLAG
4709         static unsigned failure_id;
4710         int nfailure_points_pushed = 0, nfailure_points_popped = 0;
4711 #endif
4712
4713 #ifdef REL_ALLOC
4714         /* This holds the pointer to the failure stack, when
4715            it is allocated relocatably.  */
4716         fail_stack_elt_t *failure_stack_ptr;
4717 #endif
4718
4719         /* We fill all the registers internally, independent of what we
4720            return, for use in backreferences.  The number here includes
4721            an element for register zero.  */
4722         int num_regs = bufp->re_ngroups + 1;
4723
4724         /* The currently active registers.  */
4725         int lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4726         int highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4727
4728         /* Information on the contents of registers. These are pointers into
4729            the input strings; they record just what was matched (on this
4730            attempt) by a subexpression part of the pattern, that is, the
4731            regnum-th regstart pointer points to where in the pattern we began
4732            matching and the regnum-th regend points to right after where we
4733            stopped matching the regnum-th subexpression.  (The zeroth register
4734            keeps track of what the whole pattern matches.)  */
4735 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4736         re_char **regstart, **regend;
4737 #endif
4738
4739         /* If a group that's operated upon by a repetition operator fails to
4740            match anything, then the register for its start will need to be
4741            restored because it will have been set to wherever in the string we
4742            are when we last see its open-group operator.  Similarly for a
4743            register's end.  */
4744 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4745         re_char **old_regstart, **old_regend;
4746 #endif
4747
4748         /* The is_active field of reg_info helps us keep track of which (possibly
4749            nested) subexpressions we are currently in. The matched_something
4750            field of reg_info[reg_num] helps us tell whether or not we have
4751            matched any of the pattern so far this time through the reg_num-th
4752            subexpression.  These two fields get reset each time through any
4753            loop their register is in.  */
4754 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, this is global.  */
4755         register_info_type *reg_info;
4756 #endif
4757
4758         /* The following record the register info as found in the above
4759            variables when we find a match better than any we've seen before.
4760            This happens as we backtrack through the failure points, which in
4761            turn happens only if we have not yet matched the entire string. */
4762         unsigned best_regs_set = false;
4763 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4764         re_char **best_regstart, **best_regend;
4765 #endif
4766
4767         /* Logically, this is `best_regend[0]'.  But we don't want to have to
4768            allocate space for that if we're not allocating space for anything
4769            else (see below).  Also, we never need info about register 0 for
4770            any of the other register vectors, and it seems rather a kludge to
4771            treat `best_regend' differently than the rest.  So we keep track of
4772            the end of the best match so far in a separate variable.  We
4773            initialize this to NULL so that when we backtrack the first time
4774            and need to test it, it's not garbage.  */
4775         re_char *match_end = NULL;
4776
4777         /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
4778         int set_regs_matched_done = 0;
4779
4780         /* Used when we pop values we don't care about.  */
4781 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4782         re_char **reg_dummy;
4783         register_info_type *reg_info_dummy;
4784 #endif
4785
4786 #ifdef REGEX_DEBUG_FLAG
4787         /* Counts the total number of registers pushed.  */
4788         unsigned num_regs_pushed = 0;
4789 #endif
4790
4791         /* 1 if this match ends in the same string (string1 or string2)
4792            as the best previous match.  */
4793         re_bool same_str_p;
4794
4795         /* 1 if this match is the best seen so far.  */
4796         re_bool best_match_p;
4797
4798         DEBUG_PRINT1("\n\nEntering re_match_2.\n");
4799
4800         INIT_FAIL_STACK();
4801
4802 #ifdef MATCH_MAY_ALLOCATE
4803         /* Do not bother to initialize all the register variables if there are
4804            no groups in the pattern, as it takes a fair amount of time.  If
4805            there are groups, we include space for register 0 (the whole
4806            pattern), even though we never use it, since it simplifies the
4807            array indexing.  We should fix this.  */
4808         if (bufp->re_ngroups) {
4809                 regstart = REGEX_TALLOC(num_regs, re_char *);
4810                 regend = REGEX_TALLOC(num_regs, re_char *);
4811                 old_regstart = REGEX_TALLOC(num_regs, re_char *);
4812                 old_regend = REGEX_TALLOC(num_regs, re_char *);
4813                 best_regstart = REGEX_TALLOC(num_regs, re_char *);
4814                 best_regend = REGEX_TALLOC(num_regs, re_char *);
4815                 reg_info = REGEX_TALLOC(num_regs, register_info_type);
4816                 reg_dummy = REGEX_TALLOC(num_regs, re_char *);
4817                 reg_info_dummy = REGEX_TALLOC(num_regs, register_info_type);
4818
4819                 if (!
4820                     (regstart && regend && old_regstart && old_regend
4821                      && reg_info && best_regstart && best_regend && reg_dummy
4822                      && reg_info_dummy)) {
4823                         FREE_VARIABLES();
4824                         return -2;
4825                 }
4826         } else {
4827                 /* We must initialize all our variables to NULL, so that
4828                    `FREE_VARIABLES' doesn't try to free them.  */
4829                 regstart = regend = old_regstart = old_regend = best_regstart
4830                     = best_regend = reg_dummy = NULL;
4831                 reg_info = reg_info_dummy = (register_info_type *) NULL;
4832         }
4833 #endif                          /* MATCH_MAY_ALLOCATE */
4834
4835         /* The starting position is bogus.  */
4836         if (pos < 0 || pos > size1 + size2) {
4837                 FREE_VARIABLES();
4838                 return -1;
4839         }
4840
4841         /* Initialize subexpression text positions to -1 to mark ones that no
4842            start_memory/stop_memory has been seen for. Also initialize the
4843            register information struct.  */
4844         for (mcnt = 1; mcnt < num_regs; mcnt++) {
4845                 regstart[mcnt] = regend[mcnt]
4846                     = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4847
4848                 REG_MATCH_NULL_STRING_P(reg_info[mcnt]) =
4849                     MATCH_NULL_UNSET_VALUE;
4850                 IS_ACTIVE(reg_info[mcnt]) = 0;
4851                 MATCHED_SOMETHING(reg_info[mcnt]) = 0;
4852                 EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
4853         }
4854         /* We move `string1' into `string2' if the latter's empty -- but not if
4855            `string1' is null.  */
4856         if (size2 == 0 && string1 != NULL) {
4857                 string2 = string1;
4858                 size2 = size1;
4859                 string1 = 0;
4860                 size1 = 0;
4861         }
4862         end1 = string1 + size1;
4863         end2 = string2 + size2;
4864
4865         /* Compute where to stop matching, within the two strings.  */
4866         if (stop <= size1) {
4867                 end_match_1 = string1 + stop;
4868                 end_match_2 = string2;
4869         } else {
4870                 end_match_1 = end1;
4871                 end_match_2 = string2 + stop - size1;
4872         }
4873
4874         /* `p' scans through the pattern as `d' scans through the data.
4875            `dend' is the end of the input string that `d' points within.  `d'
4876            is advanced into the following input string whenever necessary, but
4877            this happens before fetching; therefore, at the beginning of the
4878            loop, `d' can be pointing at the end of a string, but it cannot
4879            equal `string2'.  */
4880         if (size1 > 0 && pos <= size1) {
4881                 d = string1 + pos;
4882                 dend = end_match_1;
4883         } else {
4884                 d = string2 + pos - size1;
4885                 dend = end_match_2;
4886         }
4887
4888         DEBUG_PRINT1("The compiled pattern is: \n");
4889         DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
4890         DEBUG_PRINT1("The string to match is: `");
4891         DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
4892         DEBUG_PRINT1("'\n");
4893
4894         /* This loops over pattern commands.  It exits by returning from the
4895            function if the match is complete, or it drops through if the match
4896            fails at this starting point in the input data.  */
4897         for (;;) {
4898                 DEBUG_PRINT2("\n0x%lx: ", (long)p);
4899 #ifdef emacs                    /* XEmacs added, w/removal of immediate_quit */
4900                 if (!no_quit_in_re_search)
4901                         QUIT;
4902 #endif
4903
4904                 if (p == pend) {
4905                         /* End of pattern means we might have succeeded.  */
4906                         DEBUG_PRINT1("end of pattern ... ");
4907
4908                         /* If we haven't matched the entire string, and we want
4909                            the longest match, try backtracking.  */
4910                         if (d != end_match_2) {
4911                                 same_str_p = (FIRST_STRING_P(match_end)
4912                                               == MATCHING_IN_FIRST_STRING);
4913
4914                                 /* AIX compiler got confused when this was
4915                                    combined with the previous declaration.  */
4916                                 if (same_str_p)
4917                                         best_match_p = d > match_end;
4918                                 else
4919                                         best_match_p =
4920                                             !MATCHING_IN_FIRST_STRING;
4921
4922                                 DEBUG_PRINT1("backtracking.\n");
4923
4924                                 if (!FAIL_STACK_EMPTY()) {
4925                                         /* More failure points to try.  */
4926
4927                                         /* If exceeds best match so far, save
4928                                            it.  */
4929                                         if (!best_regs_set || best_match_p) {
4930                                                 best_regs_set = true;
4931                                                 match_end = d;
4932
4933                                                 DEBUG_PRINT1
4934                                                     ("\nSAVING match as best so far.\n");
4935
4936                                                 for (mcnt = 1; mcnt < num_regs;
4937                                                      mcnt++) {
4938                                                         best_regstart[mcnt] =
4939                                                             regstart[mcnt];
4940                                                         best_regend[mcnt] =
4941                                                             regend[mcnt];
4942                                                 }
4943                                         }
4944                                         goto fail;
4945                                 }
4946
4947                                 /* If no failure points, don't restore garbage.
4948                                    And if last match is real best match, don't
4949                                    restore second best one. */
4950                                 else if (best_regs_set && !best_match_p) {
4951                                       restore_best_regs:
4952                                         /* Restore best match.  It may happen
4953                                            that `dend == end_match_1' while the
4954                                            restored d is in string2.  For
4955                                            example, the pattern `x.*y.*z'
4956                                            against the strings `x-' and `y-z-',
4957                                            if the two strings are not
4958                                            consecutive in memory.  */
4959                                         DEBUG_PRINT1
4960                                             ("Restoring best registers.\n");
4961
4962                                         d = match_end;
4963                                         dend = ((d >= string1 && d <= end1)
4964                                                 ? end_match_1 : end_match_2);
4965
4966                                         for (mcnt = 1; mcnt < num_regs; mcnt++) {
4967                                                 regstart[mcnt] =
4968                                                     best_regstart[mcnt];
4969                                                 regend[mcnt] =
4970                                                     best_regend[mcnt];
4971                                         }
4972                                 }
4973                         }
4974                         /* d != end_match_2 */
4975                 succeed_label:
4976                         DEBUG_PRINT1("Accepting match.\n");
4977                         {
4978                                 int num_nonshy_regs = bufp->re_nsub + 1;
4979                                 /* If caller wants register contents data back,
4980                                    fill REGS.  */
4981                                 if (regs && !bufp->no_sub) {
4982                                         /* Have the register data arrays been
4983                                            allocated?  */
4984                                         if (bufp->regs_allocated == REGS_UNALLOCATED) {
4985                                           /* No.  So allocate them with malloc.
4986                                              We need one extra element beyond
4987                                              `num_regs' for the `-1' marker GNU
4988                                              code uses.  */
4989                                                 regs->num_regs = MAX(RE_NREGS, num_nonshy_regs + 1);
4990                                                 regs->start = TALLOC(regs->num_regs, regoff_t);
4991                                                 regs->end = TALLOC(regs->num_regs, regoff_t);
4992                                                 if (regs->start == NULL || regs->end == NULL) {
4993                                                         FREE_VARIABLES ();
4994                                                         return -2;
4995                                                 }
4996                                                 bufp->regs_allocated = REGS_REALLOCATE;
4997                                         } else if (bufp->regs_allocated == REGS_REALLOCATE) {
4998                                           /* Yes.  If we need more elements than were already
4999                                              allocated, reallocate them.  If we need fewer, just
5000                                              leave it alone.  */
5001                                                 if (regs->num_regs < num_nonshy_regs + 1) {
5002                                                         regs->num_regs = num_nonshy_regs + 1;
5003                                                         RETALLOC(regs->start, regs->num_regs, regoff_t);
5004                                                         RETALLOC(regs->end, regs->num_regs, regoff_t);
5005                                                         if (regs->start == NULL || regs->end == NULL) {
5006                                                                 FREE_VARIABLES();
5007                                                                 return -2;
5008                                                         }
5009                                                 }
5010                                         } else {
5011                                                 /* The braces fend off a "empty body in an else-statement"
5012                                                    warning under GCC when assert expands to nothing.  */
5013                                                 assert (bufp->regs_allocated == REGS_FIXED);
5014                                         }
5015
5016                                         /* Convert the pointer data in `regstart' and `regend' to
5017                                            indices.  Register zero has to be set differently,
5018                                            since we haven't kept track of any info for it.  */
5019                                         if (regs->num_regs > 0) {
5020                                                 regs->start[0] = pos;
5021                                                 regs->end[0] = (MATCHING_IN_FIRST_STRING
5022                                                                 ? ((regoff_t) (d - string1))
5023                                                                 : ((regoff_t) (d - string2 + size1)));
5024                                         }
5025
5026                                         /* Map over the NUM_NONSHY_REGS non-shy internal registers.
5027                                            Copy each into the corresponding external register.
5028                                            N.B. MCNT indexes external registers. */
5029                                         for (mcnt = 1;
5030                                              mcnt < MIN (num_nonshy_regs, regs->num_regs);
5031                                              mcnt++) {
5032                                                 int ireg = bufp->external_to_internal_register[mcnt];
5033
5034                                                 if (REG_UNSET (regstart[ireg]) || REG_UNSET (regend[ireg]))
5035                                                         regs->start[mcnt] = regs->end[mcnt] = -1;
5036                                                 else {
5037                                                         regs->start[mcnt]
5038                                                                 = (regoff_t) POINTER_TO_OFFSET (regstart[ireg]);
5039                                                         regs->end[mcnt]
5040                                                                 = (regoff_t) POINTER_TO_OFFSET (regend[ireg]);
5041                                                 }
5042                                         }
5043                                 } /* regs && !bufp->no_sub */
5044
5045                                 /* If we have regs and the regs structure has
5046                                    more elements than were in the pattern, set
5047                                    the extra elements to -1.  If we
5048                                    (re)allocated the registers, this is the
5049                                    case, because we always allocate enough to
5050                                    have at least one -1 at the end.
5051
5052                                    We do this even when no_sub is set because
5053                                    some applications (XEmacs) reuse register
5054                                    structures which may contain stale
5055                                    information, and permit attempts to access
5056                                    those registers.
5057
5058                                    It would be possible to require the caller to
5059                                    do this, but we'd have to change the API for
5060                                    this function to reflect that, and audit all
5061                                    callers. */
5062                                 if (regs && regs->num_regs > 0)
5063                                         for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++)
5064                                                 regs->start[mcnt] = regs->end[mcnt] = -1;
5065                         }
5066
5067                         DEBUG_PRINT4("%u failure points pushed, %u popped (%u remain).\n",
5068                                       nfailure_points_pushed, nfailure_points_popped,
5069                                       nfailure_points_pushed - nfailure_points_popped);
5070                         DEBUG_PRINT2("%u registers pushed.\n", num_regs_pushed);
5071
5072                         mcnt = d - pos - (MATCHING_IN_FIRST_STRING
5073                                           ? string1
5074                                           : string2 - size1);
5075
5076                         DEBUG_PRINT2("Returning %d from re_match_2.\n", mcnt);
5077
5078                         FREE_VARIABLES();
5079                         return mcnt;
5080                 }
5081
5082                 /* Otherwise match next pattern command.  */
5083                 switch (SWITCH_ENUM_CAST((re_opcode_t) *p++)) {
5084                         /* Ignore these.  Used to ignore the n of succeed_n's
5085                            which currently have n == 0.  */
5086                 case no_op:
5087                         DEBUG_PRINT1("EXECUTING no_op.\n");
5088                         break;
5089
5090                 case succeed:
5091                         DEBUG_PRINT1("EXECUTING succeed.\n");
5092                         goto succeed_label;
5093
5094                         /* Match the next n pattern characters exactly.  The
5095                            following byte in the pattern defines n, and the n
5096                            bytes after that are the characters to match.  */
5097                 case exactn:
5098                         mcnt = *p++;
5099                         DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
5100
5101                         /* This is written out as an if-else so we don't waste
5102                            time testing `translate' inside the loop.  */
5103                         if (TRANSLATE_P(translate)) {
5104                                 do {
5105 #ifdef MULE
5106                                         Emchar pat_ch, buf_ch;
5107                                         Bytecount pat_len;
5108
5109                                         REGEX_PREFETCH();
5110                                         pat_ch = charptr_emchar(p);
5111                                         buf_ch = charptr_emchar(d);
5112                                         if (RE_TRANSLATE(buf_ch) != pat_ch)
5113                                                 goto fail;
5114
5115                                         pat_len = charcount_to_bytecount(p, 1);
5116                                         p += pat_len;
5117                                         INC_CHARPTR(d);
5118
5119                                         mcnt -= pat_len;
5120 #else                           /* not MULE */
5121                                         REGEX_PREFETCH();
5122                                         if ((unsigned char)RE_TRANSLATE(*d++) !=
5123                                             *p++)
5124                                                 goto fail;
5125                                         mcnt--;
5126 #endif
5127                                 }
5128                                 while (mcnt > 0);
5129                         } else {
5130                                 do {
5131                                         REGEX_PREFETCH();
5132                                         if (*d++ != *p++)
5133                                                 goto fail;
5134                                 }
5135                                 while (--mcnt);
5136                         }
5137                         SET_REGS_MATCHED();
5138                         break;
5139
5140                         /* Match any character except possibly a newline or a
5141                            null.  */
5142                 case anychar:
5143                         DEBUG_PRINT1("EXECUTING anychar.\n");
5144
5145                         REGEX_PREFETCH();
5146
5147                         if ((!(bufp->syntax & RE_DOT_NEWLINE)
5148                              && TRANSLATE(*d) == '\n')
5149                             || (bufp->syntax & RE_DOT_NOT_NULL
5150                                 && TRANSLATE(*d) == '\000'))
5151                                 goto fail;
5152
5153                         SET_REGS_MATCHED();
5154                         DEBUG_PRINT2("  Matched `%d'.\n", *d);
5155                         INC_CHARPTR(d); /* XEmacs change */
5156                         break;
5157
5158                 case charset:
5159                 case charset_not: {
5160                         REGISTER unsigned char c;
5161                         re_bool not_p =
5162                                 (re_opcode_t) * (p - 1) == charset_not;
5163
5164                         DEBUG_PRINT2("EXECUTING charset%s.\n",
5165                                      not_p ? "_not" : "");
5166
5167                         REGEX_PREFETCH();
5168                         /* The character to match.  */
5169                         c = TRANSLATE(*d);
5170
5171                         /* Cast to `unsigned' instead of `unsigned char'
5172                            in case the bit list is a full 32 bytes
5173                            long.  */
5174                         if (c < (unsigned)(*p * BYTEWIDTH)
5175                             && p[1 +
5176                                  c / BYTEWIDTH] & (1 << (c %
5177                                                          BYTEWIDTH)))
5178                                 not_p = !not_p;
5179
5180                         p += 1 + *p;
5181
5182                         if (!not_p)
5183                                 goto fail;
5184
5185                         SET_REGS_MATCHED();
5186                         INC_CHARPTR(d); /* XEmacs change */
5187                         break;
5188                 }
5189
5190 #ifdef MULE
5191                 case charset_mule:
5192                 case charset_mule_not: {
5193                         REGISTER Emchar c;
5194                         re_bool not_p =
5195                                 (re_opcode_t) * (p - 1) == charset_mule_not;
5196
5197                         DEBUG_PRINT2("EXECUTING charset_mule%s.\n",
5198                                      not_p ? "_not" : "");
5199
5200                         REGEX_PREFETCH();
5201                         c = charptr_emchar((const Bufbyte *)d);
5202                         /* The character to match.  */
5203                         c = TRANSLATE(c);
5204
5205                         if (EQ
5206                             (Qt,
5207                              unified_range_table_lookup(p, c, Qnil)))
5208                                 not_p = !not_p;
5209
5210                         p += unified_range_table_bytes_used(p);
5211
5212                         if (!not_p)
5213                                 goto fail;
5214
5215                         SET_REGS_MATCHED();
5216                         INC_CHARPTR(d);
5217                         break;
5218                 }
5219 #endif                          /* MULE */
5220
5221                         /* The beginning of a group is represented by
5222                            start_memory.  The arguments are the register number
5223                            in the next byte, and the number of groups inner to
5224                            this one in the next.  The text matched within the
5225                            group is recorded (in the internal registers data
5226                            structure) under the register number.  */
5227                 case start_memory:
5228                         DEBUG_PRINT3("EXECUTING start_memory %d (%d):\n", *p,
5229                                      p[1]);
5230
5231                         /* Find out if this group can match the empty string.  */
5232                         p1 = p; /* To send to group_match_null_string_p.  */
5233
5234                         if (REG_MATCH_NULL_STRING_P(reg_info[*p]) ==
5235                             MATCH_NULL_UNSET_VALUE)
5236                                 REG_MATCH_NULL_STRING_P(reg_info[*p])
5237                                     = group_match_null_string_p(&p1, pend,
5238                                                                 reg_info);
5239
5240                         /* Save the position in the string where we were the
5241                            last time we were at this open-group operator in case
5242                            the group is operated upon by a repetition operator,
5243                            e.g., with `(a*)*b' against `ab'; then we want to
5244                            ignore where we are now in the string in case this
5245                            attempt to match fails.  */
5246                         old_regstart[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
5247                             ? REG_UNSET(regstart[*p]) ? d : regstart[*p]
5248                             : regstart[*p];
5249                         DEBUG_PRINT2("  old_regstart: %d\n",
5250                                      POINTER_TO_OFFSET(old_regstart[*p]));
5251
5252                         regstart[*p] = d;
5253                         DEBUG_PRINT2("  regstart: %d\n",
5254                                      POINTER_TO_OFFSET(regstart[*p]));
5255
5256                         IS_ACTIVE(reg_info[*p]) = 1;
5257                         MATCHED_SOMETHING(reg_info[*p]) = 0;
5258
5259                         /* Clear this whenever we change the register activity
5260                            status.  */
5261                         set_regs_matched_done = 0;
5262
5263                         /* This is the new highest active register.  */
5264                         highest_active_reg = *p;
5265
5266                         /* If nothing was active before, this is the new lowest
5267                            active register.  */
5268                         if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5269                                 lowest_active_reg = *p;
5270
5271                         /* Move past the register number and inner group
5272                            count.  */
5273                         p += 2;
5274                         just_past_start_mem = p;
5275
5276                         break;
5277
5278                         /* The stop_memory opcode represents the end of a group.
5279                            Its arguments are the same as start_memory's: the
5280                            register number, and the number of inner groups.  */
5281                 case stop_memory:
5282                         DEBUG_PRINT3("EXECUTING stop_memory %d (%d):\n", *p,
5283                                      p[1]);
5284
5285                         /* We need to save the string position the last time we
5286                            were at this close-group operator in case the group
5287                            is operated upon by a repetition operator, e.g., with
5288                            `((a*)*(b*)*)*' against `aba'; then we want to ignore
5289                            where we are now in the string in case this attempt
5290                            to match fails.  */
5291                         old_regend[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
5292                             ? REG_UNSET(regend[*p]) ? d : regend[*p]
5293                             : regend[*p];
5294                         DEBUG_PRINT2("      old_regend: %d\n",
5295                                      POINTER_TO_OFFSET(old_regend[*p]));
5296
5297                         regend[*p] = d;
5298                         DEBUG_PRINT2("      regend: %d\n",
5299                                      POINTER_TO_OFFSET(regend[*p]));
5300
5301                         /* This register isn't active anymore.  */
5302                         IS_ACTIVE(reg_info[*p]) = 0;
5303
5304                         /* Clear this whenever we change the register activity
5305                            status.  */
5306                         set_regs_matched_done = 0;
5307
5308                         /* If this was the only register active, nothing is
5309                            active anymore.  */
5310                         if (lowest_active_reg == highest_active_reg) {
5311                                 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5312                                 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5313                         } else {        /* We must scan for the new highest
5314                                            active register, since it isn't
5315                                            necessarily one less than now:
5316                                            consider (a(b)c(d(e)f)g).  When group
5317                                            3 ends, after the f), the new highest
5318                                            active register is 1.  */
5319                                 unsigned char r = *p - 1;
5320                                 while (r > 0 && !IS_ACTIVE(reg_info[r]))
5321                                         r--;
5322
5323                                 /* If we end up at register zero, that means
5324                                    that we saved the registers as the result of
5325                                    an `on_failure_jump', not a `start_memory',
5326                                    and we jumped to past the innermost
5327                                    `stop_memory'.  For example, in ((.)*) we
5328                                    save registers 1 and 2 as a result of the *,
5329                                    but when we pop back to the second ), we are
5330                                    at the stop_memory 1.  Thus, nothing is
5331                                    active.  */
5332                                 if (r == 0) {
5333                                         lowest_active_reg =
5334                                             NO_LOWEST_ACTIVE_REG;
5335                                         highest_active_reg =
5336                                             NO_HIGHEST_ACTIVE_REG;
5337                                 } else {
5338                                         highest_active_reg = r;
5339
5340                                         /* 98/9/21 jhod: We've also gotta set
5341                                            lowest_active_reg, don't we? */
5342                                         r = 1;
5343                                         while (r < highest_active_reg
5344                                                && !IS_ACTIVE(reg_info[r]))
5345                                                 r++;
5346                                         lowest_active_reg = r;
5347                                 }
5348                         }
5349
5350                         /* If just failed to match something this time around
5351                            with a group that's operated on by a repetition
5352                            operator, try to force exit from the ``loop'', and
5353                            restore the register information for this group that
5354                            we had before trying this last match.  */
5355                         if ((!MATCHED_SOMETHING(reg_info[*p])
5356                              || just_past_start_mem == p - 1)
5357                             && (p + 2) < pend) {
5358                                 re_bool is_a_jump_n = false;
5359
5360                                 p1 = p + 2;
5361                                 mcnt = 0;
5362                                 switch ((unsigned int)(re_opcode_t)*p1++) {
5363                                 case jump_n:
5364                                         is_a_jump_n = true;
5365                                 case pop_failure_jump:
5366                                 case maybe_pop_jump:
5367                                 case jump:
5368                                 case dummy_failure_jump:
5369                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
5370                                         if (is_a_jump_n)
5371                                                 p1 += 2;
5372                                         break;
5373
5374                                 default:
5375                                         /* do nothing */ ;
5376                                 }
5377                                 p1 += mcnt;
5378
5379                                 /* If the next operation is a jump backwards in
5380                                    the pattern to an on_failure_jump right
5381                                    before the start_memory corresponding to this
5382                                    stop_memory, exit from the loop by forcing a
5383                                    failure after pushing on the stack the
5384                                    on_failure_jump's jump in the pattern, and
5385                                    d.  */
5386                                 if (mcnt < 0
5387                                     && (re_opcode_t) * p1 == on_failure_jump
5388                                     && (re_opcode_t) p1[3] == start_memory
5389                                     && p1[4] == *p) {
5390                                         /* If this group ever matched anything,
5391                                            then restore what its registers were
5392                                            before trying this last failed match,
5393                                            e.g., with `(a*)*b' against `ab' for
5394                                            regstart[1], and, e.g., with
5395                                            `((a*)*(b*)*)*' against `aba' for
5396                                            regend[3].
5397
5398                                            Also restore the registers for inner
5399                                            groups for, e.g., `((a*)(b*))*'
5400                                            against `aba' (register 3 would
5401                                            otherwise get trashed).  */
5402
5403                                         if (EVER_MATCHED_SOMETHING
5404                                             (reg_info[*p])) {
5405                                                 int r;
5406
5407                                                 EVER_MATCHED_SOMETHING(reg_info
5408                                                                        [*p]) =
5409                                                     0;
5410
5411                                                 /* Restore this and inner
5412                                                    groups' (if any)
5413                                                    registers.  */
5414                                                 for (r = *p; r < *p + *(p + 1);
5415                                                      r++) {
5416                                                         regstart[r] =
5417                                                             old_regstart[r];
5418
5419                                                         /* xx why this test?  */
5420                                                         if (old_regend[r] >=
5421                                                             regstart[r])
5422                                                                 regend[r] =
5423                                                                     old_regend
5424                                                                     [r];
5425                                                 }
5426                                         }
5427                                         p1++;
5428                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
5429                                         PUSH_FAILURE_POINT(p1 + mcnt, d, -2);
5430
5431                                         goto fail;
5432                                 }
5433                         }
5434
5435                         /* Move past the register number and the inner group
5436                            count.  */
5437                         p += 2;
5438                         break;
5439
5440                         /* \<digit> has been turned into a `duplicate' command
5441                            which is followed by the numeric value of <digit> as
5442                            the register number.  (Already passed through
5443                            external-to-internal-register mapping, so it refers
5444                            to the actual group number, not the non-shy-only
5445                            numbering used in the external world.) */
5446                 case duplicate:
5447                         {
5448                                 REGISTER re_char *d2, *dend2;
5449                                 /* Get which register to match against.  */
5450                                 int regno = *p++;
5451                                 DEBUG_PRINT2("EXECUTING duplicate %d.\n",
5452                                              regno);
5453
5454                                 /* Can't back reference a group which we've
5455                                    never matched.  */
5456                                 if (REG_UNSET(regstart[regno])
5457                                     || REG_UNSET(regend[regno]))
5458                                         goto fail;
5459
5460                                 /* Where in input to try to start matching.  */
5461                                 d2 = regstart[regno];
5462
5463                                 /* Where to stop matching; if both the place to
5464                                    start and the place to stop matching are in
5465                                    the same string, then set to the place to
5466                                    stop, otherwise, for now have to use the end
5467                                    of the first string.  */
5468
5469                                 dend2 = ((FIRST_STRING_P(regstart[regno])
5470                                           == FIRST_STRING_P(regend[regno]))
5471                                          ? regend[regno] : end_match_1);
5472                                 for (;;) {
5473                                         /* If necessary, advance to next segment
5474                                            in register contents.  */
5475                                         while (d2 == dend2) {
5476                                                 if (dend2 == end_match_2)
5477                                                         break;
5478                                                 if (dend2 == regend[regno])
5479                                                         break;
5480
5481                                                 /* End of string1 => advance to
5482                                                    string2. */
5483                                                 d2 = string2;
5484                                                 dend2 = regend[regno];
5485                                         }
5486                                         /* At end of register contents =>
5487                                            success */
5488                                         if (d2 == dend2)
5489                                                 break;
5490
5491                                         /* If necessary, advance to next segment
5492                                            in data.  */
5493                                         REGEX_PREFETCH();
5494
5495                                         /* How many characters left in this segment to match.  */
5496                                         mcnt = dend - d;
5497
5498                                         /* Want how many consecutive characters
5499                                            we can match in one shot, so, if
5500                                            necessary, adjust the count.  */
5501                                         if (mcnt > dend2 - d2)
5502                                                 mcnt = dend2 - d2;
5503
5504                                         /* Compare that many; failure if
5505                                            mismatch, else move past them.  */
5506                                         if (TRANSLATE_P(translate)
5507                                             ? bcmp_translate(
5508                                                     (const unsigned char*)d,
5509                                                     (const unsigned char*)d2,
5510                                                     mcnt, translate)
5511                                             : memcmp(d, d2, mcnt)) {
5512                                                 goto fail;
5513                                         }
5514                                         d += mcnt, d2 += mcnt;
5515
5516                                         /* Do this because we've match some
5517                                            characters.  */
5518                                         SET_REGS_MATCHED();
5519                                 }
5520                         }
5521                         break;
5522
5523                         /* begline matches the empty string at the beginning of
5524                            the string (unless `not_bol' is set in `bufp'), and,
5525                            if `newline_anchor' is set, after newlines.  */
5526                 case begline:
5527                         DEBUG_PRINT1("EXECUTING begline.\n");
5528
5529                         if (AT_STRINGS_BEG(d)) {
5530                                 if (!bufp->not_bol)
5531                                         break;
5532                         } else if (d[-1] == '\n' && bufp->newline_anchor) {
5533                                 break;
5534                         }
5535                         /* In all other cases, we fail.  */
5536                         goto fail;
5537
5538                         /* endline is the dual of begline.  */
5539                 case endline:
5540                         DEBUG_PRINT1("EXECUTING endline.\n");
5541
5542                         if (AT_STRINGS_END(d)) {
5543                                 if (!bufp->not_eol)
5544                                         break;
5545                         }
5546
5547                         /* We have to ``prefetch'' the next character.  */
5548                         else if ((d == end1 ? *string2 : *d) == '\n'
5549                                  && bufp->newline_anchor) {
5550                                 break;
5551                         }
5552                         goto fail;
5553
5554                         /* Match at the very beginning of the data.  */
5555                 case begbuf:
5556                         DEBUG_PRINT1("EXECUTING begbuf.\n");
5557                         if (AT_STRINGS_BEG(d))
5558                                 break;
5559                         goto fail;
5560
5561                         /* Match at the very end of the data.  */
5562                 case endbuf:
5563                         DEBUG_PRINT1("EXECUTING endbuf.\n");
5564                         if (AT_STRINGS_END(d))
5565                                 break;
5566                         goto fail;
5567
5568                         /* on_failure_keep_string_jump is used to optimize
5569                            `.*\n'.  It pushes NULL as the value for the string
5570                            on the stack.  Then `pop_failure_point' will keep the
5571                            current value for the string, instead of restoring
5572                            it.  To see why, consider matching `foo\nbar' against
5573                            `.*\n'.  The .* matches the foo; then the . fails
5574                            against the \n.  But the next thing we want to do is
5575                            match the \n against the \n; if we restored the
5576                            string value, we would be back at the foo.
5577
5578                            Because this is used only in specific cases, we don't
5579                            need to check all the things that `on_failure_jump'
5580                            does, to make sure the right things get saved on the
5581                            stack.  Hence we don't share its code.  The only
5582                            reason to push anything on the stack at all is that
5583                            otherwise we would have to change `anychar's code to
5584                            do something besides goto fail in this case; that
5585                            seems worse than this.  */
5586                 case on_failure_keep_string_jump:
5587                         DEBUG_PRINT1("EXECUTING on_failure_keep_string_jump");
5588
5589                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5590                         DEBUG_PRINT3(" %d (to 0x%lx):\n", mcnt,
5591                                      (long)(p + mcnt));
5592
5593                         PUSH_FAILURE_POINT(p + mcnt, (unsigned char *)0, -2);
5594                         break;
5595
5596                         /* Uses of on_failure_jump:
5597
5598                            Each alternative starts with an on_failure_jump that
5599                            points to the beginning of the next alternative.
5600                            Each alternative except the last ends with a jump
5601                            that in effect jumps past the rest of the
5602                            alternatives.  (They really jump to the ending jump
5603                            of the following alternative, because tensioning
5604                            these jumps is a hassle.)
5605
5606                            Repeats start with an on_failure_jump that points
5607                            past both the repetition text and either the
5608                            following jump or pop_failure_jump back to this
5609                            on_failure_jump.  */
5610                 case on_failure_jump:
5611                       on_failure:
5612                         DEBUG_PRINT1("EXECUTING on_failure_jump");
5613
5614                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5615                         DEBUG_PRINT3(" %d (to 0x%lx)", mcnt, (long)(p + mcnt));
5616
5617                         /* If this on_failure_jump comes right before a group
5618                            (i.e., the original * applied to a group), save the
5619                            information for that group and all inner ones, so
5620                            that if we fail back to this point, the group's
5621                            information will be correct.  For example, in
5622                            \(a*\)*\1, we need the preceding group, and in
5623                            \(\(a*\)b*\)\2, we need the inner group.  */
5624
5625                         /* We can't use `p' to check ahead because we push
5626                            a failure point to `p + mcnt' after we do this.  */
5627                         p1 = p;
5628
5629                         /* We need to skip no_op's before we look for the
5630                            start_memory in case this on_failure_jump is
5631                            happening as the result of a completed succeed_n, as
5632                            in \(a\)\{1,3\}b\1 against aba.  */
5633                         while (p1 < pend && (re_opcode_t) * p1 == no_op)
5634                                 p1++;
5635
5636                         if (p1 < pend && (re_opcode_t) * p1 == start_memory) {
5637                                 /* We have a new highest active register now.
5638                                    This will get reset at the start_memory we
5639                                    are about to get to, but we will have saved
5640                                    all the registers relevant to this repetition
5641                                    op, as described above.  */
5642                                 highest_active_reg = *(p1 + 1) + *(p1 + 2);
5643                                 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5644                                         lowest_active_reg = *(p1 + 1);
5645                         }
5646
5647                         DEBUG_PRINT1(":\n");
5648                         PUSH_FAILURE_POINT(p + mcnt, d, -2);
5649                         break;
5650
5651                         /* A smart repeat ends with `maybe_pop_jump'.
5652                            We change it to either `pop_failure_jump' or `jump'.  */
5653                 case maybe_pop_jump:
5654                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5655                         DEBUG_PRINT2("EXECUTING maybe_pop_jump %d.\n", mcnt);
5656                         {
5657                                 REGISTER const unsigned char *p2 = p;
5658
5659                                 /* Compare the beginning of the repeat with what
5660                                    in the pattern follows its end. If we can
5661                                    establish that there is nothing that they
5662                                    would both match, i.e., that we would have to
5663                                    backtrack because of (as in, e.g., `a*a')
5664                                    then we can change to pop_failure_jump,
5665                                    because we'll never have to backtrack.
5666
5667                                    This is not true in the case of alternatives:
5668                                    in `(a|ab)*' we do need to backtrack to the
5669                                    `ab' alternative (e.g., if the string was
5670                                    `ab').  But instead of trying to detect that
5671                                    here, the alternative has put on a dummy
5672                                    failure point which is what we will end up
5673                                    popping.  */
5674
5675                                 /* Skip over open/close-group commands.  If what
5676                                    follows this loop is a ...+ construct, look
5677                                    at what begins its body, since we will have
5678                                    to match at least one of that.  */
5679                                 while (1) {
5680                                         if (p2 + 2 < pend
5681                                             && ((re_opcode_t) * p2 ==
5682                                                 stop_memory
5683                                                 || (re_opcode_t) * p2 ==
5684                                                 start_memory))
5685                                                 p2 += 3;
5686                                         else if (p2 + 6 < pend
5687                                                  && (re_opcode_t) * p2 ==
5688                                                  dummy_failure_jump)
5689                                                 p2 += 6;
5690                                         else
5691                                                 break;
5692                                 }
5693
5694                                 p1 = p + mcnt;
5695                                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5696                                    to the `maybe_finalize_jump' of this case.  Examine what
5697                                    follows.  */
5698
5699                                 /* If we're at the end of the pattern, we can change.  */
5700                                 if (p2 == pend) {
5701                                         /* Consider what happens when matching ":\(.*\)"
5702                                            against ":/".  I don't really understand this code
5703                                            yet.  */
5704                                         p[-3] = (unsigned char)pop_failure_jump;
5705                                         DEBUG_PRINT1
5706                                             ("  End of pattern: change to `pop_failure_jump'.\n");
5707                                 }
5708
5709                                 else if ((re_opcode_t) * p2 == exactn
5710                                          || (bufp->newline_anchor
5711                                              && (re_opcode_t) * p2 ==
5712                                              endline)) {
5713                                         REGISTER unsigned char c =
5714                                             *p2 ==
5715                                             (unsigned char)endline ? '\n' :
5716                                             p2[2];
5717
5718                                         if ((re_opcode_t) p1[3] == exactn
5719                                             && p1[5] != c) {
5720                                                 p[-3] =
5721                                                     (unsigned char)
5722                                                     pop_failure_jump;
5723                                                 DEBUG_PRINT3
5724                                                     ("  %c != %c => pop_failure_jump.\n",
5725                                                      c, p1[5]);
5726                                         }
5727
5728                                         else if ((re_opcode_t) p1[3] == charset
5729                                                  || (re_opcode_t) p1[3] ==
5730                                                  charset_not) {
5731                                                 int not_p =
5732                                                     (re_opcode_t) p1[3] ==
5733                                                     charset_not;
5734
5735                                                 if (c <
5736                                                     (unsigned char)(p1[4] *
5737                                                                     BYTEWIDTH)
5738                                                     && p1[5 +
5739                                                           c /
5740                                                           BYTEWIDTH] & (1 << (c
5741                                                                               %
5742                                                                               BYTEWIDTH)))
5743                                                         not_p = !not_p;
5744
5745                                                 /* `not_p' is equal to 1 if c would match, which means
5746                                                    that we can't change to pop_failure_jump.  */
5747                                                 if (!not_p) {
5748                                                         p[-3] =
5749                                                             (unsigned char)
5750                                                             pop_failure_jump;
5751                                                         DEBUG_PRINT1
5752                                                             ("  No match => pop_failure_jump.\n");
5753                                                 }
5754                                         }
5755                                 } else if ((re_opcode_t) * p2 == charset) {
5756 #ifdef REGEX_DEBUG_FLAG
5757                                         REGISTER unsigned char c
5758                                             =
5759                                             *p2 ==
5760                                             (unsigned char)endline ? '\n' :
5761                                             p2[2];
5762 #endif
5763
5764                                         if ((re_opcode_t) p1[3] == exactn
5765                                             && !((int)p2[1] * BYTEWIDTH >
5766                                                  (int)p1[5]
5767                                                  && (p2[2 + p1[5] / BYTEWIDTH]
5768                                                      & (1 <<
5769                                                         (p1[5] %
5770                                                          BYTEWIDTH))))) {
5771                                                 p[-3] =
5772                                                     (unsigned char)
5773                                                     pop_failure_jump;
5774                                                 DEBUG_PRINT3
5775                                                     ("  %c != %c => pop_failure_jump.\n",
5776                                                      c, p1[5]);
5777                                         }
5778
5779                                         else if ((re_opcode_t) p1[3] ==
5780                                                  charset_not) {
5781                                                 int idx;
5782                                                 /* We win if the charset_not inside the loop
5783                                                    lists every character listed in the charset after.  */
5784                                                 for (idx = 0; idx < (int)p2[1];
5785                                                      idx++)
5786                                                         if (!
5787                                                             (p2[2 + idx] == 0
5788                                                              || (idx <
5789                                                                  (int)p1[4]
5790                                                                  &&
5791                                                                  ((p2[2 + idx] &
5792                                                                    ~p1[5 +
5793                                                                        idx]) ==
5794                                                                   0))))
5795                                                                 break;
5796
5797                                                 if (idx == p2[1]) {
5798                                                         p[-3] =
5799                                                             (unsigned char)
5800                                                             pop_failure_jump;
5801                                                         DEBUG_PRINT1
5802                                                             ("  No match => pop_failure_jump.\n");
5803                                                 }
5804                                         } else if ((re_opcode_t) p1[3] ==
5805                                                    charset) {
5806                                                 int idx;
5807                                                 /* We win if the charset inside the loop
5808                                                    has no overlap with the one after the loop.  */
5809                                                 for (idx = 0;
5810                                                      idx < (int)p2[1]
5811                                                      && idx < (int)p1[4]; idx++)
5812                                                         if ((p2[2 + idx] &
5813                                                              p1[5 + idx]) != 0)
5814                                                                 break;
5815
5816                                                 if (idx == p2[1]
5817                                                     || idx == p1[4]) {
5818                                                         p[-3] =
5819                                                             (unsigned char)
5820                                                             pop_failure_jump;
5821                                                         DEBUG_PRINT1
5822                                                             ("  No match => pop_failure_jump.\n");
5823                                                 }
5824                                         }
5825                                 }
5826                         }
5827                         p -= 2; /* Point at relative address again.  */
5828                         if ((re_opcode_t) p[-1] != pop_failure_jump) {
5829                                 p[-1] = (unsigned char)jump;
5830                                 DEBUG_PRINT1("  Match => jump.\n");
5831                                 goto unconditional_jump;
5832                         }
5833                         /* Note fall through.  */
5834
5835                         /* The end of a simple repeat has a pop_failure_jump
5836                            back to its matching on_failure_jump, where the
5837                            latter will push a failure point.  The
5838                            pop_failure_jump takes off failure points put on by
5839                            this pop_failure_jump's matching on_failure_jump; we
5840                            got through the pattern to here from the matching
5841                            on_failure_jump, so didn't fail.  */
5842                 case pop_failure_jump: {
5843                         /* We need to pass separate storage for the
5844                            lowest and highest registers, even though we
5845                            don't care about the actual values.
5846                            Otherwise, we will restore only one register
5847                            from the stack, since lowest will == highest
5848                            in `pop_failure_point'.  */
5849                         int dummy_low_reg, dummy_high_reg;
5850                         const unsigned char *pdummy = NULL;
5851                         const unsigned char *sdummy = NULL;
5852
5853                         DEBUG_PRINT1("EXECUTING pop_failure_jump.\n");
5854                         POP_FAILURE_POINT(sdummy, pdummy,
5855                                           dummy_low_reg, dummy_high_reg,
5856                                           reg_dummy, reg_dummy,
5857                                           reg_info_dummy);
5858                 }
5859                         /* Note fall through.  */
5860
5861                         /* Unconditionally jump (without popping any failure
5862                            points).  */
5863                 case jump:
5864                 unconditional_jump:
5865                         /* Get the amount to jump. */
5866                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5867                         DEBUG_PRINT2("EXECUTING jump %d ", mcnt);
5868                         p += mcnt;      /* Do the jump.  */
5869                         DEBUG_PRINT2("(to 0x%lx).\n", (long)p);
5870                         break;
5871
5872                         /* We need this opcode so we can detect where alternatives end
5873                            in `group_match_null_string_p' et al.  */
5874                 case jump_past_alt:
5875                         DEBUG_PRINT1("EXECUTING jump_past_alt.\n");
5876                         goto unconditional_jump;
5877
5878                         /* Normally, the on_failure_jump pushes a failure point, which
5879                            then gets popped at pop_failure_jump.  We will end up at
5880                            pop_failure_jump, also, and with a pattern of, say, `a+', we
5881                            are skipping over the on_failure_jump, so we have to push
5882                            something meaningless for pop_failure_jump to pop.  */
5883                 case dummy_failure_jump:
5884                         DEBUG_PRINT1("EXECUTING dummy_failure_jump.\n");
5885                         /* It doesn't matter what we push for the string here.  What
5886                            the code at `fail' tests is the value for the pattern.  */
5887                         PUSH_FAILURE_POINT(NULL, NULL, -2);
5888                         goto unconditional_jump;
5889
5890                         /* At the end of an alternative, we need to push a dummy failure
5891                            point in case we are followed by a `pop_failure_jump', because
5892                            we don't want the failure point for the alternative to be
5893                            popped.  For example, matching `(a|ab)*' against `aab'
5894                            requires that we match the `ab' alternative.  */
5895                 case push_dummy_failure:
5896                         DEBUG_PRINT1("EXECUTING push_dummy_failure.\n");
5897                         /* See comments just above at `dummy_failure_jump' about the
5898                            two zeroes.  */
5899                         PUSH_FAILURE_POINT(NULL, NULL, -2);
5900                         break;
5901
5902                         /* Have to succeed matching what follows at least n times.
5903                            After that, handle like `on_failure_jump'.  */
5904                 case succeed_n:
5905                         EXTRACT_NUMBER(mcnt, p + 2);
5906                         DEBUG_PRINT2("EXECUTING succeed_n %d.\n", mcnt);
5907
5908                         assert(mcnt >= 0);
5909                         /* Originally, this is how many times we HAVE to succeed.  */
5910                         if (mcnt > 0) {
5911                                 mcnt--;
5912                                 p += 2;
5913                                 STORE_NUMBER_AND_INCR(p, mcnt);
5914                                 DEBUG_PRINT3("  Setting 0x%lx to %d.\n",
5915                                              (long)p, mcnt);
5916                         } else if (mcnt == 0) {
5917                                 DEBUG_PRINT2
5918                                     ("  Setting two bytes from 0x%lx to no_op.\n",
5919                                      (long)(p + 2));
5920                                 p[2] = (unsigned char)no_op;
5921                                 p[3] = (unsigned char)no_op;
5922                                 goto on_failure;
5923                         }
5924                         break;
5925
5926                 case jump_n:
5927                         EXTRACT_NUMBER(mcnt, p + 2);
5928                         DEBUG_PRINT2("EXECUTING jump_n %d.\n", mcnt);
5929
5930                         /* Originally, this is how many times we CAN jump.  */
5931                         if (mcnt) {
5932                                 mcnt--;
5933                                 STORE_NUMBER(p + 2, mcnt);
5934                                 goto unconditional_jump;
5935                         }
5936                         /* If don't have to jump any more, skip over the rest of command.  */
5937                         else
5938                                 p += 4;
5939                         break;
5940
5941                 case set_number_at:
5942                         {
5943                                 DEBUG_PRINT1("EXECUTING set_number_at.\n");
5944
5945                                 EXTRACT_NUMBER_AND_INCR(mcnt, p);
5946                                 p1 = p + mcnt;
5947                                 EXTRACT_NUMBER_AND_INCR(mcnt, p);
5948                                 DEBUG_PRINT3("  Setting 0x%lx to %d.\n",
5949                                              (long)p1, mcnt);
5950                                 STORE_NUMBER(p1, mcnt);
5951                                 break;
5952                         }
5953
5954                 case wordbound:
5955                         DEBUG_PRINT1("EXECUTING wordbound.\n");
5956                         should_succeed = 1;
5957                       matchwordbound:
5958                         {
5959                                 /* XEmacs change */
5960                                 /* Straightforward and (I hope) correct implementation.
5961                                    Probably should be optimized by arranging to compute
5962                                    pos only once. */
5963                                 /* emch1 is the character before d, syn1 is the syntax of
5964                                    emch1, emch2 is the character at d, and syn2 is the
5965                                    syntax of emch2. */
5966                                 Emchar emch1, emch2;
5967                                 /* GCC isn't smart enough to see these are initialized if used. */
5968                                 int syn1 = 0, syn2 = 0;
5969                                 re_char *d_before, *d_after;
5970                                 int result,
5971                                     at_beg = AT_STRINGS_BEG(d),
5972                                     at_end = AT_STRINGS_END(d);
5973 #ifdef emacs
5974                                 int xpos;
5975 #endif
5976
5977                                 if (at_beg && at_end) {
5978                                         result = 0;
5979                                 } else {
5980                                         if (!at_beg) {
5981                                                 d_before =
5982                                                     POS_BEFORE_GAP_UNSAFE(d);
5983                                                 DEC_CHARPTR(d_before);
5984                                                 emch1 =
5985                                                     charptr_emchar(d_before);
5986 #ifdef emacs
5987                                                 xpos =
5988                                                     SYNTAX_CACHE_BYTE_TO_CHAR
5989                                                     (PTR_TO_OFFSET(d)) - 1;
5990                                                 UPDATE_SYNTAX_CACHE(xpos);
5991 #endif
5992                                                 syn1 = SYNTAX_FROM_CACHE
5993                                                     (XCHAR_TABLE
5994                                                      (regex_emacs_buffer->
5995                                                       mirror_syntax_table),
5996                                                      emch1);
5997                                         }
5998                                         if (!at_end) {
5999                                                 d_after =
6000                                                     POS_AFTER_GAP_UNSAFE(d);
6001                                                 emch2 = charptr_emchar(d_after);
6002 #ifdef emacs
6003                                                 xpos =
6004                                                     SYNTAX_CACHE_BYTE_TO_CHAR
6005                                                     (PTR_TO_OFFSET(d));
6006                                                 UPDATE_SYNTAX_CACHE_FORWARD(xpos
6007                                                                             +
6008                                                                             1);
6009 #endif
6010                                                 syn2 = SYNTAX_FROM_CACHE
6011                                                     (XCHAR_TABLE
6012                                                      (regex_emacs_buffer->
6013                                                       mirror_syntax_table),
6014                                                      emch2);
6015                                         }
6016
6017                                         if (at_beg)
6018                                                 result = (syn2 == Sword);
6019                                         else if (at_end)
6020                                                 result = (syn1 == Sword);
6021                                         else
6022                                                 result =
6023                                                     ((syn1 == Sword) !=
6024                                                      (syn2 == Sword));
6025                                 }
6026
6027                                 if (result == should_succeed)
6028                                         break;
6029                                 goto fail;
6030                         }
6031
6032                 case notwordbound:
6033                         DEBUG_PRINT1("EXECUTING notwordbound.\n");
6034                         should_succeed = 0;
6035                         goto matchwordbound;
6036
6037                 case wordbeg:
6038                         DEBUG_PRINT1("EXECUTING wordbeg.\n");
6039                         if (AT_STRINGS_END(d))
6040                                 goto fail;
6041                         {
6042                                 /* XEmacs: this originally read:
6043
6044                                    if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
6045                                    break;
6046
6047                                  */
6048                                 re_char *dtmp = POS_AFTER_GAP_UNSAFE(d);
6049                                 Emchar emch = charptr_emchar(dtmp);
6050 #ifdef emacs
6051                                 int charpos =
6052                                     SYNTAX_CACHE_BYTE_TO_CHAR(PTR_TO_OFFSET(d));
6053                                 UPDATE_SYNTAX_CACHE(charpos);
6054 #endif
6055                                 if (SYNTAX_FROM_CACHE
6056                                     (XCHAR_TABLE
6057                                      (regex_emacs_buffer->mirror_syntax_table),
6058                                      emch) != Sword)
6059                                         goto fail;
6060                                 if (AT_STRINGS_BEG(d))
6061                                         break;
6062                                 dtmp = POS_BEFORE_GAP_UNSAFE(d);
6063                                 DEC_CHARPTR(dtmp);
6064                                 emch = charptr_emchar(dtmp);
6065 #ifdef emacs
6066                                 UPDATE_SYNTAX_CACHE_BACKWARD(charpos - 1);
6067 #endif
6068                                 if (SYNTAX_FROM_CACHE
6069                                     (XCHAR_TABLE
6070                                      (regex_emacs_buffer->mirror_syntax_table),
6071                                      emch) != Sword)
6072                                         break;
6073                                 goto fail;
6074                         }
6075
6076                 case wordend:
6077                         DEBUG_PRINT1("EXECUTING wordend.\n");
6078                         if (AT_STRINGS_BEG(d))
6079                                 goto fail;
6080                         {
6081                                 /* XEmacs: this originally read:
6082
6083                                    if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
6084                                    && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
6085                                    break;
6086
6087                                    The or condition is incorrect (reversed).
6088                                  */
6089                                 re_char *dtmp;
6090                                 Emchar emch;
6091 #ifdef emacs
6092                                 int charpos =
6093                                     SYNTAX_CACHE_BYTE_TO_CHAR(PTR_TO_OFFSET(d))
6094                                     - 1;
6095                                 UPDATE_SYNTAX_CACHE(charpos);
6096 #endif
6097                                 dtmp = POS_BEFORE_GAP_UNSAFE(d);
6098                                 DEC_CHARPTR(dtmp);
6099                                 emch = charptr_emchar(dtmp);
6100                                 if (SYNTAX_FROM_CACHE
6101                                     (XCHAR_TABLE
6102                                      (regex_emacs_buffer->mirror_syntax_table),
6103                                      emch) != Sword)
6104                                         goto fail;
6105                                 if (AT_STRINGS_END(d))
6106                                         break;
6107                                 dtmp = POS_AFTER_GAP_UNSAFE(d);
6108                                 emch = charptr_emchar(dtmp);
6109 #ifdef emacs
6110                                 UPDATE_SYNTAX_CACHE_FORWARD(charpos + 1);
6111 #endif
6112                                 if (SYNTAX_FROM_CACHE
6113                                     (XCHAR_TABLE
6114                                      (regex_emacs_buffer->mirror_syntax_table),
6115                                      emch) != Sword)
6116                                         break;
6117                                 goto fail;
6118                         }
6119
6120 #ifdef emacs
6121                 case before_dot:
6122                         DEBUG_PRINT1("EXECUTING before_dot.\n");
6123                         if (!
6124                             (NILP(regex_match_object)
6125                              || BUFFERP(regex_match_object))
6126                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6127                                 >= BUF_PT(regex_emacs_buffer)))
6128                                 goto fail;
6129                         break;
6130
6131                 case at_dot:
6132                         DEBUG_PRINT1("EXECUTING at_dot.\n");
6133                         if (!
6134                             (NILP(regex_match_object)
6135                              || BUFFERP(regex_match_object))
6136                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6137                                 != BUF_PT(regex_emacs_buffer)))
6138                                 goto fail;
6139                         break;
6140
6141                 case after_dot:
6142                         DEBUG_PRINT1("EXECUTING after_dot.\n");
6143                         if (!
6144                             (NILP(regex_match_object)
6145                              || BUFFERP(regex_match_object))
6146                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6147                                 <= BUF_PT(regex_emacs_buffer)))
6148                                 goto fail;
6149                         break;
6150 #if 0                           /* not emacs19 */
6151                 case at_dot:
6152                         DEBUG_PRINT1("EXECUTING at_dot.\n");
6153                         if (BUF_PTR_BYTE_POS
6154                             (regex_emacs_buffer,
6155                              (unsigned char *)d) + 1 !=
6156                             BUF_PT(regex_emacs_buffer))
6157                                 goto fail;
6158                         break;
6159 #endif                          /* not emacs19 */
6160
6161                 case syntaxspec:
6162                         DEBUG_PRINT2("EXECUTING syntaxspec %d.\n", mcnt);
6163                         mcnt = *p++;
6164                         goto matchsyntax;
6165
6166                 case wordchar:
6167                         DEBUG_PRINT1("EXECUTING Emacs wordchar.\n");
6168                         mcnt = (int)Sword;
6169                       matchsyntax:
6170                         should_succeed = 1;
6171                       matchornotsyntax:
6172                         {
6173                                 int matches;
6174                                 Emchar emch;
6175
6176                                 REGEX_PREFETCH();
6177 #ifdef emacs
6178                                 {
6179                                         int charpos =
6180                                             SYNTAX_CACHE_BYTE_TO_CHAR
6181                                             (PTR_TO_OFFSET(d));
6182                                         UPDATE_SYNTAX_CACHE(charpos);
6183                                 }
6184 #endif
6185
6186                                 emch = charptr_emchar((const Bufbyte *)d);
6187                                 matches =
6188                                     (SYNTAX_FROM_CACHE
6189                                      (XCHAR_TABLE
6190                                       (regex_emacs_buffer->mirror_syntax_table),
6191                                       emch) == (enum syntaxcode)mcnt);
6192                                 INC_CHARPTR(d);
6193                                 if (matches != should_succeed)
6194                                         goto fail;
6195                                 SET_REGS_MATCHED();
6196                         }
6197                         break;
6198
6199                 case notsyntaxspec:
6200                         DEBUG_PRINT2("EXECUTING notsyntaxspec %d.\n", mcnt);
6201                         mcnt = *p++;
6202                         goto matchnotsyntax;
6203
6204                 case notwordchar:
6205                         DEBUG_PRINT1("EXECUTING Emacs notwordchar.\n");
6206                         mcnt = (int)Sword;
6207                       matchnotsyntax:
6208                         should_succeed = 0;
6209                         goto matchornotsyntax;
6210
6211 #ifdef MULE
6212 /* 97/2/17 jhod Mule category code patch */
6213                 case categoryspec:
6214                         should_succeed = 1;
6215                       matchornotcategory:
6216                         {
6217                                 Emchar emch;
6218
6219                                 mcnt = *p++;
6220                                 REGEX_PREFETCH();
6221                                 emch = charptr_emchar((const Bufbyte *)d);
6222                                 INC_CHARPTR(d);
6223                                 if (check_category_char
6224                                     (emch, regex_emacs_buffer->category_table,
6225                                      mcnt, should_succeed))
6226                                         goto fail;
6227                                 SET_REGS_MATCHED();
6228                         }
6229                         break;
6230
6231                 case notcategoryspec:
6232                         should_succeed = 0;
6233                         goto matchornotcategory;
6234 /* end of category patch */
6235 #endif                          /* MULE */
6236 #else                           /* not emacs */
6237                 case wordchar:
6238                         DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n");
6239                         REGEX_PREFETCH();
6240                         if (!WORDCHAR_P_UNSAFE((int)(*d)))
6241                                 goto fail;
6242                         SET_REGS_MATCHED();
6243                         d++;
6244                         break;
6245
6246                 case notwordchar:
6247                         DEBUG_PRINT1("EXECUTING non-Emacs notwordchar.\n");
6248                         REGEX_PREFETCH();
6249                         if (!WORDCHAR_P_UNSAFE((int)(*d)))
6250                                 goto fail;
6251                         SET_REGS_MATCHED();
6252                         d++;
6253                         break;
6254 #endif                          /* emacs */
6255
6256                 default:
6257                         abort();
6258                 }
6259                 /* Successfully executed one pattern command; keep going.  */
6260                 continue;
6261
6262                 /* We goto here if a matching operation fails. */
6263         fail:
6264                 if (!FAIL_STACK_EMPTY()) {
6265                         /* A restart point is known.  Restore to that state.  */
6266                         DEBUG_PRINT1("\nFAIL:\n");
6267                         POP_FAILURE_POINT(d, p, lowest_active_reg,
6268                                           highest_active_reg, regstart, regend,
6269                                           reg_info);
6270
6271                         /* If this failure point is a dummy, try the next one.  */
6272                         if (!p)
6273                                 goto fail;
6274
6275                         /* If we failed to the end of the pattern, don't examine
6276                            *p.  */
6277                         assert(p <= pend);
6278                         if (p < pend) {
6279                                 re_bool is_a_jump_n = false;
6280
6281                                 /* If failed to a backwards jump that's part of
6282                                    a repetition loop, need to pop this failure
6283                                    point and use the next one.  */
6284                                 switch ((unsigned int)(re_opcode_t)*p) {
6285                                 case jump_n:
6286                                         is_a_jump_n = true;
6287                                 case maybe_pop_jump:
6288                                 case pop_failure_jump:
6289                                 case jump:
6290                                         p1 = p + 1;
6291                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6292                                         p1 += mcnt;
6293
6294                                         if ((is_a_jump_n
6295                                              && (re_opcode_t) * p1 == succeed_n)
6296                                             || (!is_a_jump_n
6297                                                 && (re_opcode_t) * p1 ==
6298                                                 on_failure_jump))
6299                                                 goto fail;
6300                                         break;
6301                                 default:
6302                                         /* do nothing */ ;
6303                                 }
6304                         }
6305
6306                         if (d >= string1 && d <= end1)
6307                                 dend = end_match_1;
6308                 } else
6309                         break;  /* Matching at this starting point really fails.  */
6310         }                       /* for (;;) */
6311
6312         if (best_regs_set)
6313                 goto restore_best_regs;
6314
6315         FREE_VARIABLES();
6316
6317         return -1;              /* Failure to match.  */
6318 }                               /* re_match_2 */
6319
6320 \f
6321 /* Subroutine definitions for re_match_2.  */
6322
6323 /* We are passed P pointing to a register number after a start_memory.
6324
6325    Return true if the pattern up to the corresponding stop_memory can
6326    match the empty string, and false otherwise.
6327
6328    If we find the matching stop_memory, sets P to point to one past its number.
6329    Otherwise, sets P to an undefined byte less than or equal to END.
6330
6331    We don't handle duplicates properly (yet).  */
6332
6333 static re_bool
6334 group_match_null_string_p(unsigned char **p, unsigned char *end,
6335                           register_info_type * register_info)
6336 {
6337         int mcnt;
6338         /* Point to after the args to the start_memory.  */
6339         unsigned char *p1 = *p + 2;
6340
6341         while (p1 < end) {
6342                 /* Skip over opcodes that can match nothing, and return true or
6343                    false, as appropriate, when we get to one that can't, or to the
6344                    matching stop_memory.  */
6345
6346                 switch ((unsigned int)(re_opcode_t)*p1) {
6347                         /* Could be either a loop or a series of alternatives.  */
6348                 case on_failure_jump:
6349                         p1++;
6350                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6351
6352                         /* If the next operation is not a jump backwards in the
6353                            pattern.  */
6354
6355                         if (mcnt >= 0) {
6356                                 /* Go through the on_failure_jumps of the alternatives,
6357                                    seeing if any of the alternatives cannot match nothing.
6358                                    The last alternative starts with only a jump,
6359                                    whereas the rest start with on_failure_jump and end
6360                                    with a jump, e.g., here is the pattern for `a|b|c':
6361
6362                                    /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
6363                                    /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
6364                                    /exactn/1/c
6365
6366                                    So, we have to first go through the first (n-1)
6367                                    alternatives and then deal with the last one separately.  */
6368
6369                                 /* Deal with the first (n-1) alternatives, which start
6370                                    with an on_failure_jump (see above) that jumps to right
6371                                    past a jump_past_alt.  */
6372
6373                                 while ((re_opcode_t) p1[mcnt - 3] ==
6374                                        jump_past_alt) {
6375                                         /* `mcnt' holds how many bytes long the alternative
6376                                            is, including the ending `jump_past_alt' and
6377                                            its number.  */
6378
6379                                         if (!alt_match_null_string_p
6380                                             (p1, p1 + mcnt - 3, register_info))
6381                                                 return false;
6382
6383                                         /* Move to right after this alternative, including the
6384                                            jump_past_alt.  */
6385                                         p1 += mcnt;
6386
6387                                         /* Break if it's the beginning of an n-th alternative
6388                                            that doesn't begin with an on_failure_jump.  */
6389                                         if ((re_opcode_t) * p1 !=
6390                                             on_failure_jump)
6391                                                 break;
6392
6393                                         /* Still have to check that it's not an n-th
6394                                            alternative that starts with an on_failure_jump.  */
6395                                         p1++;
6396                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6397                                         if ((re_opcode_t) p1[mcnt - 3] !=
6398                                             jump_past_alt) {
6399                                                 /* Get to the beginning of the n-th alternative.  */
6400                                                 p1 -= 3;
6401                                                 break;
6402                                         }
6403                                 }
6404
6405                                 /* Deal with the last alternative: go back and get number
6406                                    of the `jump_past_alt' just before it.  `mcnt' contains
6407                                    the length of the alternative.  */
6408                                 EXTRACT_NUMBER(mcnt, p1 - 2);
6409
6410                                 if (!alt_match_null_string_p
6411                                     (p1, p1 + mcnt, register_info))
6412                                         return false;
6413
6414                                 p1 += mcnt;     /* Get past the n-th alternative.  */
6415                         }       /* if mcnt > 0 */
6416                         break;
6417
6418                 case stop_memory:
6419                         assert(p1[1] == **p);
6420                         *p = p1 + 2;
6421                         return true;
6422
6423                 default:
6424                         if (!common_op_match_null_string_p(&p1, end, register_info))
6425                                 return false;
6426                 }
6427         }                       /* while p1 < end */
6428
6429         return false;
6430 }                               /* group_match_null_string_p */
6431
6432 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
6433    It expects P to be the first byte of a single alternative and END one
6434    byte past the last. The alternative can contain groups.  */
6435
6436 static re_bool
6437 alt_match_null_string_p(unsigned char *p, unsigned char *end,
6438                         register_info_type * register_info)
6439 {
6440         int mcnt;
6441         unsigned char *p1 = p;
6442
6443         while (p1 < end) {
6444                 /* Skip over opcodes that can match nothing, and break when we get
6445                    to one that can't.  */
6446
6447                 switch ((unsigned int)(re_opcode_t)*p1) {
6448                         /* It's a loop.  */
6449                 case on_failure_jump:
6450                         p1++;
6451                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6452                         p1 += mcnt;
6453                         break;
6454
6455                 default:
6456                         if (!common_op_match_null_string_p(&p1, end, register_info))
6457                                 return false;
6458                 }
6459         }                       /* while p1 < end */
6460
6461         return true;
6462 }                               /* alt_match_null_string_p */
6463
6464 /* Deals with the ops common to group_match_null_string_p and
6465    alt_match_null_string_p.
6466
6467    Sets P to one after the op and its arguments, if any.  */
6468
6469 static re_bool
6470 common_op_match_null_string_p(unsigned char **p, unsigned char *end,
6471                               register_info_type * register_info)
6472 {
6473         int mcnt;
6474         re_bool ret;
6475         int reg_no;
6476         unsigned char *p1 = *p;
6477
6478         switch ((unsigned int)(re_opcode_t)*p1++) {
6479         case no_op:
6480         case begline:
6481         case endline:
6482         case begbuf:
6483         case endbuf:
6484         case wordbeg:
6485         case wordend:
6486         case wordbound:
6487         case notwordbound:
6488 #ifdef emacs
6489         case before_dot:
6490         case at_dot:
6491         case after_dot:
6492 #endif
6493                 break;
6494
6495         case start_memory:
6496                 reg_no = *p1;
6497                 assert(reg_no > 0 && reg_no <= MAX_REGNUM);
6498                 ret = group_match_null_string_p(&p1, end, register_info);
6499
6500                 /* Have to set this here in case we're checking a group which
6501                    contains a group and a back reference to it.  */
6502
6503                 if (REG_MATCH_NULL_STRING_P(register_info[reg_no]) ==
6504                     MATCH_NULL_UNSET_VALUE)
6505                         REG_MATCH_NULL_STRING_P(register_info[reg_no]) = ret;
6506
6507                 if (!ret)
6508                         return false;
6509                 break;
6510
6511                 /* If this is an optimized succeed_n for zero times, make the jump.  */
6512         case jump:
6513                 EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6514                 if (mcnt >= 0)
6515                         p1 += mcnt;
6516                 else
6517                         return false;
6518                 break;
6519
6520         case succeed_n:
6521                 /* Get to the number of times to succeed.  */
6522                 p1 += 2;
6523                 EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6524
6525                 if (mcnt == 0) {
6526                         p1 -= 4;
6527                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6528                         p1 += mcnt;
6529                 } else
6530                         return false;
6531                 break;
6532
6533         case duplicate:
6534                 if (!REG_MATCH_NULL_STRING_P(register_info[*p1]))
6535                         return false;
6536                 break;
6537
6538         case set_number_at:
6539                 p1 += 4;
6540                 break;
6541
6542         default:
6543                 /* All other opcodes mean we cannot match the empty string.  */
6544                 return false;
6545         }
6546
6547         *p = p1;
6548         return true;
6549 }                               /* common_op_match_null_string_p */
6550
6551 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
6552    bytes; nonzero otherwise.  */
6553
6554 static int
6555 bcmp_translate(re_char *s1, re_char *s2,
6556                REGISTER int len, RE_TRANSLATE_TYPE translate)
6557 {
6558         REGISTER const unsigned char *p1 = s1, *p2 = s2;
6559 #ifdef MULE
6560         const unsigned char *p1_end = s1 + len;
6561         const unsigned char *p2_end = s2 + len;
6562
6563         while (p1 != p1_end && p2 != p2_end) {
6564                 Emchar p1_ch, p2_ch;
6565
6566                 p1_ch = charptr_emchar(p1);
6567                 p2_ch = charptr_emchar(p2);
6568
6569                 if (RE_TRANSLATE(p1_ch)
6570                     != RE_TRANSLATE(p2_ch))
6571                         return 1;
6572                 INC_CHARPTR(p1);
6573                 INC_CHARPTR(p2);
6574         }
6575 #else                           /* not MULE */
6576         while (len) {
6577                 if (RE_TRANSLATE(*p1++) != RE_TRANSLATE(*p2++))
6578                         return 1;
6579                 len--;
6580         }
6581 #endif                          /* MULE */
6582         return 0;
6583 }
6584 \f
6585 /* Entry points for GNU code.  */
6586
6587 /* re_compile_pattern is the GNU regular expression compiler: it
6588    compiles PATTERN (of length SIZE) and puts the result in BUFP.
6589    Returns 0 if the pattern was valid, otherwise an error string.
6590
6591    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6592    are set in BUFP on entry.
6593
6594    We call regex_compile to do the actual compilation.  */
6595
6596 const char *re_compile_pattern(const char *pattern, int length,
6597                                struct re_pattern_buffer *bufp)
6598 {
6599         reg_errcode_t ret;
6600
6601         /* GNU code is written to assume at least RE_NREGS registers will be set
6602            (and at least one extra will be -1).  */
6603         bufp->regs_allocated = REGS_UNALLOCATED;
6604
6605         /* And GNU code determines whether or not to get register information
6606            by passing null for the REGS argument to re_match, etc., not by
6607            setting no_sub.  */
6608         bufp->no_sub = 0;
6609
6610         /* Match anchors at newline.  */
6611         bufp->newline_anchor = 1;
6612
6613         ret = regex_compile((const unsigned char*)pattern,
6614                             length, re_syntax_options, bufp);
6615
6616         if (!ret)
6617                 return NULL;
6618         return gettext(re_error_msgid[(int)ret]);
6619 }
6620 \f
6621 /* Entry points compatible with 4.2 BSD regex library.  We don't define
6622    them unless specifically requested.  */
6623
6624 #ifdef _REGEX_RE_COMP
6625
6626 /* BSD has one and only one pattern buffer.  */
6627 static struct re_pattern_buffer re_comp_buf;
6628
6629 char *re_comp(const char *s)
6630 {
6631         reg_errcode_t ret;
6632
6633         if (!s) {
6634                 if (!re_comp_buf.buffer)
6635                         return gettext("No previous regular expression");
6636                 return 0;
6637         }
6638
6639         if (!re_comp_buf.buffer) {
6640                 re_comp_buf.buffer = (unsigned char *)xmalloc_atomic(200);
6641                 if (re_comp_buf.buffer == NULL)
6642                         return gettext(re_error_msgid[(int)REG_ESPACE]);
6643                 re_comp_buf.allocated = 200;
6644
6645                 re_comp_buf.fastmap = (char *)xmalloc_atomic(1 << BYTEWIDTH);
6646                 if (re_comp_buf.fastmap == NULL)
6647                         return gettext(re_error_msgid[(int)REG_ESPACE]);
6648         }
6649
6650         /* Since `re_exec' always passes NULL for the `regs' argument, we
6651            don't need to initialize the pattern buffer fields which affect it.  */
6652
6653         /* Match anchors at newlines.  */
6654         re_comp_buf.newline_anchor = 1;
6655
6656         ret =
6657             regex_compile((unsigned char *)s, strlen(s), re_syntax_options,
6658                           &re_comp_buf);
6659
6660         if (!ret)
6661                 return NULL;
6662
6663         /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6664         return (char *)gettext(re_error_msgid[(int)ret]);
6665 }
6666
6667 int re_exec(const char *s)
6668 {
6669         const int len = strlen(s);
6670         return
6671             0 <= re_search(&re_comp_buf, s, len, 0, len,
6672                            (struct re_registers *)0);
6673 }
6674 #endif                          /* _REGEX_RE_COMP */
6675 \f
6676 /* POSIX.2 functions.  Don't define these for Emacs.  */
6677
6678 #ifndef emacs
6679
6680 /* regcomp takes a regular expression as a string and compiles it.
6681
6682    PREG is a regex_t *.  We do not expect any fields to be initialized,
6683    since POSIX says we shouldn't.  Thus, we set
6684
6685      `buffer' to the compiled pattern;
6686      `used' to the length of the compiled pattern;
6687      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6688        REG_EXTENDED bit in CFLAGS is set; otherwise, to
6689        RE_SYNTAX_POSIX_BASIC;
6690      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6691      `fastmap' and `fastmap_accurate' to zero;
6692      `re_nsub' to the number of subexpressions in PATTERN.
6693      (non-shy of course.  POSIX probably doesn't know about
6694      shy ones, and in any case they should be invisible.)
6695
6696    PATTERN is the address of the pattern string.
6697
6698    CFLAGS is a series of bits which affect compilation.
6699
6700      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6701      use POSIX basic syntax.
6702
6703      If REG_NEWLINE is set, then . and [^...] don't match newline.
6704      Also, regexec will try a match beginning after every newline.
6705
6706      If REG_ICASE is set, then we considers upper- and lowercase
6707      versions of letters to be equivalent when matching.
6708
6709      If REG_NOSUB is set, then when PREG is passed to regexec, that
6710      routine will report only success or failure, and nothing about the
6711      registers.
6712
6713    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6714    the return codes and their meanings.)  */
6715
6716 int regcomp(regex_t * preg, const char *pattern, int cflags)
6717 {
6718         reg_errcode_t ret;
6719         unsigned syntax
6720             = (cflags & REG_EXTENDED) ?
6721             RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6722
6723         /* regex_compile will allocate the space for the compiled pattern.  */
6724         preg->buffer = 0;
6725         preg->allocated = 0;
6726         preg->used = 0;
6727
6728         /* Don't bother to use a fastmap when searching.  This simplifies the
6729            REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6730            characters after newlines into the fastmap.  This way, we just try
6731            every character.  */
6732         preg->fastmap = 0;
6733
6734         if (cflags & REG_ICASE) {
6735                 int i;
6736
6737                 preg->translate = (char *)xmalloc_atomic(CHAR_SET_SIZE);
6738                 if (preg->translate == NULL)
6739                         return (int)REG_ESPACE;
6740
6741                 /* Map uppercase characters to corresponding lowercase ones.  */
6742                 for (i = 0; i < CHAR_SET_SIZE; i++)
6743                         preg->translate[i] = ISUPPER(i) ? tolower(i) : i;
6744         } else
6745                 preg->translate = NULL;
6746
6747         /* If REG_NEWLINE is set, newlines are treated differently.  */
6748         if (cflags & REG_NEWLINE) {
6749                 /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6750                 syntax &= ~RE_DOT_NEWLINE;
6751                 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6752                 /* It also changes the matching behavior.  */
6753                 preg->newline_anchor = 1;
6754         } else {
6755                 preg->newline_anchor = 0;
6756         }
6757         preg->no_sub = !!(cflags & REG_NOSUB);
6758
6759         /* POSIX says a null character in the pattern terminates it, so we
6760            can use strlen here in compiling the pattern.  */
6761         ret = regex_compile((const unsigned char*)pattern,
6762                             strlen(pattern), syntax, preg);
6763
6764         /* POSIX doesn't distinguish between an unmatched open-group and an
6765            unmatched close-group: both are REG_EPAREN.  */
6766         if (ret == REG_ERPAREN)
6767                 ret = REG_EPAREN;
6768
6769         return (int)ret;
6770 }
6771
6772 /* regexec searches for a given pattern, specified by PREG, in the
6773    string STRING.
6774
6775    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6776    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6777    least NMATCH elements, and we set them to the offsets of the
6778    corresponding matched substrings.
6779
6780    EFLAGS specifies `execution flags' which affect matching: if
6781    REG_NOTBOL is set, then ^ does not match at the beginning of the
6782    string; if REG_NOTEOL is set, then $ does not match at the end.
6783
6784    We return 0 if we find a match and REG_NOMATCH if not.  */
6785
6786 int
6787 regexec(const regex_t * preg, const char *string, Element_count nmatch,
6788         regmatch_t pmatch[], int eflags)
6789 {
6790         int ret;
6791         struct re_registers regs;
6792         regex_t private_preg;
6793         int len = strlen(string);
6794         re_bool want_reg_info = !preg->no_sub && nmatch > 0;
6795
6796         private_preg = *preg;
6797
6798         private_preg.not_bol = !!(eflags & REG_NOTBOL);
6799         private_preg.not_eol = !!(eflags & REG_NOTEOL);
6800
6801         /* The user has told us exactly how many registers to return
6802            information about, via `nmatch'.  We have to pass that on to the
6803            matching routines.  */
6804         private_preg.regs_allocated = REGS_FIXED;
6805
6806         if (want_reg_info) {
6807                 regs.num_regs = nmatch;
6808                 regs.start = TALLOC(nmatch, regoff_t);
6809                 regs.end = TALLOC(nmatch, regoff_t);
6810                 if (regs.start == NULL || regs.end == NULL)
6811                         return (int)REG_NOMATCH;
6812         }
6813
6814         /* Perform the searching operation.  */
6815         ret = re_search(&private_preg, string, len,
6816                         /* start: */ 0, /* range: */ len,
6817                         want_reg_info ? &regs : (struct re_registers *)0);
6818
6819         /* Copy the register information to the POSIX structure.  */
6820         if (want_reg_info) {
6821                 if (ret >= 0) {
6822                         Element_count r;
6823
6824                         for (r = 0; r < nmatch; r++) {
6825                                 pmatch[r].rm_so = regs.start[r];
6826                                 pmatch[r].rm_eo = regs.end[r];
6827                         }
6828                 }
6829
6830                 /* If we needed the temporary register info, free the space now.  */
6831                 xfree(regs.start);
6832                 xfree(regs.end);
6833         }
6834
6835         /* We want zero return to mean success, unlike `re_search'.  */
6836         return ret >= 0 ? (int)REG_NOERROR : (int)REG_NOMATCH;
6837 }
6838
6839 /* Returns a message corresponding to an error code, ERRCODE, returned
6840    from either regcomp or regexec.   We don't use PREG here.  */
6841
6842 Memory_count
6843 regerror(int errcode, const regex_t * preg, char *errbuf,
6844          Memory_count errbuf_size)
6845 {
6846         const char *msg;
6847         Memory_count msg_size;
6848
6849         if (errcode < 0 || (size_t) errcode >= (sizeof(re_error_msgid)
6850                                                 / sizeof(re_error_msgid[0])))
6851                 /* Only error codes returned by the rest of the code should be passed
6852                    to this routine.  If we are given anything else, or if other regex
6853                    code generates an invalid error code, then the program has a bug.
6854                    Dump core so we can fix it.  */
6855                 abort();
6856
6857         msg = gettext(re_error_msgid[errcode]);
6858
6859         msg_size = strlen(msg) + 1;     /* Includes the null.  */
6860
6861         if (errbuf_size != 0) {
6862                 strncpy(errbuf, msg, errbuf_size - 1);
6863                 errbuf[errbuf_size-1]='\0';
6864         }
6865
6866         return msg_size;
6867 }
6868
6869 /* Free dynamically allocated space used by PREG.  */
6870
6871 void regfree(regex_t * preg)
6872 {
6873         if (preg->buffer != NULL) {
6874                 xfree(preg->buffer);
6875         }
6876         preg->buffer = NULL;
6877
6878         preg->allocated = 0;
6879         preg->used = 0;
6880
6881         if (preg->fastmap != NULL) {
6882                 xfree(preg->fastmap);
6883         }
6884         preg->fastmap = NULL;
6885         preg->fastmap_accurate = 0;
6886
6887         if (preg->translate != NULL) {
6888                 xfree(preg->translate);
6889         }
6890         preg->translate = NULL;
6891 }
6892
6893 #endif                          /* not emacs  */
6894 \f
6895 /*
6896 Local variables:
6897 make-backup-files: t
6898 version-control: t
6899 trim-versions-without-asking: nil
6900 End:
6901 */