Compiler & warning related updates/fixes from Nelson
[sxemacs] / src / regex.c
1 /* Extended regular expression matching and search library,
2    version 0.12, extended for XEmacs.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5
6    Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
7    Copyright (C) 1995 Sun Microsystems, Inc.
8    Copyright (C) 1995 Ben Wing.
9
10 This file is part of SXEmacs
11
12 SXEmacs is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
16
17 SXEmacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
24
25
26 /* Synched up with: FSF 19.29. */
27
28 /* Changes made for XEmacs:
29
30    (1) the REGEX_BEGLINE_CHECK code from the XEmacs v18 regex routines
31        was added.  This causes a huge speedup in font-locking.
32    (2) Rel-alloc is disabled when the MMAP version of rel-alloc is
33        being used, because it's too slow -- all those calls to mmap()
34        add humongous overhead.
35    (3) Lots and lots of changes for Mule.  They are bracketed by
36        `#ifdef MULE' or with comments that have `XEmacs' in them.
37  */
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #ifndef REGISTER                /* Rigidly enforced as of 20.3 */
44 #define REGISTER
45 #endif
46
47 #ifndef _GNU_SOURCE
48 #define _GNU_SOURCE 1
49 #endif
50
51 #ifdef emacs
52 /* Converts the pointer to the char to BEG-based offset from the start.  */
53 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING                      \
54                           ? (d) - string1 : (d) - (string2 - size1))
55 #else
56 #define PTR_TO_OFFSET(d) 0
57 #endif
58
59 /* We assume non-Mule if emacs isn't defined. */
60 #ifndef emacs
61 #undef MULE
62 #endif
63
64 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
65 #include <sys/types.h>
66
67 /* This is for other GNU distributions with internationalized messages.  */
68 #if defined (I18N3) && (defined (HAVE_LIBINTL_H) || defined (_LIBC))
69 # include <libintl.h>
70 #else
71 # define gettext(msgid) (msgid)
72 #endif
73
74 /* XEmacs: define this to add in a speedup for patterns anchored at
75    the beginning of a line.  Keep the ifdefs so that it's easier to
76    tell where/why this code has diverged from v19. */
77 #define REGEX_BEGLINE_CHECK
78
79 /* XEmacs: the current mmap-based ralloc handles small blocks very
80    poorly, so we disable it here. */
81
82 #if (defined (REL_ALLOC) && defined (HAVE_MMAP)) || defined(DOUG_LEA_MALLOC)
83 # undef REL_ALLOC
84 #endif
85
86 #include "sxe-utils.h"
87
88 /* The `emacs' switch turns on certain matching commands
89    that make sense only in Emacs. */
90 #ifdef emacs
91
92 #include "lisp.h"
93 #include "buffer.h"
94 #include "syntax.h"
95
96 #ifdef MULE
97
98 Lisp_Object Vthe_lisp_rangetab;
99
100 void complex_vars_of_regex(void)
101 {
102         Vthe_lisp_rangetab = Fmake_range_table();
103         staticpro(&Vthe_lisp_rangetab);
104 }
105
106 #else  /* not MULE */
107
108 void complex_vars_of_regex(void)
109 {
110 }
111
112 #endif  /* MULE */
113
114 #define RE_TRANSLATE(ch) TRT_TABLE_OF (translate, (Emchar) ch)
115 #define TRANSLATE_P(tr) (!NILP (tr) && CHAR_TABLEP(tr))
116
117 /* just massage that xfree thing */
118 #undef xfree
119 #if defined HAVE_BDWGC && defined EF_USE_BDWGC
120 # define xfree          zfree
121 #else  /* !BDWGC */
122 # define xfree          yfree
123 #endif  /* BDWGC */
124
125 #else  /* !emacs */
126
127 /* If we are not linking with Emacs proper,
128    we can't use the relocating allocator
129    even if config.h says that we can.  */
130 #undef REL_ALLOC
131
132 #if defined (STDC_HEADERS) || defined (_LIBC)
133 # include <stdlib.h>
134 # include <stdbool.h>
135 #else
136 char *malloc();
137 char *realloc();
138 #endif
139
140 /* Types normally included via lisp.h */
141 #include <stddef.h>             /* for ptrdiff_t */
142
143 #ifdef REGEX_MALLOC
144 #ifndef DECLARE_NOTHING
145 #define DECLARE_NOTHING struct nosuchstruct
146 #endif
147 #endif
148
149 typedef int Emchar;
150
151 #define charptr_emchar(str)             ((Emchar) (str)[0])
152
153 #define INC_CHARPTR(p) ((p)++)
154 #define DEC_CHARPTR(p) ((p)--)
155
156 #include <string.h>
157
158 /* Define the syntax stuff for \<, \>, etc.  */
159
160 /* This must be nonzero for the wordchar and notwordchar pattern
161    commands in re_match_2.  */
162 #ifndef Sword
163 #define Sword 1
164 #endif
165
166 #ifdef SYNTAX_TABLE
167
168 extern char *re_syntax_table;
169
170 #else                           /* not SYNTAX_TABLE */
171
172 /* How many characters in the character set.  */
173 #define CHAR_SET_SIZE 256
174
175 static char re_syntax_table[CHAR_SET_SIZE];
176
177 static void init_syntax_once(void)
178 {
179         static int done = 0;
180
181         if (!done) {
182                 const char *word_syntax_chars =
183                     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
184
185                 memset(re_syntax_table, 0, sizeof(re_syntax_table));
186
187                 while (*word_syntax_chars)
188                         re_syntax_table[(unsigned int)(*word_syntax_chars++)] =
189                             Sword;
190
191                 done = 1;
192         }
193 }
194
195 #endif                          /* SYNTAX_TABLE */
196
197 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
198 #undef SYNTAX_FROM_CACHE
199 #define SYNTAX_FROM_CACHE SYNTAX_UNSAFE
200
201 #define RE_TRANSLATE(c) translate[(unsigned char) (c)]
202 #define TRANSLATE_P(tr) tr
203
204 #if defined HAVE_BDWGC && defined EF_USE_BDWGC
205 # if defined HAVE_GC_GC_H
206 #  include "gc/gc.h"
207 # elif defined HAVE_GC_H
208 #  include "gc.h"
209 # endif
210 # define xmalloc        GC_MALLOC
211 # define xmalloc_atomic GC_MALLOC_ATOMIC
212 # define xrealloc       GC_REALLOC
213 # define xfree          GC_FREE
214 # define xstrdup        GC_STRDUP
215 #else  /* !HAVE_BDWGC */
216 # define xmalloc        malloc
217 # define xmalloc_atomic malloc
218 # define xrealloc       realloc
219 # define xfree          free
220 # define xstrdup        strdup
221 #endif  /* HAVE_BDWGC */
222
223 #endif  /* emacs */
224
225
226 /* Under XEmacs, this is needed because we don't define it elsewhere. */
227 #ifdef SWITCH_ENUM_BUG
228 #define SWITCH_ENUM_CAST(x) ((int)(x))
229 #else
230 #define SWITCH_ENUM_CAST(x) (x)
231 #endif
232 \f
233 /* Get the interface, including the syntax bits.  */
234 #include "regex.h"
235
236 /* isalpha etc. are used for the character classes.  */
237 #include <ctype.h>
238
239 /* Jim Meyering writes:
240
241    "... Some ctype macros are valid only for character codes that
242    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
243    using /bin/cc or gcc but without giving an ansi option).  So, all
244    ctype uses should be through macros like ISPRINT...  If
245    STDC_HEADERS is defined, then autoconf has verified that the ctype
246    macros don't need to be guarded with references to isascii. ...
247    Defining isascii to 1 should let any compiler worth its salt
248    eliminate the && through constant folding."  */
249
250 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
251 #define ISASCII_1(c) 1
252 #else
253 #define ISASCII_1(c) isascii(c)
254 #endif
255
256 #ifdef MULE
257 /* The IS*() macros can be passed any character, including an extended
258    one.  We need to make sure there are no crashes, which would occur
259    otherwise due to out-of-bounds array references. */
260 #define ISASCII(c) (((EMACS_UINT) (c)) < 0x100 && ISASCII_1 (c))
261 #else
262 #define ISASCII(c) ISASCII_1 (c)
263 #endif                          /* MULE */
264
265 #ifdef isblank
266 #define ISBLANK(c) (ISASCII (c) && isblank (c))
267 #else
268 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
269 #endif
270 #ifdef isgraph
271 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
272 #else
273 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
274 #endif
275
276 /* stolen from FSF/src/charset.h */
277 #define SINGLE_BYTE_CHAR_P(c)   (((unsigned int)(c) & 0xFF) == (c))
278 /* 1 if C is an ASCII character.  */
279 #define IS_REAL_ASCII(c) ((c) < 128)
280 /* 1 if C is a unibyte character.  */
281 #define ISUNIBYTE(c)    (SINGLE_BYTE_CHAR_P(c))
282
283 #define ISPRINT(c) (ISASCII (c) && isprint (c))
284 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
285 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
286 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
287 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
288 #define ISLOWER(c) (ISASCII (c) && islower (c))
289 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
290 #define ISSPACE(c) (ISASCII (c) && isspace (c))
291 #define ISUPPER(c) (ISASCII (c) && isupper (c))
292 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
293
294 #if defined emacs
295 static inline bool
296 ISWORD(int c)
297         __attribute__((always_inline));
298 static inline bool
299 ISWORD(int c)
300 {
301         Lisp_Object ct = regex_emacs_buffer->mirror_syntax_table;
302
303         return (SYNTAX(XCHAR_TABLE(ct), c) == Sword);
304 }
305 #else
306 # define ISWORD(c)      (ISALPHA(c))
307 #endif  /* emacs */
308
309 #ifndef NULL
310 #define NULL (void *)0
311 #endif
312
313 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
314    since ours (we hope) works properly with all combinations of
315    machines, compilers, `char' and `unsigned char' argument types.
316    (Per Bothner suggested the basic approach.)  */
317 #undef SIGN_EXTEND_CHAR
318 #if __STDC__
319 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
320 #else                           /* not __STDC__ */
321 /* As in Harbison and Steele.  */
322 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
323 #endif
324 \f
325 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
326    use `alloca' instead of `malloc'.  This is because using malloc in
327    re_search* or re_match* could cause memory leaks when C-g is used in
328    Emacs; also, malloc is slower and causes storage fragmentation.  On
329    the other hand, malloc is more portable, and easier to debug.
330
331    Because we sometimes use alloca, some routines have to be macros,
332    not functions -- `alloca'-allocated space disappears at the end of the
333    function it is called in.  */
334
335 #ifdef REGEX_MALLOC
336
337 #define REGEX_ALLOCATE  xmalloc
338 #define REGEX_REALLOCATE(source, osize, nsize) xrealloc(source, nsize)
339 #define REGEX_FREE      xfree
340
341 #else  /* !REGEX_MALLOC */
342
343 /* Emacs already defines alloca, sometimes.  */
344 #ifndef alloca
345
346 /* Make alloca work the best possible way.  */
347 #ifdef __GNUC__
348 #define alloca __builtin_alloca
349 #else                           /* not __GNUC__ */
350 #if HAVE_ALLOCA_H
351 #include <alloca.h>
352 #else                           /* not __GNUC__ or HAVE_ALLOCA_H */
353 #ifndef _AIX                    /* Already did AIX, up at the top.  */
354 void *alloca();
355 #endif                          /* not _AIX */
356 #endif                          /* HAVE_ALLOCA_H */
357 #endif                          /* __GNUC__ */
358
359 #endif                          /* not alloca */
360
361 #define REGEX_ALLOCATE alloca
362
363 /* Assumes a `char *destination' variable.  */
364 #define REGEX_REALLOCATE(source, osize, nsize)                          \
365   (destination = (char *) alloca (nsize),                               \
366    memmove (destination, source, osize),                                \
367    destination)
368
369 /* No need to do anything to free, after alloca.  */
370 #define REGEX_FREE(arg) ((void)0)       /* Do nothing!  But inhibit gcc warning.  */
371
372 #endif  /* REGEX_MALLOC */
373
374 /* Define how to allocate the failure stack.  */
375
376 #ifdef REL_ALLOC
377 #error not again
378 #define REGEX_ALLOCATE_STACK(size)                              \
379         r_alloc ((char **) &failure_stack_ptr, (size))
380 #define REGEX_REALLOCATE_STACK(source, osize, nsize)            \
381         r_re_alloc ((char **) &failure_stack_ptr, (nsize))
382 #define REGEX_FREE_STACK(ptr)                                   \
383         r_alloc_free ((void **) &failure_stack_ptr)
384
385 #else  /* !REL_ALLOC */
386
387 #ifdef REGEX_MALLOC
388
389 #define REGEX_ALLOCATE_STACK    xmalloc
390 #define REGEX_REALLOCATE_STACK(source, osize, nsize) xrealloc(source, nsize)
391 #define REGEX_FREE_STACK        xfree
392
393 #else  /* !REGEX_MALLOC */
394
395 #define REGEX_ALLOCATE_STACK alloca
396
397 #define REGEX_REALLOCATE_STACK(source, osize, nsize)                    \
398    REGEX_REALLOCATE (source, osize, nsize)
399 /* No need to explicitly free anything.  */
400 #define REGEX_FREE_STACK(arg)
401
402 #endif  /* REGEX_MALLOC */
403 #endif  /* REL_ALLOC */
404
405 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
406    `string1' or just past its end.  This works if PTR is NULL, which is
407    a good thing.  */
408 #define FIRST_STRING_P(ptr)                                     \
409   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
410
411 /* (Re)Allocate N items of type T using malloc, or fail.  */
412 #define TALLOC(n, t)                            \
413         ((t *)xmalloc((n) * sizeof (t)))
414 #define TALLOC_ATOMIC(n, t)                     \
415         ((t *)xmalloc_atomic((n) * sizeof (t)))
416 #define RETALLOC(addr, n, t)                                    \
417         ((addr) = (t *)xrealloc(addr, (n) * sizeof (t)))
418 #define RETALLOC_IF(addr, n, t)                                 \
419         if (addr) {                                             \
420                 RETALLOC((addr), (n), t);                       \
421         } else                                                  \
422                 (addr) = TALLOC ((n), t)
423 #define REGEX_TALLOC(n, t)                              \
424         ((t *)REGEX_ALLOCATE((n) * sizeof (t)))
425
426 #define BYTEWIDTH 8             /* In bits.  */
427
428 #define STREQ(s1, s2) (strcmp (s1, s2) == 0)
429
430 #undef MAX
431 #undef MIN
432 #define MAX(a, b) ((a) > (b) ? (a) : (b))
433 #define MIN(a, b) ((a) < (b) ? (a) : (b))
434
435 /* Type of source-pattern and string chars.  */
436 typedef const unsigned char re_char;
437
438 typedef char re_bool;
439 #define false 0
440 #define true 1
441 \f
442 /* These are the command codes that appear in compiled regular
443    expressions.  Some opcodes are followed by argument bytes.  A
444    command code can specify any interpretation whatsoever for its
445    arguments.  Zero bytes may appear in the compiled regular expression.  */
446
447 typedef enum {
448         no_op = 0,
449
450         /* Succeed right away--no more backtracking.  */
451         succeed,
452
453         /* Followed by one byte giving n, then by n literal bytes.  */
454         exactn,
455
456         /* Matches any (more or less) character.  */
457         anychar,
458
459         /* Matches any one char belonging to specified set.  First
460            following byte is number of bitmap bytes.  Then come bytes
461            for a bitmap saying which chars are in.  Bits in each byte
462            are ordered low-bit-first.  A character is in the set if its
463            bit is 1.  A character too large to have a bit in the map is
464            automatically not in the set.  */
465         charset,
466
467         /* Same parameters as charset, but match any character that is
468            not one of those specified.  */
469         charset_not,
470
471         /* Start remembering the text that is matched, for storing in a
472            register.  Followed by one byte with the register number, in
473            the range 1 to the pattern buffer's re_ngroups
474            field.  Then followed by one byte with the number of groups
475            inner to this one.  (This last has to be part of the
476            start_memory only because we need it in the on_failure_jump
477            of re_match_2.)  */
478         start_memory,
479
480         /* Stop remembering the text that is matched and store it in a
481            memory register.  Followed by one byte with the register
482            number, in the range 1 to `re_ngroups' in the
483            pattern buffer, and one byte with the number of inner groups,
484            just like `start_memory'.  (We need the number of inner
485            groups here because we don't have any easy way of finding the
486            corresponding start_memory when we're at a stop_memory.)  */
487         stop_memory,
488
489         /* Match a duplicate of something remembered. Followed by one
490            byte containing the register number.  */
491         duplicate,
492
493         /* Fail unless at beginning of line.  */
494         begline,
495
496         /* Fail unless at end of line.  */
497         endline,
498
499         /* Succeeds if at beginning of buffer (if emacs) or at beginning
500            of string to be matched (if not).  */
501         begbuf,
502
503         /* Analogously, for end of buffer/string.  */
504         endbuf,
505
506         /* Followed by two byte relative address to which to jump.  */
507         jump,
508
509         /* Same as jump, but marks the end of an alternative.  */
510         jump_past_alt,
511
512         /* Followed by two-byte relative address of place to resume at
513            in case of failure.  */
514         on_failure_jump,
515
516         /* Like on_failure_jump, but pushes a placeholder instead of the
517            current string position when executed.  */
518         on_failure_keep_string_jump,
519
520         /* Throw away latest failure point and then jump to following
521            two-byte relative address.  */
522         pop_failure_jump,
523
524         /* Change to pop_failure_jump if know won't have to backtrack to
525            match; otherwise change to jump.  This is used to jump
526            back to the beginning of a repeat.  If what follows this jump
527            clearly won't match what the repeat does, such that we can be
528            sure that there is no use backtracking out of repetitions
529            already matched, then we change it to a pop_failure_jump.
530            Followed by two-byte address.  */
531         maybe_pop_jump,
532
533         /* Jump to following two-byte address, and push a dummy failure
534            point. This failure point will be thrown away if an attempt
535            is made to use it for a failure.  A `+' construct makes this
536            before the first repeat.  Also used as an intermediary kind
537            of jump when compiling an alternative.  */
538         dummy_failure_jump,
539
540         /* Push a dummy failure point and continue.  Used at the end of
541            alternatives.  */
542         push_dummy_failure,
543
544         /* Followed by two-byte relative address and two-byte number n.
545            After matching N times, jump to the address upon failure.  */
546         succeed_n,
547
548         /* Followed by two-byte relative address, and two-byte number n.
549            Jump to the address N times, then fail.  */
550         jump_n,
551
552         /* Set the following two-byte relative address to the
553            subsequent two-byte number.  The address *includes* the two
554            bytes of number.  */
555         set_number_at,
556
557         wordchar,               /* Matches any word-constituent character.  */
558         notwordchar,            /* Matches any char that is not a word-constituent.  */
559
560         wordbeg,                /* Succeeds if at word beginning.  */
561         wordend,                /* Succeeds if at word end.  */
562
563         wordbound,              /* Succeeds if at a word boundary.  */
564         notwordbound            /* Succeeds if not at a word boundary.  */
565 #ifdef emacs
566             , before_dot,       /* Succeeds if before point.  */
567         at_dot,                 /* Succeeds if at point.  */
568         after_dot,              /* Succeeds if after point.  */
569
570         /* Matches any character whose syntax is specified.  Followed by
571            a byte which contains a syntax code, e.g., Sword.  */
572         syntaxspec,
573
574         /* Matches any character whose syntax is not that specified.  */
575         notsyntaxspec
576 #endif                          /* emacs */
577 #ifdef MULE
578             /* need extra stuff to be able to properly work with XEmacs/Mule
579                characters (which may take up more than one byte) */
580             , charset_mule,     /* Matches any character belonging to specified set.
581                                    The set is stored in "unified range-table
582                                    format"; see rangetab.c.  Unlike the `charset'
583                                    opcode, this can handle arbitrary characters. */
584
585         charset_mule_not        /* Same parameters as charset_mule, but match any
586                                    character that is not one of those specified.  */
587             /* 97/2/17 jhod: The following two were merged back in from the Mule
588                2.3 code to enable some language specific processing */
589             , categoryspec,     /* Matches entries in the character category tables */
590         notcategoryspec         /* The opposite of the above */
591 #endif                          /* MULE */
592 } re_opcode_t;
593 \f
594 /* Common operations on the compiled pattern.  */
595
596 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
597
598 #define STORE_NUMBER(destination, number)                               \
599   do {                                                                  \
600     (destination)[0] = (number) & 0377;                                 \
601     (destination)[1] = (number) >> 8;                                   \
602   } while (0)
603
604 /* Same as STORE_NUMBER, except increment DESTINATION to
605    the byte after where the number is stored.  Therefore, DESTINATION
606    must be an lvalue.  */
607
608 #define STORE_NUMBER_AND_INCR(destination, number)                      \
609   do {                                                                  \
610     STORE_NUMBER (destination, number);                                 \
611     (destination) += 2;                                                 \
612   } while (0)
613
614 /* Put into DESTINATION a number stored in two contiguous bytes starting
615    at SOURCE.  */
616
617 #define EXTRACT_NUMBER(destination, source)                             \
618   do {                                                                  \
619     (destination) = *(source) & 0377;                                   \
620     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
621   } while (0)
622
623 #ifdef REGEX_DEBUG_FLAG
624 static void extract_number(int *dest, re_char * source)
625 {
626         int temp = SIGN_EXTEND_CHAR(*(source + 1));
627         *dest = *source & 0377;
628         *dest += temp << 8;
629 }
630
631 #ifndef EXTRACT_MACROS          /* To debug the macros.  */
632 #undef EXTRACT_NUMBER
633 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
634 #endif                          /* not EXTRACT_MACROS */
635
636 #endif                          /* REGEX_DEBUG_FLAG */
637
638 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
639    SOURCE must be an lvalue.  */
640
641 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
642         do {                                                            \
643                 EXTRACT_NUMBER (destination, source);                   \
644                 (source) += 2;                                          \
645         } while (0)
646
647 #ifdef REGEX_DEBUG_FLAG
648 static void
649 extract_number_and_incr(int *destination, unsigned char **source)
650 {
651         extract_number(destination, *source);
652         *source += 2;
653 }
654
655 #ifndef EXTRACT_MACROS
656 #undef EXTRACT_NUMBER_AND_INCR
657 #define EXTRACT_NUMBER_AND_INCR(dest, src)      \
658         extract_number_and_incr (&dest, &src)
659 #endif                          /* not EXTRACT_MACROS */
660
661 #endif                          /* REGEX_DEBUG_FLAG */
662 \f
663 /* If REGEX_DEBUG_FLAG is defined, Regex prints many voluminous messages about
664    what it is doing (if the variable `debug' is nonzero).  If linked with the
665    main program in `iregex.c', you can enter patterns and strings interactively.
666    And if linked with the main program in `main.c' and the other test files, you
667    can run the already-written tests.  */
668
669 #if defined (REGEX_DEBUG_FLAG)
670
671 /* We use standard I/O for debugging.  */
672 #include <stdio.h>
673
674 #ifndef emacs
675 /* XEmacs provides its own version of assert() */
676 /* It is useful to test things that ``must'' be true when debugging.  */
677 #include <assert.h>
678 #endif
679
680 #if 0
681 /* no point in having this one, since we do not offer any accessor to this */
682 static int debug = 0;
683 #endif
684
685 #define DEBUG_STATEMENT(e)              e
686 #define DEBUG_PRINT1(x)                 printf(x)
687 #define DEBUG_PRINT2(x1, x2)            printf(x1, x2)
688 #define DEBUG_PRINT3(x1, x2, x3)        printf(x1, x2, x3)
689 #define DEBUG_PRINT4(x1, x2, x3, x4)    printf(x1, x2, x3, x4)
690 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
691         print_partial_compiled_pattern(s, e)
692 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
693         print_double_string (w, s1, sz1, s2, sz2)
694
695 /* Print the fastmap in human-readable form.  */
696
697 static void print_fastmap(char *fastmap)
698 {
699         unsigned was_a_range = 0;
700         unsigned i = 0;
701
702         while (i < (1 << BYTEWIDTH)) {
703                 if (fastmap[i++]) {
704                         was_a_range = 0;
705                         putchar(i - 1);
706                         while (i < (1 << BYTEWIDTH) && fastmap[i]) {
707                                 was_a_range = 1;
708                                 i++;
709                         }
710                         if (was_a_range) {
711                                 putchar('-');
712                                 putchar(i - 1);
713                         }
714                 }
715         }
716         putchar('\n');
717 }
718
719 /* Print a compiled pattern string in human-readable form, starting at
720    the START pointer into it and ending just before the pointer END.  */
721
722 /* can't help it ... the ugly mule code changes `start' upon alignment,
723  * so this can't have the const qualifier ... bugger */
724 static void
725 print_partial_compiled_pattern(unsigned char *start, re_char *end)
726 {
727         int mcnt, mcnt2;
728         unsigned char *p = start;
729         re_char *pend = end;
730
731         if (start == NULL) {
732                 puts("(null)");
733                 return;
734         }
735
736         /* Loop over pattern commands.  */
737         while (p < pend) {
738                 printf("%ld:\t", (long)(p - start));
739
740                 switch ((const re_opcode_t)*p++) {
741                 case no_op:
742                         printf("/no_op");
743                         break;
744
745                 case exactn:
746                         mcnt = *p++;
747                         printf("/exactn/%d", mcnt);
748                         do {
749                                 putchar('/');
750                                 putchar(*p++);
751                         }
752                         while (--mcnt);
753                         break;
754
755                 case start_memory:
756                         mcnt = *p++;
757                         printf("/start_memory/%d/%d", mcnt, *p++);
758                         break;
759
760                 case stop_memory:
761                         mcnt = *p++;
762                         printf("/stop_memory/%d/%d", mcnt, *p++);
763                         break;
764
765                 case duplicate:
766                         printf("/duplicate/%d", *p++);
767                         break;
768
769                 case anychar:
770                         printf("/anychar");
771                         break;
772
773                 case charset:
774                 case charset_not: {
775                         REGISTER int c, last = -100;
776                         REGISTER int in_range = 0;
777
778                         printf("/charset [%s",
779                                (re_opcode_t) * (p - 1) == charset_not
780                                ? "^" : "");
781
782                         assert(p + *p < pend);
783
784                         for (c = 0; c < 256; c++)
785                                 if (((unsigned char)(c / 8) < *p)
786                                     && (p[1 + (c / 8)] &
787                                         (1 << (c % 8)))) {
788                                         /* Are we starting a range?  */
789                                         if (last + 1 == c && !in_range) {
790                                                 putchar('-');
791                                                 in_range = 1;
792                                         }
793                                         /* Have we broken a range?  */
794                                         else if (last + 1 != c
795                                                  && in_range) {
796                                                 putchar(last);
797                                                 in_range = 0;
798                                         }
799
800                                         if (!in_range)
801                                                 putchar(c);
802
803                                         last = c;
804                                 }
805
806                         if (in_range)
807                                 putchar(last);
808
809                         putchar(']');
810
811                         p += 1 + *p;
812                         break;
813                 }
814
815 #ifdef MULE
816                 case charset_mule:
817                 case charset_mule_not: {
818                         int nentries, i;
819
820                         printf("/charset_mule [%s",
821                                (re_opcode_t) * (p - 1) == charset_mule_not
822                                ? "^" : "");
823                         /* u_r_t_nentries() CHANGES its arg, why on earth
824                          * is this marked const here then? :O -hrop */
825                         nentries = unified_range_table_nentries(p);
826                         for (i = 0; i < nentries; i++) {
827                                 EMACS_INT first, last;
828                                 Lisp_Object dummy_val;
829
830                                 /* u_r_t_get_range CHANGES its arg ...
831                                  * p can't be const thence */
832                                 unified_range_table_get_range(p, i,
833                                                               &first,
834                                                               &last,
835                                                               &dummy_val);
836                                 if (first < 0x100)
837                                         putchar(first);
838                                 else
839                                         printf("(0x%lx)", (long)first);
840                                 if (first != last) {
841                                         putchar('-');
842                                         if (last < 0x100)
843                                                 putchar(last);
844                                         else
845                                                 printf("(0x%lx)",
846                                                        (long)last);
847                                 }
848                         }
849                         putchar(']');
850                         p += unified_range_table_bytes_used(p);
851                 }
852                         break;
853 #endif
854
855                 case begline:
856                         printf("/begline");
857                         break;
858
859                 case endline:
860                         printf("/endline");
861                         break;
862
863                 case on_failure_jump:
864                         extract_number_and_incr(&mcnt, &p);
865                         printf("/on_failure_jump to %ld",
866                                (long)(p + mcnt - start));
867                         break;
868
869                 case on_failure_keep_string_jump:
870                         extract_number_and_incr(&mcnt, &p);
871                         printf("/on_failure_keep_string_jump to %ld",
872                                (long)(p + mcnt - start));
873                         break;
874
875                 case dummy_failure_jump:
876                         extract_number_and_incr(&mcnt, &p);
877                         printf("/dummy_failure_jump to %ld",
878                                (long)(p + mcnt - start));
879                         break;
880
881                 case push_dummy_failure:
882                         printf("/push_dummy_failure");
883                         break;
884
885                 case maybe_pop_jump:
886                         extract_number_and_incr(&mcnt, &p);
887                         printf("/maybe_pop_jump to %ld",
888                                (long)(p + mcnt - start));
889                         break;
890
891                 case pop_failure_jump:
892                         extract_number_and_incr(&mcnt, &p);
893                         printf("/pop_failure_jump to %ld",
894                                (long)(p + mcnt - start));
895                         break;
896
897                 case jump_past_alt:
898                         extract_number_and_incr(&mcnt, &p);
899                         printf("/jump_past_alt to %ld",
900                                (long)(p + mcnt - start));
901                         break;
902
903                 case jump:
904                         extract_number_and_incr(&mcnt, &p);
905                         printf("/jump to %ld", (long)(p + mcnt - start));
906                         break;
907
908                 case succeed_n:
909                         extract_number_and_incr(&mcnt, &p);
910                         extract_number_and_incr(&mcnt2, &p);
911                         printf("/succeed_n to %ld, %d times",
912                                (long)(p + mcnt - start), mcnt2);
913                         break;
914
915                 case jump_n:
916                         extract_number_and_incr(&mcnt, &p);
917                         extract_number_and_incr(&mcnt2, &p);
918                         printf("/jump_n to %ld, %d times",
919                                (long)(p + mcnt - start), mcnt2);
920                         break;
921
922                 case set_number_at:
923                         extract_number_and_incr(&mcnt, &p);
924                         extract_number_and_incr(&mcnt2, &p);
925                         printf("/set_number_at location %ld to %d",
926                                (long)(p + mcnt - start), mcnt2);
927                         break;
928
929                 case wordbound:
930                         printf("/wordbound");
931                         break;
932
933                 case notwordbound:
934                         printf("/notwordbound");
935                         break;
936
937                 case wordbeg:
938                         printf("/wordbeg");
939                         break;
940
941                 case wordend:
942                         printf("/wordend");
943
944 #ifdef emacs
945                 case before_dot:
946                         printf("/before_dot");
947                         break;
948
949                 case at_dot:
950                         printf("/at_dot");
951                         break;
952
953                 case after_dot:
954                         printf("/after_dot");
955                         break;
956
957                 case syntaxspec:
958                         printf("/syntaxspec");
959                         mcnt = *p++;
960                         printf("/%d", mcnt);
961                         break;
962
963                 case notsyntaxspec:
964                         printf("/notsyntaxspec");
965                         mcnt = *p++;
966                         printf("/%d", mcnt);
967                         break;
968
969 #ifdef MULE
970 /* 97/2/17 jhod Mule category patch */
971                 case categoryspec:
972                         printf("/categoryspec");
973                         mcnt = *p++;
974                         printf("/%d", mcnt);
975                         break;
976
977                 case notcategoryspec:
978                         printf("/notcategoryspec");
979                         mcnt = *p++;
980                         printf("/%d", mcnt);
981                         break;
982 /* end of category patch */
983 #endif                          /* MULE */
984 #endif                          /* emacs */
985
986                 case wordchar:
987                         printf("/wordchar");
988                         break;
989
990                 case notwordchar:
991                         printf("/notwordchar");
992                         break;
993
994                 case begbuf:
995                         printf("/begbuf");
996                         break;
997
998                 case endbuf:
999                         printf("/endbuf");
1000                         break;
1001
1002                 case succeed:
1003                 default:
1004                         printf("?%d", *(p - 1));
1005                 }
1006
1007                 putchar('\n');
1008         }
1009
1010         printf("%ld:\tend of pattern.\n", (long)(p - start));
1011 }
1012
1013 static void
1014 print_compiled_pattern(struct re_pattern_buffer *bufp)
1015 {
1016 #if 0
1017         /* cant be const */
1018         re_char *buffer = bufp->buffer;
1019 #else
1020         unsigned char *buffer = bufp->buffer;
1021 #endif
1022
1023         print_partial_compiled_pattern(buffer, buffer + bufp->used);
1024         printf("%ld bytes used/%ld bytes allocated.\n", bufp->used,
1025                bufp->allocated);
1026
1027         if (bufp->fastmap_accurate && bufp->fastmap) {
1028                 printf("fastmap: ");
1029                 print_fastmap(bufp->fastmap);
1030         }
1031
1032         printf("re_nsub: %ld\t", (long)bufp->re_nsub);
1033         printf("re_ngroups: %ld\t", (long)bufp->re_ngroups);
1034         printf("regs_alloc: %d\t", bufp->regs_allocated);
1035         printf("can_be_null: %d\t", bufp->can_be_null);
1036         printf("newline_anchor: %d\n", bufp->newline_anchor);
1037         printf("no_sub: %d\t", bufp->no_sub);
1038         printf("not_bol: %d\t", bufp->not_bol);
1039         printf("not_eol: %d\t", bufp->not_eol);
1040         printf("syntax: %d\n", bufp->syntax);
1041         /* Perhaps we should print the translate table?  */
1042         /* and maybe the category table? */
1043
1044         if (bufp->external_to_internal_register) {
1045                 int i;
1046
1047                 printf("external_to_internal_register:\n");
1048                 for (i = 0; i <= bufp->re_nsub; i++) {
1049                         if (i > 0)
1050                                 printf(", ");
1051                         printf("%d -> %d", i,
1052                                bufp->external_to_internal_register[i]);
1053                 }
1054                 printf("\n");
1055         }
1056 }
1057
1058 static void
1059 print_double_string(re_char * where, re_char * string1, int size1,
1060                     re_char * string2, int size2)
1061 {
1062         if (where == NULL)
1063                 printf("(null)");
1064         else {
1065                 Element_count this_char;
1066
1067                 if (FIRST_STRING_P(where)) {
1068                         for (this_char = where - string1; this_char < size1;
1069                              this_char++)
1070                                 putchar(string1[this_char]);
1071
1072                         where = string2;
1073                 }
1074
1075                 for (this_char = where - string2; this_char < size2;
1076                      this_char++)
1077                         putchar(string2[this_char]);
1078         }
1079 }
1080
1081 #else                           /* not REGEX_DEBUG_FLAG */
1082
1083 #undef assert
1084 #define assert(e)
1085
1086 #define DEBUG_STATEMENT(e)
1087 #define DEBUG_PRINT1(x)
1088 #define DEBUG_PRINT2(x1, x2)
1089 #define DEBUG_PRINT3(x1, x2, x3)
1090 #define DEBUG_PRINT4(x1, x2, x3, x4)
1091 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1092 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1093
1094 #endif                          /* REGEX_DEBUG_FLAG */
1095 \f
1096 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1097    also be assigned to arbitrarily: each pattern buffer stores its own
1098    syntax, so it can be changed between regex compilations.  */
1099 /* This has no initializer because initialized variables in Emacs
1100    become read-only after dumping.  */
1101 reg_syntax_t re_syntax_options;
1102
1103 /* Specify the precise syntax of regexps for compilation.  This provides
1104    for compatibility for various utilities which historically have
1105    different, incompatible syntaxes.
1106
1107    The argument SYNTAX is a bit mask comprised of the various bits
1108    defined in regex.h.  We return the old syntax.  */
1109
1110 reg_syntax_t re_set_syntax(reg_syntax_t syntax)
1111 {
1112         reg_syntax_t ret = re_syntax_options;
1113
1114         re_syntax_options = syntax;
1115         return ret;
1116 }
1117 \f
1118 /* This table gives an error message for each of the error codes listed
1119    in regex.h.  Obviously the order here has to be same as there.
1120    POSIX doesn't require that we do anything for REG_NOERROR,
1121    but why not be nice?  */
1122
1123 static const char *re_error_msgid[] = {
1124         "Success",              /* REG_NOERROR */
1125         "No match",             /* REG_NOMATCH */
1126         "Invalid regular expression",   /* REG_BADPAT */
1127         "Invalid collation character",  /* REG_ECOLLATE */
1128         "Invalid character class name", /* REG_ECTYPE */
1129         "Trailing backslash",   /* REG_EESCAPE */
1130         "Invalid back reference",       /* REG_ESUBREG */
1131         "Unmatched [ or [^",    /* REG_EBRACK */
1132         "Unmatched ( or \\(",   /* REG_EPAREN */
1133         "Unmatched \\{",        /* REG_EBRACE */
1134         "Invalid content of \\{\\}",    /* REG_BADBR */
1135         "Invalid range end",    /* REG_ERANGE */
1136         "Memory exhausted",     /* REG_ESPACE */
1137         "Invalid preceding regular expression", /* REG_BADRPT */
1138         "Premature end of regular expression",  /* REG_EEND */
1139         "Regular expression too big",   /* REG_ESIZE */
1140         "Unmatched ) or \\)",   /* REG_ERPAREN */
1141 #ifdef emacs
1142         "Invalid syntax designator",    /* REG_ESYNTAX */
1143 #endif
1144 #ifdef MULE
1145         "Ranges may not span charsets", /* REG_ERANGESPAN */
1146         "Invalid category designator",  /* REG_ECATEGORY */
1147 #endif
1148 };
1149 \f
1150 /* Avoiding alloca during matching, to placate r_alloc.  */
1151
1152 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1153    searching and matching functions should not call alloca.  On some
1154    systems, alloca is implemented in terms of malloc, and if we're
1155    using the relocating allocator routines, then malloc could cause a
1156    relocation, which might (if the strings being searched are in the
1157    ralloc heap) shift the data out from underneath the regexp
1158    routines.
1159
1160    Here's another reason to avoid allocation: Emacs
1161    processes input from X in a signal handler; processing X input may
1162    call malloc; if input arrives while a matching routine is calling
1163    malloc, then we're scrod.  But Emacs can't just block input while
1164    calling matching routines; then we don't notice interrupts when
1165    they come in.  So, Emacs blocks input around all regexp calls
1166    except the matching calls, which it leaves unprotected, in the
1167    faith that they will not malloc.  */
1168
1169 /* Normally, this is fine.  */
1170 #define MATCH_MAY_ALLOCATE
1171
1172 /* When using GNU C, we are not REALLY using the C alloca, no matter
1173    what config.h may say.  So don't take precautions for it.  */
1174 #ifdef __GNUC__
1175 #undef C_ALLOCA
1176 #endif
1177
1178 /* The match routines may not allocate if (1) they would do it with malloc
1179    and (2) it's not safe for them to use malloc.
1180    Note that if REL_ALLOC is defined, matching would not use malloc for the
1181    failure stack, but we would still use it for the register vectors;
1182    so REL_ALLOC should not affect this.  */
1183 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1184 #undef MATCH_MAY_ALLOCATE
1185 #endif
1186 \f
1187 /* Failure stack declarations and macros; both re_compile_fastmap and
1188    re_match_2 use a failure stack.  These have to be macros because of
1189    REGEX_ALLOCATE_STACK.  */
1190
1191 /* Number of failure points for which to initially allocate space
1192    when matching.  If this number is exceeded, we allocate more
1193    space, so it is not a hard limit.  */
1194 #ifndef INIT_FAILURE_ALLOC
1195 #define INIT_FAILURE_ALLOC 20
1196 #endif
1197
1198 /* Roughly the maximum number of failure points on the stack.  Would be
1199    exactly that if always used MAX_FAILURE_SPACE each time we failed.
1200    This is a variable only so users of regex can assign to it; we never
1201    change it ourselves.  */
1202 #if defined (MATCH_MAY_ALLOCATE) || defined (REGEX_MALLOC)
1203 /* 4400 was enough to cause a crash on Alpha OSF/1,
1204    whose default stack limit is 2mb.  */
1205 int re_max_failures = 50000;
1206 #else
1207 int re_max_failures = 2000;
1208 #endif
1209
1210 union fail_stack_elt {
1211         const unsigned char *pointer;
1212         int integer;
1213 };
1214
1215 typedef union fail_stack_elt fail_stack_elt_t;
1216
1217 typedef struct {
1218         fail_stack_elt_t *stack;
1219         Element_count size;
1220         Element_count avail;    /* Offset of next open position.  */
1221 } fail_stack_type;
1222
1223 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1224 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1225 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1226
1227 /* Define macros to initialize and free the failure stack.
1228    Do `return -2' if the alloc fails.  */
1229
1230 #ifdef MATCH_MAY_ALLOCATE
1231 #define INIT_FAIL_STACK()                                               \
1232         do {                                                            \
1233                 fail_stack.stack = (fail_stack_elt_t *)                 \
1234                         REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC *      \
1235                                               sizeof (fail_stack_elt_t)); \
1236                                                                         \
1237                 if (fail_stack.stack == NULL)                           \
1238                         return -2;                                      \
1239                                                                         \
1240                 fail_stack.size = INIT_FAILURE_ALLOC;                   \
1241                 fail_stack.avail = 0;                                   \
1242         } while (0)
1243
1244 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1245 #else
1246 #define INIT_FAIL_STACK()                       \
1247         do {                                    \
1248                 fail_stack.avail = 0;           \
1249         } while (0)
1250
1251 #define RESET_FAIL_STACK()
1252 #endif
1253
1254 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1255
1256    Return 1 if succeeds, and 0 if either ran out of memory
1257    allocating space for it or it was already too large.
1258
1259    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1260
1261 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
1262   ((int) (fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS        \
1263    ? 0                                                                  \
1264    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
1265         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
1266           (fail_stack).size * sizeof (fail_stack_elt_t),                \
1267           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
1268                                                                         \
1269       (fail_stack).stack == NULL                                        \
1270       ? 0                                                               \
1271       : ((fail_stack).size <<= 1,                                       \
1272          1)))
1273
1274 /* Push pointer POINTER on FAIL_STACK.
1275    Return 1 if was able to do so and 0 if ran out of memory allocating
1276    space to do so.  */
1277 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
1278   ((FAIL_STACK_FULL ()                                                  \
1279     && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
1280    ? 0                                                                  \
1281    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
1282       1))
1283
1284 /* Push a pointer value onto the failure stack.
1285    Assumes the variable `fail_stack'.  Probably should only
1286    be called from within `PUSH_FAILURE_POINT'.  */
1287 #define PUSH_FAILURE_POINTER(item)                                      \
1288         fail_stack.stack[fail_stack.avail++].pointer =                  \
1289                 (const unsigned char*)(item)
1290
1291 /* This pushes an integer-valued item onto the failure stack.
1292    Assumes the variable `fail_stack'.  Probably should only
1293    be called from within `PUSH_FAILURE_POINT'.  */
1294 #define PUSH_FAILURE_INT(item)                                  \
1295         fail_stack.stack[fail_stack.avail++].integer = (item)
1296
1297 /* Push a fail_stack_elt_t value onto the failure stack.
1298    Assumes the variable `fail_stack'.  Probably should only
1299    be called from within `PUSH_FAILURE_POINT'.  */
1300 #define PUSH_FAILURE_ELT(item)                                  \
1301         fail_stack.stack[fail_stack.avail++] =  (item)
1302
1303 /* These three POP... operations complement the three PUSH... operations.
1304    All assume that `fail_stack' is nonempty.  */
1305 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1306 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1307 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1308
1309 /* Used to omit pushing failure point id's when we're not debugging.  */
1310 #ifdef REGEX_DEBUG_FLAG
1311 #define DEBUG_PUSH PUSH_FAILURE_INT
1312 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1313 #else
1314 #define DEBUG_PUSH(item)
1315 #define DEBUG_POP(item_addr)
1316 #endif
1317
1318 /* Push the information about the state we will need
1319    if we ever fail back to it.
1320
1321    Requires variables fail_stack, regstart, regend, reg_info, and
1322    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1323    declared.
1324
1325    Does `return FAILURE_CODE' if runs out of memory.  */
1326
1327 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
1328 #define DECLARE_DESTINATION char *destination
1329 #else
1330 #define DECLARE_DESTINATION DECLARE_NOTHING
1331 #endif
1332
1333 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1334         do {                                                            \
1335                 DECLARE_DESTINATION;                                    \
1336                 /* Must be int, so when we don't save any registers, the\
1337                    arithmetic \ of 0 + -1 isn't done as unsigned.  */   \
1338                 int this_reg;                                           \
1339                                                                         \
1340                 DEBUG_STATEMENT (failure_id++);                         \
1341                 DEBUG_STATEMENT (nfailure_points_pushed++);             \
1342                 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1343                 DEBUG_PRINT2 ("  Before push, next avail: %lu\n",       \
1344                               (long unsigned int)(fail_stack).avail);   \
1345                 DEBUG_PRINT2 ("                     size: %lu\n",       \
1346                               (long unsigned int)(fail_stack).size);    \
1347                                                                         \
1348                 DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS); \
1349                 DEBUG_PRINT2 ("     available: %ld\n",                  \
1350                               (long) REMAINING_AVAIL_SLOTS);            \
1351                                                                         \
1352                 /* Ensure we have enough space allocated for what       \
1353                  * we will push.  */                                    \
1354                 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) {     \
1355                         if (!DOUBLE_FAIL_STACK (fail_stack))            \
1356                                 return failure_code;                    \
1357                                                                         \
1358                         DEBUG_PRINT2 ("\n  Doubled stack; size now: %lu\n", \
1359                                       (unsigned long) (fail_stack).size); \
1360                         DEBUG_PRINT2 ("  slots available: %ld\n",       \
1361                                       (long) REMAINING_AVAIL_SLOTS);    \
1362                 }                                                       \
1363                                                                         \
1364                 /* Push the info, starting with the registers.  */      \
1365                 DEBUG_PRINT1 ("\n");                                    \
1366                                                                         \
1367                 for (this_reg = lowest_active_reg;                      \
1368                      this_reg <= highest_active_reg;                    \
1369                      this_reg++) {                                      \
1370                         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg); \
1371                         DEBUG_STATEMENT (num_regs_pushed++);            \
1372                                                                         \
1373                         DEBUG_PRINT2("    start: %p\n",                 \
1374                                      regstart[this_reg]);               \
1375                         PUSH_FAILURE_POINTER(regstart[this_reg]);       \
1376                                                                         \
1377                         DEBUG_PRINT2 ("    end: %p\n",                  \
1378                                       regend[this_reg]);                \
1379                         PUSH_FAILURE_POINTER(regend[this_reg]); \
1380                                                                         \
1381                         DEBUG_PRINT2 ("    info: 0x%p\n      ",         \
1382                                       &reg_info[this_reg]);             \
1383                         DEBUG_PRINT2 (" match_null=%d",                 \
1384                                       REG_MATCH_NULL_STRING_P           \
1385                                       (reg_info[this_reg]));            \
1386                         DEBUG_PRINT2 (" active=%d",                     \
1387                                       IS_ACTIVE (reg_info[this_reg]));  \
1388                         DEBUG_PRINT2 (" matched_something=%d",          \
1389                                       MATCHED_SOMETHING                 \
1390                                       (reg_info[this_reg]));            \
1391                         DEBUG_PRINT2 (" ever_matched_something=%d",     \
1392                                       EVER_MATCHED_SOMETHING            \
1393                                       (reg_info[this_reg]));            \
1394                         DEBUG_PRINT1 ("\n");                            \
1395                         PUSH_FAILURE_ELT(reg_info[this_reg].word);      \
1396                 }                                                       \
1397                                                                         \
1398                 DEBUG_PRINT2 ("  Pushing  low active reg: %d\n",        \
1399                               lowest_active_reg);                       \
1400                 PUSH_FAILURE_INT (lowest_active_reg);                   \
1401                                                                         \
1402                 DEBUG_PRINT2 ("  Pushing high active reg: %d\n",        \
1403                               highest_active_reg);                      \
1404                 PUSH_FAILURE_INT (highest_active_reg);                  \
1405                                                                         \
1406                 DEBUG_PRINT2 ("  Pushing pattern 0x%lx: \n",            \
1407                               (long) pattern_place);                    \
1408                 DEBUG_PRINT_COMPILED_PATTERN(bufp, pattern_place, pend); \
1409                 PUSH_FAILURE_POINTER (pattern_place);                   \
1410                                                                         \
1411                 DEBUG_PRINT2("  Pushing string %p: `",                  \
1412                              string_place);                             \
1413                 DEBUG_PRINT_DOUBLE_STRING(string_place,                 \
1414                                           string1, size1,               \
1415                                           string2, size2);              \
1416                 DEBUG_PRINT1("'\n");                                    \
1417                 PUSH_FAILURE_POINTER (string_place);                    \
1418                                                                         \
1419                 DEBUG_PRINT2("  Pushing failure id: %u\n", failure_id); \
1420                 DEBUG_PUSH(failure_id);                                 \
1421         } while (0)
1422
1423 /* This is the number of items that are pushed and popped on the stack
1424    for each register.  */
1425 #define NUM_REG_ITEMS  3
1426
1427 /* Individual items aside from the registers.  */
1428 #ifdef DEBUG
1429 #define NUM_NONREG_ITEMS 5      /* Includes failure point id.  */
1430 #else
1431 #define NUM_NONREG_ITEMS 4
1432 #endif
1433
1434 /* We push at most this many items on the stack.  */
1435 /* We used to use (num_regs - 1), which is the number of registers
1436    this regexp will save; but that was changed to 5
1437    to avoid stack overflow for a regexp with lots of parens.  */
1438 #define MAX_FAILURE_ITEMS (5 * (NUM_REG_ITEMS + NUM_NONREG_ITEMS))
1439
1440 /* We actually push this many items.  */
1441 #define NUM_FAILURE_ITEMS                                               \
1442   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
1443     + NUM_NONREG_ITEMS)
1444
1445 /* How many items can still be added to the stack without overflowing it.  */
1446 #define REMAINING_AVAIL_SLOTS ((int) ((fail_stack).size - (fail_stack).avail))
1447
1448 /* Pops what PUSH_FAIL_STACK pushes.
1449
1450    We restore into the parameters, all of which should be lvalues:
1451      STR -- the saved data position.
1452      PAT -- the saved pattern position.
1453      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1454      REGSTART, REGEND -- arrays of string positions.
1455      REG_INFO -- array of information about each subexpression.
1456
1457    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1458    `pend', `string1', `size1', `string2', and `size2'.  */
1459
1460 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg,                  \
1461                           regstart, regend, reg_info)                   \
1462         do {                                                            \
1463                 int this_reg;                                           \
1464                 const unsigned char *string_temp;                       \
1465                 DEBUG_STATEMENT (fail_stack_elt_t ffailure_id);         \
1466                                                                         \
1467                 assert (!FAIL_STACK_EMPTY ());                          \
1468                                                                         \
1469                 /* Remove failure points and point to                   \
1470                  * how many regs pushed.  */                            \
1471                 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                  \
1472                 DEBUG_PRINT2 ("  Before pop, next avail: %lu\n",        \
1473                               (unsigned long) fail_stack.avail);        \
1474                 DEBUG_PRINT2 ("                    size: %lu\n",        \
1475                               (unsigned long) fail_stack.size);         \
1476                                                                         \
1477                 assert (fail_stack.avail >= NUM_NONREG_ITEMS);          \
1478                                                                         \
1479                 DEBUG_POP (&ffailure_id.integer);                       \
1480                 DEBUG_PRINT2 ("  Popping failure id: %u\n",             \
1481                               * (unsigned int *) &ffailure_id);         \
1482                                                                         \
1483                 /* If the saved string location is NULL, it             \
1484                    came from an on_failure_keep_string_jump opcode,     \
1485                    and we want to throw away the saved NULL, thus       \
1486                    retaining our current position in the string.  */    \
1487                 string_temp = (const unsigned char *)                   \
1488                         POP_FAILURE_POINTER ();                         \
1489                 if (string_temp != NULL)                                \
1490                         str = string_temp;                              \
1491                                                                         \
1492                 DEBUG_PRINT2("  Popping string %p: `", str);            \
1493                 DEBUG_PRINT_DOUBLE_STRING(str,                          \
1494                                           string1, size1,               \
1495                                           string2, size2);              \
1496                 DEBUG_PRINT1("'\n");                                    \
1497                                                                         \
1498                 pat = (unsigned char *) POP_FAILURE_POINTER();          \
1499                 DEBUG_PRINT2 ("  Popping pattern %p: ", pat);           \
1500                 DEBUG_PRINT_COMPILED_PATTERN(bufp, pat, pend);          \
1501                                                                         \
1502                 /* Restore register info.  */                           \
1503                 high_reg = (unsigned) POP_FAILURE_INT ();               \
1504                 DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg); \
1505                                                                         \
1506                 low_reg = (unsigned) POP_FAILURE_INT ();                \
1507                 DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg); \
1508                                                                         \
1509                 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) { \
1510                         DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg); \
1511                                                                         \
1512                         reg_info[this_reg].word = POP_FAILURE_ELT ();   \
1513                         DEBUG_PRINT2 ("      info: %p\n",               \
1514                                       &reg_info[this_reg]);             \
1515                                                                         \
1516                         regend[this_reg] = POP_FAILURE_POINTER ();      \
1517                         DEBUG_PRINT2 ("      end: %p\n",                \
1518                                       regend[this_reg]);                \
1519                                                                         \
1520                         regstart[this_reg] = POP_FAILURE_POINTER ();    \
1521                         DEBUG_PRINT2 ("      start: %p\n",              \
1522                                       regstart[this_reg]);              \
1523                 }                                                       \
1524                                                                         \
1525                 set_regs_matched_done = 0;                              \
1526                 DEBUG_STATEMENT (nfailure_points_popped++);             \
1527         } while (0)                     /* POP_FAILURE_POINT */
1528 \f
1529 /* Structure for per-register (a.k.a. per-group) information.
1530    Other register information, such as the
1531    starting and ending positions (which are addresses), and the list of
1532    inner groups (which is a bits list) are maintained in separate
1533    variables.
1534
1535    We are making a (strictly speaking) nonportable assumption here: that
1536    the compiler will pack our bit fields into something that fits into
1537    the type of `word', i.e., is something that fits into one item on the
1538    failure stack.  */
1539
1540 typedef union {
1541         fail_stack_elt_t word;
1542         struct {
1543                 /* This field is one if this group can match the empty string,
1544                    zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1545 #define MATCH_NULL_UNSET_VALUE 3
1546                 unsigned match_null_string_p:2;
1547                 unsigned is_active:1;
1548                 unsigned matched_something:1;
1549                 unsigned ever_matched_something:1;
1550         } bits;
1551 } register_info_type;
1552
1553 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1554 #define IS_ACTIVE(R)  ((R).bits.is_active)
1555 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1556 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1557
1558 /* Call this when have matched a real character; it sets `matched' flags
1559    for the subexpressions which we are currently inside.  Also records
1560    that those subexprs have matched.  */
1561 #define SET_REGS_MATCHED()                                              \
1562   do                                                                    \
1563     {                                                                   \
1564       if (!set_regs_matched_done)                                       \
1565         {                                                               \
1566           Element_count r;                                              \
1567           set_regs_matched_done = 1;                                    \
1568           for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
1569             {                                                           \
1570               MATCHED_SOMETHING (reg_info[r])                           \
1571                 = EVER_MATCHED_SOMETHING (reg_info[r])                  \
1572                 = 1;                                                    \
1573             }                                                           \
1574         }                                                               \
1575     }                                                                   \
1576   while (0)
1577
1578 /* Registers are set to a sentinel when they haven't yet matched.  */
1579 static unsigned char reg_unset_dummy;
1580 #define REG_UNSET_VALUE (&reg_unset_dummy)
1581 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1582 \f
1583 /* Subroutine declarations and macros for regex_compile.  */
1584
1585 /* Fetch the next character in the uncompiled pattern---translating it
1586    if necessary.  Also cast from a signed character in the constant
1587    string passed to us by the user to an unsigned char that we can use
1588    as an array index (in, e.g., `translate').  */
1589 #define PATFETCH(c)                                                     \
1590   do {                                                                  \
1591     PATFETCH_RAW (c);                                                   \
1592     c = TRANSLATE (c);                                                  \
1593   } while (0)
1594
1595 /* Fetch the next character in the uncompiled pattern, with no
1596    translation.  */
1597 #define PATFETCH_RAW(c)                                                 \
1598   do {if (p == pend) return REG_EEND;                                   \
1599     assert (p < pend);                                                  \
1600     c = charptr_emchar (p);                                             \
1601     INC_CHARPTR (p);                                                    \
1602   } while (0)
1603
1604 /* Go backwards one character in the pattern.  */
1605 #define PATUNFETCH DEC_CHARPTR (p)
1606
1607 #ifdef MULE
1608
1609 #define PATFETCH_EXTENDED(emch)                                         \
1610   do {if (p == pend) return REG_EEND;                                   \
1611     assert (p < pend);                                                  \
1612     emch = charptr_emchar ((const Bufbyte *) p);                        \
1613     INC_CHARPTR (p);                                                    \
1614     if (TRANSLATE_P (translate) && emch < 0x80)                         \
1615       emch = (Emchar) (unsigned char) RE_TRANSLATE (emch);              \
1616   } while (0)
1617
1618 #define PATFETCH_RAW_EXTENDED(emch)                                     \
1619   do {if (p == pend) return REG_EEND;                                   \
1620     assert (p < pend);                                                  \
1621     emch = charptr_emchar ((const Bufbyte *) p);                        \
1622     INC_CHARPTR (p);                                                    \
1623   } while (0)
1624
1625 #define PATUNFETCH_EXTENDED DEC_CHARPTR (p)
1626
1627 #define PATFETCH_EITHER(emch)                   \
1628   do {                                          \
1629     if (has_extended_chars)                     \
1630       PATFETCH_EXTENDED (emch);                 \
1631     else                                        \
1632       PATFETCH (emch);                          \
1633   } while (0)
1634
1635 #define PATFETCH_RAW_EITHER(emch)               \
1636   do {                                          \
1637     if (has_extended_chars)                     \
1638       PATFETCH_RAW_EXTENDED (emch);             \
1639     else                                        \
1640       PATFETCH_RAW (emch);                      \
1641   } while (0)
1642
1643 #define PATUNFETCH_EITHER                       \
1644   do {                                          \
1645     if (has_extended_chars)                     \
1646       PATUNFETCH_EXTENDED (emch);               \
1647     else                                        \
1648       PATUNFETCH (emch);                        \
1649   } while (0)
1650
1651 #else                           /* not MULE */
1652
1653 #define PATFETCH_EITHER(emch) PATFETCH (emch)
1654 #define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch)
1655 #define PATUNFETCH_EITHER PATUNFETCH
1656
1657 #endif                          /* MULE */
1658
1659 /* If `translate' is non-null, return translate[D], else just D.  We
1660    cast the subscript to translate because some data is declared as
1661    `char *', to avoid warnings when a string constant is passed.  But
1662    when we use a character as a subscript we must make it unsigned.  */
1663 #define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d))
1664
1665 /* Macros for outputting the compiled pattern into `buffer'.  */
1666
1667 /* If the buffer isn't allocated when it comes in, use this.  */
1668 #define INIT_BUF_SIZE  32
1669
1670 /* Make sure we have at least N more bytes of space in buffer.  */
1671 #define GET_BUFFER_SPACE(n)                                             \
1672     while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated)  \
1673       EXTEND_BUFFER ()
1674
1675 /* Make sure we have one more byte of buffer space and then add C to it.  */
1676 #define BUF_PUSH(c)                                                     \
1677   do {                                                                  \
1678     GET_BUFFER_SPACE (1);                                               \
1679     *buf_end++ = (unsigned char) (c);                                   \
1680   } while (0)
1681
1682 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1683 #define BUF_PUSH_2(c1, c2)                                              \
1684   do {                                                                  \
1685     GET_BUFFER_SPACE (2);                                               \
1686     *buf_end++ = (unsigned char) (c1);                                  \
1687     *buf_end++ = (unsigned char) (c2);                                  \
1688   } while (0)
1689
1690 /* As with BUF_PUSH_2, except for three bytes.  */
1691 #define BUF_PUSH_3(c1, c2, c3)                                          \
1692   do {                                                                  \
1693     GET_BUFFER_SPACE (3);                                               \
1694     *buf_end++ = (unsigned char) (c1);                                  \
1695     *buf_end++ = (unsigned char) (c2);                                  \
1696     *buf_end++ = (unsigned char) (c3);                                  \
1697   } while (0)
1698
1699 /* Store a jump with opcode OP at LOC to location TO.  We store a
1700    relative address offset by the three bytes the jump itself occupies.  */
1701 #define STORE_JUMP(op, loc, to) \
1702   store_op1 (op, loc, (to) - (loc) - 3)
1703
1704 /* Likewise, for a two-argument jump.  */
1705 #define STORE_JUMP2(op, loc, to, arg) \
1706   store_op2 (op, loc, (to) - (loc) - 3, arg)
1707
1708 /* Like `STORE_JUMP', but for inserting.  Assume `buf_end' is the
1709    buffer end.  */
1710 #define INSERT_JUMP(op, loc, to) \
1711   insert_op1 (op, loc, (to) - (loc) - 3, buf_end)
1712
1713 /* Like `STORE_JUMP2', but for inserting.  Assume `buf_end' is the
1714    buffer end.  */
1715 #define INSERT_JUMP2(op, loc, to, arg) \
1716   insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end)
1717
1718 /* This is not an arbitrary limit: the arguments which represent offsets
1719    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1720    be too small, many things would have to change.  */
1721 #define MAX_BUF_SIZE (1L << 16)
1722
1723 /* Extend the buffer by twice its current size via realloc and
1724    reset the pointers that pointed into the old block to point to the
1725    correct places in the new one.  If extending the buffer results in it
1726    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1727 #define EXTEND_BUFFER()                                                 \
1728         do {                                                            \
1729                 re_char *old_buffer = bufp->buffer;                     \
1730                 if (bufp->allocated == MAX_BUF_SIZE)                    \
1731                         return REG_ESIZE;                               \
1732                 bufp->allocated <<= 1;                                  \
1733                 if (bufp->allocated > MAX_BUF_SIZE)                     \
1734                         bufp->allocated = MAX_BUF_SIZE;                 \
1735                 bufp->buffer = (unsigned char *)xrealloc(               \
1736                         bufp->buffer, bufp->allocated);                 \
1737                 if (bufp->buffer == NULL) {                             \
1738                         return REG_ESPACE;                              \
1739                 }                                                       \
1740                 /* If the buffer moved, move all the pointers into it.  */ \
1741                 if (old_buffer != bufp->buffer) {                       \
1742                         buf_end = (buf_end - old_buffer) + bufp->buffer; \
1743                         begalt = (begalt - old_buffer) + bufp->buffer;  \
1744                         if (fixup_alt_jump) {                           \
1745                                 fixup_alt_jump =                        \
1746                                         (fixup_alt_jump - old_buffer) + \
1747                                         bufp->buffer;                   \
1748                         }                                               \
1749                         if (laststart) {                                \
1750                                 laststart = (laststart - old_buffer) +  \
1751                                         bufp->buffer;                   \
1752                         }                                               \
1753                         if (pending_exact) {                            \
1754                                 pending_exact =                         \
1755                                         (pending_exact - old_buffer) +  \
1756                                         bufp->buffer;                   \
1757                         }                                               \
1758                 }                                                       \
1759         } while (0)
1760
1761 /* Since we have one byte reserved for the register number argument to
1762    {start,stop}_memory, the maximum number of groups we can report
1763    things about is what fits in that byte.  */
1764 #define MAX_REGNUM 255
1765
1766 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1767    ignore the excess.  */
1768 typedef unsigned regnum_t;
1769
1770 #define INIT_REG_TRANSLATE_SIZE 5
1771
1772 /* Macros for the compile stack.  */
1773
1774 /* Since offsets can go either forwards or backwards, this type needs to
1775    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1776 typedef int pattern_offset_t;
1777
1778 typedef struct {
1779         pattern_offset_t begalt_offset;
1780         pattern_offset_t fixup_alt_jump;
1781         pattern_offset_t inner_group_offset;
1782         pattern_offset_t laststart_offset;
1783         regnum_t regnum;
1784 } compile_stack_elt_t;
1785
1786 typedef struct {
1787         compile_stack_elt_t *stack;
1788         unsigned size;
1789         unsigned avail;         /* Offset of next open position.  */
1790 } compile_stack_type;
1791
1792 #define INIT_COMPILE_STACK_SIZE 32
1793
1794 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1795 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1796
1797 /* The next available element.  */
1798 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1799
1800 /* Set the bit for character C in a bit vector.  */
1801 #define SET_LIST_BIT(c)                         \
1802   (buf_end[((unsigned char) (c)) / BYTEWIDTH]   \
1803    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1804
1805 #ifdef MULE
1806
1807 /* Set the "bit" for character C in a range table. */
1808 #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt)
1809
1810 /* Set the "bit" for character c in the appropriate table. */
1811 #define SET_EITHER_BIT(c)                       \
1812         do {                                    \
1813                 if (has_extended_chars)         \
1814                         SET_RANGETAB_BIT (c);   \
1815                 else                            \
1816                         SET_LIST_BIT (c);       \
1817         } while (0)
1818
1819 #else                           /* not MULE */
1820
1821 #define SET_EITHER_BIT(c) SET_LIST_BIT (c)
1822
1823 #endif
1824
1825 /* Get the next unsigned number in the uncompiled pattern.  */
1826 #define GET_UNSIGNED_NUMBER(num)                                        \
1827   { if (p != pend)                                                      \
1828      {                                                                  \
1829        PATFETCH (c);                                                    \
1830        while (ISDIGIT (c))                                              \
1831          {                                                              \
1832            if (num < 0)                                                 \
1833               num = 0;                                                  \
1834            num = num * 10 + c - '0';                                    \
1835            if (p == pend)                                               \
1836               break;                                                    \
1837            PATFETCH (c);                                                \
1838          }                                                              \
1839        }                                                                \
1840     }
1841
1842 #define CHAR_CLASS_MAX_LENGTH  9        /* Namely, `multibyte'.  */
1843
1844 \f
1845 static void store_op1(re_opcode_t op, unsigned char *loc, int arg);
1846 static void store_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2);
1847 static void insert_op1(re_opcode_t op, unsigned char *loc, int arg,
1848                        unsigned char *end);
1849 static void insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2,
1850                        unsigned char *end);
1851 static re_bool at_begline_loc_p(re_char*, re_char*, reg_syntax_t);
1852 static re_bool at_endline_loc_p(re_char*, re_char*, int syntax);
1853 static re_bool group_in_compile_stack(compile_stack_type compile_stack,
1854                                       regnum_t regnum);
1855 static reg_errcode_t compile_range(re_char ** p_ptr, re_char * pend,
1856                                    RE_TRANSLATE_TYPE translate,
1857                                    reg_syntax_t syntax, unsigned char *b);
1858 #ifdef MULE
1859 static reg_errcode_t compile_extended_range(re_char ** p_ptr,
1860                                             re_char * pend,
1861                                             RE_TRANSLATE_TYPE translate,
1862                                             reg_syntax_t syntax,
1863                                             Lisp_Object rtab);
1864 #endif                          /* MULE */
1865 static re_bool group_match_null_string_p(unsigned char **p,
1866                                          unsigned char *end,
1867                                          register_info_type * reg_info);
1868 static re_bool alt_match_null_string_p(unsigned char *p, unsigned char *end,
1869                                        register_info_type * reg_info);
1870 static re_bool common_op_match_null_string_p(unsigned char **p,
1871                                              unsigned char *end,
1872                                              register_info_type * reg_info);
1873 static int bcmp_translate(const unsigned char *s1, const unsigned char *s2,
1874                           REGISTER int len, RE_TRANSLATE_TYPE translate);
1875 static int re_match_2_internal(struct re_pattern_buffer *bufp,
1876                                re_char * string1, int size1,
1877                                re_char * string2, int size2, int pos,
1878                                struct re_registers *regs, int stop);
1879 \f
1880 #ifndef MATCH_MAY_ALLOCATE
1881
1882 /* If we cannot allocate large objects within re_match_2_internal,
1883    we make the fail stack and register vectors global.
1884    The fail stack, we grow to the maximum size when a regexp
1885    is compiled.
1886    The register vectors, we adjust in size each time we
1887    compile a regexp, according to the number of registers it needs.  */
1888
1889 static fail_stack_type fail_stack;
1890
1891 /* Size with which the following vectors are currently allocated.
1892    That is so we can make them bigger as needed,
1893    but never make them smaller.  */
1894 static int regs_allocated_size;
1895
1896 static re_char **regstart, **regend;
1897 static re_char **old_regstart, **old_regend;
1898 static re_char **best_regstart, **best_regend;
1899 static register_info_type *reg_info;
1900 static re_char **reg_dummy;
1901 static register_info_type *reg_info_dummy;
1902
1903 /* Make the register vectors big enough for NUM_REGS registers,
1904    but don't make them smaller.  */
1905
1906 static void regex_grow_registers(int num_regs)
1907 {
1908         if (num_regs > regs_allocated_size) {
1909                 RETALLOC_IF(regstart, num_regs, re_char *);
1910                 RETALLOC_IF(regend, num_regs, re_char *);
1911                 RETALLOC_IF(old_regstart, num_regs, re_char *);
1912                 RETALLOC_IF(old_regend, num_regs, re_char *);
1913                 RETALLOC_IF(best_regstart, num_regs, re_char *);
1914                 RETALLOC_IF(best_regend, num_regs, re_char *);
1915                 RETALLOC_IF(reg_info, num_regs, register_info_type);
1916                 RETALLOC_IF(reg_dummy, num_regs, re_char *);
1917                 RETALLOC_IF(reg_info_dummy, num_regs, register_info_type);
1918
1919                 regs_allocated_size = num_regs;
1920         }
1921 }
1922
1923 #endif                          /* not MATCH_MAY_ALLOCATE */
1924
1925 /* auxiliary stuff, FSF theft */
1926 /* Character classes.  */
1927 typedef enum {
1928         RECC_ERROR = 0,
1929         RECC_ALNUM, RECC_ALPHA, RECC_WORD,
1930         RECC_GRAPH, RECC_PRINT,
1931         RECC_LOWER, RECC_UPPER,
1932         RECC_PUNCT, RECC_CNTRL,
1933         RECC_DIGIT, RECC_XDIGIT,
1934         RECC_BLANK, RECC_SPACE,
1935         RECC_MULTIBYTE, RECC_NONASCII,
1936         RECC_ASCII, RECC_UNIBYTE
1937 } re_wctype_t;
1938
1939 /* Map a string to the char class it names (if any).  */
1940 static inline re_wctype_t
1941 re_wctype(re_char *str)
1942 {
1943         const char *string = (const char*)str;
1944
1945         if (STREQ (string, "alnum")) {
1946                 return RECC_ALNUM;
1947         } else if (STREQ (string, "alpha")) {
1948                 return RECC_ALPHA;
1949         } else if (STREQ (string, "digit")) {
1950                 return RECC_DIGIT;
1951         } else if (STREQ (string, "graph")) {
1952                 return RECC_GRAPH;
1953         } else if (STREQ (string, "space")) {
1954                 return RECC_SPACE;
1955         } else if (STREQ (string, "upper")) {
1956                 return RECC_UPPER;
1957         } else if (STREQ (string, "lower")) {
1958                 return RECC_LOWER;
1959         } else if (STREQ (string, "word")) {
1960                 return RECC_WORD;
1961         } else if (STREQ (string, "print")) {
1962                 return RECC_PRINT;
1963         } else if (STREQ (string, "punct")) {
1964                 return RECC_PUNCT;
1965         } else if (STREQ (string, "xdigit")) {
1966                 return RECC_XDIGIT;
1967         } else if (STREQ (string, "cntrl")) {
1968                 return RECC_CNTRL;
1969         } else if (STREQ (string, "blank")) {
1970                 return RECC_BLANK;
1971         } else if (STREQ (string, "ascii")) {
1972                 return RECC_ASCII;
1973         } else if (STREQ (string, "nonascii")) {
1974                 return RECC_NONASCII;
1975         } else if (STREQ (string, "unibyte")) {
1976                 return RECC_UNIBYTE;
1977         } else if (STREQ (string, "multibyte")) {
1978                 return RECC_MULTIBYTE;
1979         } else {
1980                 return RECC_ERROR;
1981         }
1982 }
1983
1984 /* True if CH is in the char class CC.  */
1985 static inline bool
1986 re_iswctype(int ch, re_wctype_t cc)
1987 {
1988         switch (cc) {
1989         case RECC_ALNUM:
1990                 return ISALNUM (ch);
1991         case RECC_ALPHA:
1992                 return ISALPHA (ch);
1993         case RECC_BLANK:
1994                 return ISBLANK (ch);
1995         case RECC_CNTRL:
1996                 return ISCNTRL (ch);
1997         case RECC_DIGIT:
1998                 return ISDIGIT (ch);
1999         case RECC_GRAPH:
2000                 return ISGRAPH (ch);
2001         case RECC_LOWER:
2002                 return ISLOWER (ch);
2003         case RECC_PRINT:
2004                 return ISPRINT (ch);
2005         case RECC_PUNCT:
2006                 return ISPUNCT (ch);
2007         case RECC_SPACE:
2008                 return ISSPACE (ch);
2009         case RECC_UPPER:
2010                 return ISUPPER (ch);
2011         case RECC_XDIGIT:
2012                 return ISXDIGIT (ch);
2013         case RECC_ASCII:
2014                 return IS_REAL_ASCII (ch);
2015         case RECC_NONASCII:
2016                 return !IS_REAL_ASCII (ch);
2017         case RECC_WORD:
2018                 return ISWORD(ch);
2019         case RECC_UNIBYTE:
2020                 return ISUNIBYTE((unsigned int)ch);
2021         case RECC_MULTIBYTE:
2022                 return !ISUNIBYTE((unsigned int)ch);
2023         case RECC_ERROR:
2024                 return false;
2025         default:
2026                 abort();
2027         }
2028         return false;
2029 }
2030
2031 \f
2032 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2033    Returns one of error codes defined in `regex.h', or zero for success.
2034
2035    Assumes the `allocated' (and perhaps `buffer') and `translate'
2036    fields are set in BUFP on entry.
2037
2038    If it succeeds, results are put in BUFP (if it returns an error, the
2039    contents of BUFP are undefined):
2040      `buffer' is the compiled pattern;
2041      `syntax' is set to SYNTAX;
2042      `used' is set to the length of the compiled pattern;
2043      `fastmap_accurate' is zero;
2044      `re_ngroups' is the number of groups/subexpressions (including shy
2045         groups) in PATTERN;
2046      `re_nsub' is the number of non-shy groups in PATTERN;
2047      `not_bol' and `not_eol' are zero;
2048
2049    The `fastmap' and `newline_anchor' fields are neither
2050    examined nor set.  */
2051
2052 static reg_errcode_t
2053 regex_compile(re_char * pattern, int size, reg_syntax_t syntax,
2054               struct re_pattern_buffer *bufp)
2055 {
2056         /* We fetch characters from PATTERN here.  We declare these as int
2057            (or possibly long) so that chars above 127 can be used as
2058            array indices.  The macros that fetch a character from the pattern
2059            make sure to coerce to unsigned char before assigning, so we won't
2060            get bitten by negative numbers here. */
2061         /* XEmacs change: used to be unsigned char. */
2062         REGISTER EMACS_INT c, c1;
2063
2064         /* A random temporary spot in PATTERN.  */
2065         re_char *p1;
2066
2067         /* Points to the end of the buffer, where we should append.  */
2068         REGISTER unsigned char *buf_end;
2069
2070         /* Keeps track of unclosed groups.  */
2071         compile_stack_type compile_stack;
2072
2073         /* Points to the current (ending) position in the pattern.  */
2074         re_char *p = pattern;
2075         re_char *pend = pattern + size;
2076
2077         /* How to translate the characters in the pattern.  */
2078         RE_TRANSLATE_TYPE translate = bufp->translate;
2079
2080         /* Address of the count-byte of the most recently inserted `exactn'
2081            command.  This makes it possible to tell if a new exact-match
2082            character can be added to that command or if the character requires
2083            a new `exactn' command.  */
2084         unsigned char *pending_exact = 0;
2085
2086         /* Address of start of the most recently finished expression.
2087            This tells, e.g., postfix * where to find the start of its
2088            operand.  Reset at the beginning of groups and alternatives.  */
2089         unsigned char *laststart = 0;
2090
2091         /* Address of beginning of regexp, or inside of last group.  */
2092         unsigned char *begalt;
2093
2094         /* Place in the uncompiled pattern (i.e., the {) to
2095            which to go back if the interval is invalid.  */
2096         re_char *beg_interval;
2097
2098         /* Address of the place where a forward jump should go to the end of
2099            the containing expression.  Each alternative of an `or' -- except the
2100            last -- ends with a forward jump of this sort.  */
2101         unsigned char *fixup_alt_jump = 0;
2102
2103         /* Counts open-groups as they are encountered.  Remembered for the
2104            matching close-group on the compile stack, so the same register
2105            number is put in the stop_memory as the start_memory.  */
2106         regnum_t regnum = 0;
2107
2108         /* we need to close over compile_stack */
2109 #       define free_stack(args...)      xfree(compile_stack.stack)
2110
2111 #ifdef REGEX_DEBUG_FLAG
2112         DEBUG_PRINT1("\nCompiling pattern: ");
2113         if (debug) {
2114                 int debug_count;
2115
2116                 for (debug_count = 0; debug_count < size; debug_count++)
2117                         putchar(pattern[debug_count]);
2118                 putchar('\n');
2119         }
2120 #endif                          /* DEBUG */
2121
2122         /* Initialize the compile stack.  */
2123         compile_stack.stack =
2124                 TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2125         if (compile_stack.stack == NULL) {
2126                 return REG_ESPACE;
2127         }
2128         compile_stack.size = INIT_COMPILE_STACK_SIZE;
2129         compile_stack.avail = 0;
2130
2131         /* Initialize the pattern buffer.  */
2132         bufp->syntax = syntax;
2133         bufp->fastmap_accurate = 0;
2134         bufp->not_bol = bufp->not_eol = 0;
2135
2136         /* Set `used' to zero, so that if we return an error, the pattern
2137            printer (for debugging) will think there's no pattern.  We reset it
2138            at the end.  */
2139         bufp->used = 0;
2140
2141         /* Always count groups, whether or not bufp->no_sub is set.  */
2142         bufp->re_nsub = 0;
2143         bufp->re_ngroups = 0;
2144
2145         /* Allocate index translation array if needed. */
2146         if (bufp->external_to_internal_register == 0) {
2147                 bufp->external_to_internal_register_size =
2148                     INIT_REG_TRANSLATE_SIZE;
2149                 RETALLOC(bufp->external_to_internal_register,
2150                          bufp->external_to_internal_register_size, int);
2151         }
2152         /* Initialize translations to impossible value to aid debugging. */
2153         {
2154                 int i;
2155
2156                 bufp->external_to_internal_register[0] = 0;
2157                 for (i = 1; i < bufp->external_to_internal_register_size; i++)
2158                         bufp->external_to_internal_register[i] =
2159                             (int)0xDEADBEEF;
2160         }
2161
2162 #if !defined (emacs) && !defined (SYNTAX_TABLE)
2163         /* Initialize the syntax table.  */
2164         init_syntax_once();
2165 #endif
2166
2167         if (bufp->allocated == 0) {
2168                 if (bufp->buffer) {
2169                         /* If zero allocated, but buffer is non-null, try to
2170                            realloc enough space.  This loses if buffer's address
2171                            is bogus, but that is the user's responsibility.  */
2172                         RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
2173                 } else {
2174                         /* Caller did not allocate a buffer.  Do it for them. */
2175                         bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
2176                 }
2177                 if (!bufp->buffer) {
2178                         free_stack();
2179                         return REG_ESPACE;
2180                 }
2181
2182                 bufp->allocated = INIT_BUF_SIZE;
2183         }
2184
2185         begalt = buf_end = bufp->buffer;
2186
2187         /* Loop through the uncompiled pattern until we're at the end.  */
2188         while (p != pend) {
2189                 PATFETCH(c);
2190
2191                 switch (c) {
2192                 case '^': {
2193                         if (    /* If at start of pattern, it's an operator.  */
2194                                 p == pattern + 1
2195                                 /* If context independent, it's an operator.  */
2196                                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2197                                 /* Otherwise, depends on what's come before.  */
2198                                 || at_begline_loc_p(pattern, p, syntax))
2199                                 BUF_PUSH(begline);
2200                         else
2201                                 goto normal_char;
2202                 }
2203                         break;
2204
2205                 case '$': {
2206                         if (    /* If at end of pattern, it's an
2207                                    operator.  */
2208                                 p == pend
2209                                 /* If context independent, it's an
2210                                    operator.  */
2211                                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2212                                 /* Otherwise, depends on what's
2213                                    next.  */
2214                                 || at_endline_loc_p(p, pend, syntax))
2215                                 BUF_PUSH(endline);
2216                         else
2217                                 goto normal_char;
2218                 }
2219                         break;
2220
2221                 case '+':
2222                 case '?':
2223                         if ((syntax & RE_BK_PLUS_QM) ||
2224                             (syntax & RE_LIMITED_OPS)) {
2225                                 goto normal_char;
2226                         }
2227                         /* pardon? */
2228                 handle_plus:
2229                 case '*': {
2230                         re_bool zero_times_ok;
2231                         re_bool many_times_ok;
2232                         re_bool minimal;
2233
2234                         /* If there is no previous pattern... */
2235                         if (!laststart) {
2236                                 if (syntax & RE_CONTEXT_INVALID_OPS) {
2237                                         free_stack();
2238                                         return REG_BADRPT;
2239                                 } else if (!(syntax & RE_CONTEXT_INDEP_OPS)) {
2240                                         goto normal_char;
2241                                 }
2242                         }
2243
2244                         /* true means zero/many matches are allowed. */
2245                         zero_times_ok = c != '+';
2246                         many_times_ok = c != '?';
2247
2248                         /* true means match shortest string possible. */
2249                         minimal = false;
2250
2251                         /* If there is a sequence of repetition chars, collapse it
2252                            down to just one (the right one).  We can't combine
2253                            interval operators with these because of, e.g., `a{2}*',
2254                            which should only match an even number of `a's.  */
2255                         while (p != pend) {
2256                                 PATFETCH(c);
2257
2258                                 if (c == '*'
2259                                     || (!(syntax & RE_BK_PLUS_QM)
2260                                         && (c == '+' || c == '?'))) ;
2261
2262                                 else if (syntax & RE_BK_PLUS_QM
2263                                          && c == '\\') {
2264                                         if (p == pend) {
2265                                                 free_stack();
2266                                                 return REG_EESCAPE;
2267                                         }
2268
2269                                         PATFETCH(c1);
2270                                         if (!(c1 == '+' || c1 == '?')) {
2271                                                 PATUNFETCH;
2272                                                 PATUNFETCH;
2273                                                 break;
2274                                         }
2275
2276                                         c = c1;
2277                                 } else {
2278                                         PATUNFETCH;
2279                                         break;
2280                                 }
2281
2282                                 /* If we get here, we found another repeat
2283                                    character.  */
2284                                 if (!(syntax & RE_NO_MINIMAL_MATCHING)) {
2285                                         /* "*?" and "+?" and "??" are okay (and
2286                                            mean match minimally), but other
2287                                            sequences (such as "*??" and "+++")
2288                                            are rejected (reserved for future
2289                                            use). */
2290                                         if (minimal || c != '?') {
2291                                                 free_stack();
2292                                                 return REG_BADRPT;
2293                                         }
2294                                         minimal = true;
2295                                 } else {
2296                                         zero_times_ok |= c != '+';
2297                                         many_times_ok |= c != '?';
2298                                 }
2299                         }
2300
2301                         /* Star, etc. applied to an empty pattern is equivalent
2302                            to an empty pattern.  */
2303                         if (!laststart) {
2304                                 break;
2305                         }
2306
2307                         /* Now we know whether zero matches is allowed
2308                            and whether two or more matches is allowed
2309                            and whether we want minimal or maximal matching. */
2310                         if (minimal) {
2311                                 if (!many_times_ok) {
2312                                         /* "a??" becomes:
2313                                            0: /on_failure_jump to 6
2314                                            3: /jump to 9
2315                                            6: /exactn/1/A
2316                                            9: end of pattern.
2317                                         */
2318                                         GET_BUFFER_SPACE(6);
2319                                         INSERT_JUMP(jump, laststart,
2320                                                     buf_end + 3);
2321                                         buf_end += 3;
2322                                         INSERT_JUMP(on_failure_jump,
2323                                                     laststart,
2324                                                     laststart + 6);
2325                                         buf_end += 3;
2326                                 } else if (zero_times_ok) {
2327                                         /* "a*?" becomes:
2328                                            0: /jump to 6
2329                                            3: /exactn/1/A
2330                                            6: /on_failure_jump to 3
2331                                            9: end of pattern.
2332                                         */
2333                                         GET_BUFFER_SPACE(6);
2334                                         INSERT_JUMP(jump, laststart,
2335                                                     buf_end + 3);
2336                                         buf_end += 3;
2337                                         STORE_JUMP(on_failure_jump,
2338                                                    buf_end,
2339                                                    laststart + 3);
2340                                         buf_end += 3;
2341                                 } else {
2342                                         /* "a+?" becomes:
2343                                            0: /exactn/1/A
2344                                            3: /on_failure_jump to 0
2345                                            6: end of pattern.
2346                                         */
2347                                         GET_BUFFER_SPACE(3);
2348                                         STORE_JUMP(on_failure_jump,
2349                                                    buf_end, laststart);
2350                                         buf_end += 3;
2351                                 }
2352                         } else {
2353                                 /* Are we optimizing this jump?  */
2354                                 re_bool keep_string_p = false;
2355
2356                                 if (many_times_ok) {
2357                                         /* More than one repetition is allowed,
2358                                            so put in at the end a backward
2359                                            relative jump from `buf_end' to
2360                                            before the next jump we're going to
2361                                            put in below (which jumps from
2362                                            laststart to after this jump).
2363
2364                                            But if we are at the `*' in the exact
2365                                            sequence `.*\n', insert an
2366                                            unconditional jump backwards to the
2367                                            ., instead of the beginning of the
2368                                            loop.  This way we only push a
2369                                            failure point once, instead of every
2370                                            time through the loop.  */
2371                                         assert(p - 1 > pattern);
2372
2373                                         /* Allocate the space for the jump.  */
2374                                         GET_BUFFER_SPACE(3);
2375
2376                                         /* We know we are not at the first
2377                                            character of the pattern, because
2378                                            laststart was nonzero.  And we've
2379                                            already incremented `p', by the way,
2380                                            to be the character after the `*'.
2381                                            Do we have to do something analogous
2382                                            here for null bytes, because of
2383                                            RE_DOT_NOT_NULL? */
2384                                         if (*(p - 2) == '.' &&
2385                                             zero_times_ok &&
2386                                             p < pend &&
2387                                             *p == '\n' &&
2388                                             !(syntax & RE_DOT_NEWLINE)) {
2389                                                 /* We have .*\n.  */
2390                                                 STORE_JUMP(jump,
2391                                                            buf_end,
2392                                                            laststart);
2393                                                 keep_string_p = true;
2394                                         } else {
2395                                                 /* Anything else.  */
2396                                                 STORE_JUMP
2397                                                         (maybe_pop_jump,
2398                                                          buf_end,
2399                                                          laststart - 3);
2400                                         }
2401                                         /* We've added more stuff to the
2402                                            buffer.  */
2403                                         buf_end += 3;
2404                                 }
2405
2406                                 /* On failure, jump from laststart to buf_end +
2407                                    3, which will be the end of the buffer after
2408                                    this jump is inserted.  */
2409                                 GET_BUFFER_SPACE(3);
2410                                 INSERT_JUMP(keep_string_p ?
2411                                             on_failure_keep_string_jump
2412                                             : on_failure_jump,
2413                                             laststart, buf_end + 3);
2414                                 buf_end += 3;
2415
2416                                 if (!zero_times_ok) {
2417                                         /* At least one repetition is required,
2418                                            so insert a `dummy_failure_jump'
2419                                            before the initial `on_failure_jump'
2420                                            instruction of the loop. This effects
2421                                            a skip over that instruction the
2422                                            first time we hit that loop.  */
2423                                         GET_BUFFER_SPACE(3);
2424                                         INSERT_JUMP(dummy_failure_jump,
2425                                                     laststart,
2426                                                     laststart + 6);
2427                                         buf_end += 3;
2428                                 }
2429                         }
2430                         pending_exact = 0;
2431                 }
2432                         break;
2433
2434                 case '.':
2435                         laststart = buf_end;
2436                         BUF_PUSH(anychar);
2437                         break;
2438
2439                 case '[': {
2440                         /* XEmacs change: this whole section */
2441                         re_bool had_char_class = false;
2442 #ifdef MULE
2443                         re_bool has_extended_chars = false;
2444                         REGISTER Lisp_Object rtab = Qnil;
2445 #endif
2446
2447                         if (p == pend) {
2448                                 free_stack();
2449                                 return REG_EBRACK;
2450                         }
2451
2452                         /* Ensure that we have enough space to push a charset:
2453                            the opcode, the length count, and the bitset; 34
2454                            bytes in all.  */
2455                         GET_BUFFER_SPACE(34);
2456
2457                         laststart = buf_end;
2458
2459                         /* We test `*p == '^' twice, instead of using an if
2460                            statement, so we only need one BUF_PUSH.  */
2461                         BUF_PUSH(*p == '^' ? charset_not : charset);
2462                         if (*p == '^')
2463                                 p++;
2464
2465                         /* Remember the first position in the bracket
2466                            expression.  */
2467                         p1 = p;
2468
2469                         /* Push the number of bytes in the bitmap.  */
2470                         BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
2471
2472                         /* Clear the whole map.  */
2473                         memset(buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
2474
2475                         /* charset_not matches newline according to a syntax
2476                            bit.  */
2477                         if ((re_opcode_t) buf_end[-2] == charset_not
2478                             && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2479                                 SET_LIST_BIT('\n');
2480
2481 #ifdef MULE
2482                         start_over_with_extended:
2483                         if (has_extended_chars) {
2484                                 /* There are extended chars here, which means we
2485                                    need to start over and shift to unified
2486                                    range-table format. */
2487                                 if (buf_end[-2] == charset)
2488                                         buf_end[-2] = charset_mule;
2489                                 else
2490                                         buf_end[-2] = charset_mule_not;
2491                                 buf_end--;
2492                                 p = p1;
2493                                 /* go back to the beginning of the charset,
2494                                    after a possible ^. */
2495                                 rtab = Vthe_lisp_rangetab;
2496                                 Fclear_range_table(rtab);
2497
2498                                 /* charset_not matches newline according to a
2499                                    syntax bit.  */
2500                                 if ((re_opcode_t) buf_end[-1] ==
2501                                     charset_mule_not
2502                                     && (syntax &
2503                                         RE_HAT_LISTS_NOT_NEWLINE))
2504                                         SET_EITHER_BIT('\n');
2505                         }
2506 #endif                          /* MULE */
2507
2508                                 /* Read in characters and ranges, setting map bits.  */
2509                         for (;;) {
2510                                 if (p == pend) {
2511                                         free_stack();
2512                                         return REG_EBRACK;
2513                                 }
2514                                 PATFETCH(c);
2515
2516 #ifdef MULE
2517                                 if (c >= 0x80 && !has_extended_chars) {
2518                                         has_extended_chars = 1;
2519                                         /* Frumble-bumble, we've found some
2520                                            extended chars.  Need to start over,
2521                                            process everything using the general
2522                                            extended-char mechanism, and need to
2523                                            use charset_mule and charset_mule_not
2524                                            instead of charset and
2525                                            charset_not. */
2526                                         goto start_over_with_extended;
2527                                 }
2528 #endif                          /* MULE */
2529                                 /* \ might escape characters inside [...] and
2530                                    [^...].  */
2531                                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) &&
2532                                     c == '\\') {
2533                                         if (p == pend) {
2534                                                 free_stack();
2535                                                 return REG_EESCAPE;
2536                                         }
2537
2538                                         PATFETCH(c1);
2539 #ifdef MULE
2540                                         if (c1 >= 0x80
2541                                             && !has_extended_chars) {
2542                                                 has_extended_chars = 1;
2543                                                 goto start_over_with_extended;
2544                                         }
2545 #endif                          /* MULE */
2546                                         SET_EITHER_BIT(c1);
2547                                         continue;
2548                                 }
2549
2550                                 /* Could be the end of the bracket
2551                                    expression.  If it's not (i.e., when
2552                                    the bracket expression is `[]' so
2553                                    far), the ']' character bit gets set
2554                                    way below.  */
2555                                 if (c == ']' && p != p1 + 1)
2556                                         break;
2557
2558                                 /* Look ahead to see if it's a range
2559                                    when the last thing was a character
2560                                    class.  */
2561                                 if (had_char_class && c == '-'
2562                                     && *p != ']') {
2563                                         free_stack();
2564                                         return REG_ERANGE;
2565                                 }
2566
2567                                 /* Look ahead to see if it's a range
2568                                    when the last thing was a character:
2569                                    if this is a hyphen not at the
2570                                    beginning or the end of a list, then
2571                                    it's the range operator.  */
2572                                 if (c == '-'
2573                                     && !(p - 2 >= pattern
2574                                          && p[-2] == '[')
2575                                     && !(p - 3 >= pattern
2576                                          && p[-3] == '['
2577                                          && p[-2] == '^')
2578                                     && *p != ']') {
2579                                         reg_errcode_t ret;
2580
2581 #ifdef MULE
2582                                         if (*(const unsigned char *)p >= 0x80
2583                                             && !has_extended_chars) {
2584                                                 has_extended_chars = 1;
2585                                                 goto start_over_with_extended;
2586                                         }
2587                                         if (has_extended_chars)
2588                                                 ret = compile_extended_range
2589                                                         (&p, pend, translate,
2590                                                          syntax, rtab);
2591                                         else
2592 #endif                          /* MULE */
2593                                                 ret = compile_range
2594                                                         (&p, pend, translate,
2595                                                          syntax, buf_end);
2596                                         if (ret != REG_NOERROR) {
2597                                                 free_stack();
2598                                                 return ret;
2599                                         }
2600                                 }
2601
2602                                 else if (p[0] == '-' && p[1] != ']') {
2603                                         /* This handles ranges made up of
2604                                            characters only.  */
2605                                         reg_errcode_t ret;
2606
2607                                         /* Move past the `-'.  */
2608                                         PATFETCH(c1);
2609
2610 #ifdef MULE
2611                                         if (*(const unsigned char*)p >= 0x80
2612                                             && !has_extended_chars) {
2613                                                 has_extended_chars = 1;
2614                                                 goto start_over_with_extended;
2615                                         }
2616                                         if (has_extended_chars)
2617                                                 ret = compile_extended_range(
2618                                                         &p, pend, translate,
2619                                                         syntax, rtab);
2620                                         else
2621 #endif                          /* MULE */
2622                                                 ret = compile_range(
2623                                                         &p, pend, translate,
2624                                                         syntax, buf_end);
2625                                         if (ret != REG_NOERROR) {
2626                                                 free_stack();
2627                                                 return ret;
2628                                         }
2629                                 }
2630
2631                                 /* See if we're at the beginning of a possible
2632                                    character class.  */
2633
2634                                 else if (syntax & RE_CHAR_CLASSES &&
2635                                          c == '[' && *p == ':') {
2636                                         /* Leave room for the null.  */
2637                                         char str[CHAR_CLASS_MAX_LENGTH + 1];
2638
2639                                         PATFETCH(c);
2640                                         c1 = 0;
2641
2642                                         /* If pattern is `[[:'.  */
2643                                         if (p == pend) {
2644                                                 free_stack();
2645                                                 return REG_EBRACK;
2646                                         }
2647                                         for (;;) {
2648                                                 /* #### This code is unused.
2649                                                    Correctness is not checked
2650                                                    after TRT table change.  */
2651                                                 PATFETCH(c);
2652                                                 if (c == ':' || c == ']'
2653                                                     || p == pend
2654                                                     || c1 ==
2655                                                     CHAR_CLASS_MAX_LENGTH)
2656                                                         break;
2657                                                 str[c1++] = (char)c;
2658                                         }
2659                                         str[c1] = '\0';
2660
2661                                         /* If isn't a word bracketed by
2662                                            `[:' and `:]': undo the
2663                                            ending character, the
2664                                            letters, and leave the
2665                                            leading `:' and `[' (but set
2666                                            bits for them).  */
2667                                         if (c == ':' && *p == ']') {
2668                                                 re_wctype_t wct =
2669                                                         re_wctype(
2670                                                                 (unsigned char*)
2671                                                                 str);
2672
2673                                                 if (wct == RECC_ERROR) {
2674                                                         free_stack();
2675                                                         return REG_ECTYPE;
2676                                                 }
2677
2678                                                 /* Throw away the ] at
2679                                                    the end of the
2680                                                    character class.  */
2681                                                 PATFETCH(c);
2682
2683                                                 if (p == pend) {
2684                                                         free_stack();
2685                                                         return REG_EBRACK;
2686                                                 }
2687
2688                                                 for (int ch = 0;
2689                                                      ch < 1<<BYTEWIDTH;
2690                                                      ch++) {
2691                                                         /* rmsmacs has
2692                                                            this */
2693 #if 0
2694                                                         int translated =
2695                                                                 TRANSLATE(ch);
2696                                                         if (translated <
2697                                                             (1 << BYTEWIDTH) &&
2698                                                             re_iswctype(ch, wct)) {
2699                                                                 SET_EITHER_BIT(ch);
2700                                                         }
2701 #else
2702                                                         if (re_iswctype(ch, wct)) {
2703                                                                 SET_EITHER_BIT(ch);
2704                                                         }
2705 #endif
2706                                                 }
2707                                                 had_char_class = true;
2708                                         } else {
2709                                                 c1++;
2710                                                 while (c1--) {
2711                                                         PATUNFETCH;
2712                                                 }
2713                                                 SET_EITHER_BIT('[');
2714                                                 SET_EITHER_BIT(':');
2715                                                 had_char_class = false;
2716                                         }
2717                                 } else {
2718                                         had_char_class = false;
2719                                         SET_EITHER_BIT(c);
2720                                 }
2721                         }
2722
2723 #ifdef MULE
2724                         if (has_extended_chars) {
2725                                 /* We have a range table, not a bit vector. */
2726                                 int bytes_needed =
2727                                         unified_range_table_bytes_needed
2728                                         (rtab);
2729                                 GET_BUFFER_SPACE(bytes_needed);
2730                                 unified_range_table_copy_data(rtab,
2731                                                               buf_end);
2732                                 buf_end +=
2733                                         unified_range_table_bytes_used
2734                                         (buf_end);
2735                                 break;
2736                         }
2737 #endif                          /* MULE */
2738                                 /* Discard any (non)matching list bytes that are
2739                                    all 0 at the end of the map.  Decrease the
2740                                    map-length byte too.  */
2741                         while ((int)buf_end[-1] > 0
2742                                && buf_end[buf_end[-1] - 1] == 0)
2743                                 buf_end[-1]--;
2744                         buf_end += buf_end[-1];
2745                 }
2746                         break;
2747
2748                 case '(':
2749                         if (syntax & RE_NO_BK_PARENS)
2750                                 goto handle_open;
2751                         else
2752                                 goto normal_char;
2753
2754                 case ')':
2755                         if (syntax & RE_NO_BK_PARENS)
2756                                 goto handle_close;
2757                         else
2758                                 goto normal_char;
2759
2760                 case '\n':
2761                         if (syntax & RE_NEWLINE_ALT)
2762                                 goto handle_alt;
2763                         else
2764                                 goto normal_char;
2765
2766                 case '|':
2767                         if (syntax & RE_NO_BK_VBAR)
2768                                 goto handle_alt;
2769                         else
2770                                 goto normal_char;
2771
2772                 case '{':
2773                         if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2774                                 goto handle_interval;
2775                         else
2776                                 goto normal_char;
2777
2778                 case '\\':
2779                         if (p == pend) {
2780                                 free_stack();
2781                                 return REG_EESCAPE;
2782                         }
2783
2784                         /* Do not translate the character after the \, so that we can
2785                            distinguish, e.g., \B from \b, even if we normally would
2786                            translate, e.g., B to b.  */
2787                         PATFETCH_RAW(c);
2788
2789                         switch (c) {
2790                         case '(':
2791                                 if (syntax & RE_NO_BK_PARENS)
2792                                         goto normal_backslash;
2793
2794                               handle_open:
2795                                 {
2796                                         regnum_t r;
2797                                         int shy = 0;
2798
2799                                         if (!(syntax & RE_NO_SHY_GROUPS)
2800                                             && p != pend && *p == '?') {
2801                                                 p++;
2802                                                 PATFETCH(c);
2803                                                 switch (c) {
2804                                                 case ':':
2805                                                         /* shy groups */
2806                                                         shy = 1;
2807                                                         break;
2808
2809                                                         /* All others are
2810                                                            reserved for future
2811                                                            constructs. */
2812                                                 default:
2813                                                         free_stack();
2814                                                         return REG_BADPAT;
2815                                                 }
2816                                         }
2817
2818                                         r = ++regnum;
2819                                         bufp->re_ngroups++;
2820                                         if (!shy) {
2821                                                 /* Record the translation from capturing group index to
2822                                                    register number, reallocating table as needed. */
2823                                                 bufp->re_nsub++;
2824                                                 while (bufp->
2825                                                        external_to_internal_register_size
2826                                                        <= bufp->re_nsub) {
2827                                                         int i;
2828                                                         int old_size =
2829                                                             bufp->
2830                                                             external_to_internal_register_size;
2831                                                         bufp->
2832                                                             external_to_internal_register_size
2833                                                             += 5;
2834                                                         RETALLOC(bufp->
2835                                                                  external_to_internal_register,
2836                                                                  bufp->
2837                                                                  external_to_internal_register_size,
2838                                                                  int);
2839                                                         /* debugging */
2840                                                         for (i = old_size;
2841                                                              i <
2842                                                              bufp->
2843                                                              external_to_internal_register_size;
2844                                                              i++)
2845                                                                 bufp->
2846                                                                     external_to_internal_register
2847                                                                     [i] =
2848                                                                     (int)
2849                                                                     0xDEADBEEF;
2850                                                 }
2851
2852                                                 bufp->
2853                                                     external_to_internal_register
2854                                                     [bufp->re_nsub] =
2855                                                     bufp->re_ngroups;
2856                                         }
2857
2858                                         if (COMPILE_STACK_FULL) {
2859                                                 RETALLOC(compile_stack.stack,
2860                                                          compile_stack.
2861                                                          size << 1,
2862                                                          compile_stack_elt_t);
2863                                                 if (compile_stack.stack == NULL)
2864                                                         return REG_ESPACE;
2865
2866                                                 compile_stack.size <<= 1;
2867                                         }
2868
2869                                         /* These are the values to restore when
2870                                            we hit end of this group.  They are
2871                                            all relative offsets, so that if the
2872                                            whole pattern moves because of
2873                                            realloc, they will still be
2874                                            valid.  */
2875                                         COMPILE_STACK_TOP.begalt_offset =
2876                                                 begalt - bufp->buffer;
2877                                         COMPILE_STACK_TOP.fixup_alt_jump =
2878                                                 fixup_alt_jump
2879                                                 ? fixup_alt_jump -
2880                                                 bufp->buffer + 1
2881                                                 : 0;
2882                                         COMPILE_STACK_TOP.laststart_offset =
2883                                                 buf_end - bufp->buffer;
2884                                         COMPILE_STACK_TOP.regnum = r;
2885
2886                                         /* We will eventually replace the 0 with
2887                                            the number of groups inner to this
2888                                            one.  But do not push a start_memory
2889                                            for groups beyond the last one we can
2890                                            represent in the compiled pattern.
2891                                            #### bad bad bad.  this will fail in
2892                                            lots of ways, if we ever have to
2893                                            backtrack for these groups.
2894                                          */
2895                                         if (r <= MAX_REGNUM) {
2896                                                 COMPILE_STACK_TOP.
2897                                                         inner_group_offset =
2898                                                         buf_end - bufp->buffer +
2899                                                         2;
2900                                                 BUF_PUSH_3(start_memory, r, 0);
2901                                         }
2902
2903                                         compile_stack.avail++;
2904
2905                                         fixup_alt_jump = 0;
2906                                         laststart = 0;
2907                                         begalt = buf_end;
2908                                         /* If we've reached MAX_REGNUM groups,
2909                                            then this open won't actually
2910                                            generate any code, so we'll have to
2911                                            clear pending_exact explicitly.  */
2912                                         pending_exact = 0;
2913                                 }
2914                                 break;
2915
2916                         case ')':
2917                                 if (syntax & RE_NO_BK_PARENS)
2918                                         goto normal_backslash;
2919
2920                                 if (COMPILE_STACK_EMPTY) {
2921                                         if (syntax &
2922                                             RE_UNMATCHED_RIGHT_PAREN_ORD) {
2923                                                 goto normal_backslash;
2924                                         } else {
2925                                                 free_stack();
2926                                                 return REG_ERPAREN;
2927                                         }
2928                                 }
2929
2930                               handle_close:
2931                                 if (fixup_alt_jump) {
2932                                         /* Push a dummy failure point at the end
2933                                            of the alternative for a possible
2934                                            future `pop_failure_jump' to pop.
2935                                            See comments at `push_dummy_failure'
2936                                            in `re_match_2'. */
2937                                         BUF_PUSH(push_dummy_failure);
2938
2939                                         /* We allocated space for this jump when
2940                                            we assigned to `fixup_alt_jump', in
2941                                            the `handle_alt' case below.  */
2942                                         STORE_JUMP(jump_past_alt,
2943                                                    fixup_alt_jump, buf_end - 1);
2944                                 }
2945
2946                                 /* See similar code for backslashed left paren
2947                                    above.  */
2948                                 if (COMPILE_STACK_EMPTY) {
2949                                         if (syntax &
2950                                             RE_UNMATCHED_RIGHT_PAREN_ORD) {
2951                                                 goto normal_char;
2952                                         } else {
2953                                                 free_stack();
2954                                                 return REG_EPAREN;
2955                                         }
2956                                 }
2957
2958                                 /* Since we just checked for an empty stack
2959                                    above, this ``can't happen''.  */
2960                                 assert(compile_stack.avail != 0);
2961                                 {
2962                                         /* We don't just want to restore into
2963                                            `regnum', because later groups should
2964                                            continue to be numbered higher, as in
2965                                            `(ab)c(de)' -- the second group is
2966                                            #2.  */
2967                                         regnum_t this_group_regnum;
2968
2969                                         compile_stack.avail--;
2970                                         begalt =
2971                                                 bufp->buffer +
2972                                                 COMPILE_STACK_TOP.begalt_offset;
2973                                         fixup_alt_jump =
2974                                                 COMPILE_STACK_TOP.
2975                                                 fixup_alt_jump
2976                                                 ? bufp->buffer +
2977                                                 COMPILE_STACK_TOP.
2978                                                 fixup_alt_jump - 1
2979                                                 : 0;
2980                                         laststart =
2981                                                 bufp->buffer +
2982                                                 COMPILE_STACK_TOP.
2983                                                 laststart_offset;
2984                                         this_group_regnum =
2985                                                 COMPILE_STACK_TOP.regnum;
2986                                         /* If we've reached MAX_REGNUM groups,
2987                                            then this open won't actually
2988                                            generate any code, so we'll have to
2989                                            clear pending_exact explicitly.  */
2990                                         pending_exact = 0;
2991
2992                                         /* We're at the end of the group, so now
2993                                            we know how many groups were inside
2994                                            this one.  */
2995                                         if (this_group_regnum <= MAX_REGNUM) {
2996                                                 unsigned char *inner_group_loc
2997                                                         =
2998                                                         bufp->buffer +
2999                                                         COMPILE_STACK_TOP.
3000                                                         inner_group_offset;
3001
3002                                                 *inner_group_loc =
3003                                                         regnum -
3004                                                         this_group_regnum;
3005                                                 BUF_PUSH_3(stop_memory,
3006                                                            this_group_regnum,
3007                                                            regnum -
3008                                                            this_group_regnum);
3009                                         }
3010                                 }
3011                                 break;
3012
3013                         case '|':       /* `\|'.  */
3014                                 if (syntax & RE_LIMITED_OPS
3015                                     || syntax & RE_NO_BK_VBAR) {
3016                                         goto normal_backslash;
3017                                 }
3018                         handle_alt:
3019                                 if (syntax & RE_LIMITED_OPS) {
3020                                         goto normal_char;
3021                                 }
3022
3023                                 /* Insert before the previous alternative a jump
3024                                    which jumps to this alternative if the former
3025                                    fails.  */
3026                                 GET_BUFFER_SPACE(3);
3027                                 INSERT_JUMP(on_failure_jump, begalt,
3028                                             buf_end + 6);
3029                                 pending_exact = 0;
3030                                 buf_end += 3;
3031
3032                                 /* The alternative before this one has a jump after it
3033                                    which gets executed if it gets matched.  Adjust that
3034                                    jump so it will jump to this alternative's analogous
3035                                    jump (put in below, which in turn will jump to the next
3036                                    (if any) alternative's such jump, etc.).  The last such
3037                                    jump jumps to the correct final destination.  A picture:
3038                                    _____ _____
3039                                    |   | |   |
3040                                    |   v |   v
3041                                    a | b   | c
3042
3043                                    If we are at `b', then fixup_alt_jump right now points to a
3044                                    three-byte space after `a'.  We'll put in the jump, set
3045                                    fixup_alt_jump to right after `b', and leave behind three
3046                                    bytes which we'll fill in when we get to after `c'.  */
3047
3048                                 if (fixup_alt_jump)
3049                                         STORE_JUMP(jump_past_alt,
3050                                                    fixup_alt_jump, buf_end);
3051
3052                                 /* Mark and leave space for a jump after this alternative,
3053                                    to be filled in later either by next alternative or
3054                                    when know we're at the end of a series of alternatives.  */
3055                                 fixup_alt_jump = buf_end;
3056                                 GET_BUFFER_SPACE(3);
3057                                 buf_end += 3;
3058
3059                                 laststart = 0;
3060                                 begalt = buf_end;
3061                                 break;
3062
3063                         case '{':
3064                                 /* If \{ is a literal.  */
3065                                 if (!(syntax & RE_INTERVALS)
3066                                     /* If we're at `\{' and it's not the open-interval
3067                                        operator.  */
3068                                     || ((syntax & RE_INTERVALS)
3069                                         && (syntax & RE_NO_BK_BRACES))
3070                                     || (p - 2 == pattern && p == pend))
3071                                         goto normal_backslash;
3072
3073                               handle_interval:
3074                                 {
3075                                         /* If got here, then the syntax allows
3076                                            intervals.  */
3077
3078                                         /* At least (most) this many matches
3079                                            must be made.  */
3080                                         int lower_bound = -1, upper_bound = -1;
3081
3082                                         beg_interval = p - 1;
3083
3084                                         if (p == pend) {
3085                                                 if (syntax & RE_NO_BK_BRACES) {
3086                                                         goto unfetch_interval;
3087                                                 } else {
3088                                                         free_stack();
3089                                                         return REG_EBRACE;
3090                                                 }
3091                                         }
3092
3093                                         GET_UNSIGNED_NUMBER(lower_bound);
3094
3095                                         if (c == ',') {
3096                                                 GET_UNSIGNED_NUMBER(
3097                                                         upper_bound);
3098                                                 if (upper_bound < 0) {
3099                                                         upper_bound =
3100                                                                 RE_DUP_MAX;
3101                                                 }
3102                                         } else {
3103                                                 /* Interval such as `{1}' =>
3104                                                    match exactly once. */
3105                                                 upper_bound = lower_bound;
3106                                         }
3107
3108                                         if (lower_bound < 0
3109                                             || upper_bound > RE_DUP_MAX
3110                                             || lower_bound > upper_bound) {
3111                                                 if (syntax & RE_NO_BK_BRACES) {
3112                                                         goto unfetch_interval;
3113                                                 } else {
3114                                                         free_stack();
3115                                                         return REG_BADBR;
3116                                                 }
3117                                         }
3118
3119                                         if (!(syntax & RE_NO_BK_BRACES)) {
3120                                                 if (c != '\\') {
3121                                                         free_stack();
3122                                                         return REG_EBRACE;
3123                                                 }
3124                                                 PATFETCH(c);
3125                                         }
3126
3127                                         if (c != '}') {
3128                                                 if (syntax & RE_NO_BK_BRACES) {
3129                                                         goto unfetch_interval;
3130                                                 } else {
3131                                                         free_stack();
3132                                                         return REG_BADBR;
3133                                                 }
3134                                         }
3135
3136                                         /* We just parsed a valid interval.  */
3137
3138                                         /* If it's invalid to have no preceding
3139                                            re.  */
3140                                         if (!laststart) {
3141                                                 if (syntax &
3142                                                     RE_CONTEXT_INVALID_OPS) {
3143                                                         free_stack();
3144                                                         return REG_BADRPT;
3145                                                 } else if (
3146                                                         syntax &
3147                                                         RE_CONTEXT_INDEP_OPS) {
3148                                                         laststart = buf_end;
3149                                                 } else {
3150                                                         goto unfetch_interval;
3151                                                 }
3152                                         }
3153
3154                                         /* If the upper bound is zero, don't
3155                                            want to succeed at all; jump from
3156                                            `laststart' to `b + 3', which will be
3157                                            the end of the buffer after we insert
3158                                            the jump.  */
3159                                         if (upper_bound == 0) {
3160                                                 GET_BUFFER_SPACE(3);
3161                                                 INSERT_JUMP(jump, laststart,
3162                                                             buf_end + 3);
3163                                                 buf_end += 3;
3164                                         }
3165
3166                                         /* Otherwise, we have a nontrivial interval.  When
3167                                            we're all done, the pattern will look like:
3168                                            set_number_at <jump count> <upper bound>
3169                                            set_number_at <succeed_n count> <lower bound>
3170                                            succeed_n <after jump addr> <succeed_n count>
3171                                            <body of loop>
3172                                            jump_n <succeed_n addr> <jump count>
3173                                            (The upper bound and `jump_n' are omitted if
3174                                            `upper_bound' is 1, though.)  */
3175                                         else {  /* If the upper bound is > 1, we need to insert
3176                                                    more at the end of the loop.  */
3177                                                 Memory_count nbytes =
3178                                                     10 + (upper_bound > 1) * 10;
3179
3180                                                 GET_BUFFER_SPACE(nbytes);
3181
3182                                                 /* Initialize lower bound of the `succeed_n', even
3183                                                    though it will be set during matching by its
3184                                                    attendant `set_number_at' (inserted next),
3185                                                    because `re_compile_fastmap' needs to know.
3186                                                    Jump to the `jump_n' we might insert below.  */
3187                                                 INSERT_JUMP2(succeed_n,
3188                                                              laststart,
3189                                                              buf_end + 5 +
3190                                                              (upper_bound >
3191                                                               1) * 5,
3192                                                              lower_bound);
3193                                                 buf_end += 5;
3194
3195                                                 /* Code to initialize the lower bound.  Insert
3196                                                    before the `succeed_n'.  The `5' is the last two
3197                                                    bytes of this `set_number_at', plus 3 bytes of
3198                                                    the following `succeed_n'.  */
3199                                                 insert_op2(set_number_at,
3200                                                            laststart, 5,
3201                                                            lower_bound,
3202                                                            buf_end);
3203                                                 buf_end += 5;
3204
3205                                                 if (upper_bound > 1) {  /* More than one repetition is allowed, so
3206                                                                            append a backward jump to the `succeed_n'
3207                                                                            that starts this interval.
3208
3209                                                                            When we've reached this during matching,
3210                                                                            we'll have matched the interval once, so
3211                                                                            jump back only `upper_bound - 1' times.  */
3212                                                         STORE_JUMP2(jump_n,
3213                                                                     buf_end,
3214                                                                     laststart +
3215                                                                     5,
3216                                                                     upper_bound
3217                                                                     - 1);
3218                                                         buf_end += 5;
3219
3220                                                         /* The location we want to set is the second
3221                                                            parameter of the `jump_n'; that is `b-2' as
3222                                                            an absolute address.  `laststart' will be
3223                                                            the `set_number_at' we're about to insert;
3224                                                            `laststart+3' the number to set, the source
3225                                                            for the relative address.  But we are
3226                                                            inserting into the middle of the pattern --
3227                                                            so everything is getting moved up by 5.
3228                                                            Conclusion: (b - 2) - (laststart + 3) + 5,
3229                                                            i.e., b - laststart.
3230
3231                                                            We insert this at the beginning of the loop
3232                                                            so that if we fail during matching, we'll
3233                                                            reinitialize the bounds.  */
3234                                                         insert_op2
3235                                                             (set_number_at,
3236                                                              laststart,
3237                                                              buf_end -
3238                                                              laststart,
3239                                                              upper_bound - 1,
3240                                                              buf_end);
3241                                                         buf_end += 5;
3242                                                 }
3243                                         }
3244                                         pending_exact = 0;
3245                                         beg_interval = NULL;
3246                                 }
3247                                 break;
3248
3249                               unfetch_interval:
3250                                 /* If an invalid interval, match the characters as literals.  */
3251                                 assert(beg_interval);
3252                                 p = beg_interval;
3253                                 beg_interval = NULL;
3254
3255                                 /* normal_char and normal_backslash need `c'.  */
3256                                 PATFETCH(c);
3257
3258                                 if (!(syntax & RE_NO_BK_BRACES)) {
3259                                         if (p > pattern && p[-1] == '\\')
3260                                                 goto normal_backslash;
3261                                 }
3262                                 goto normal_char;
3263
3264 #ifdef emacs
3265                                 /* There is no way to specify the before_dot and after_dot
3266                                    operators.  rms says this is ok.  --karl  */
3267                         case '=':
3268                                 BUF_PUSH(at_dot);
3269                                 break;
3270
3271                         case 's':
3272                                 laststart = buf_end;
3273                                 PATFETCH(c);
3274                                 /* XEmacs addition */
3275                                 if (c >= 0x80 || syntax_spec_code[c] == 0377) {
3276                                         free_stack();
3277                                         return REG_ESYNTAX;
3278                                 }
3279                                 BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
3280                                 break;
3281
3282                         case 'S':
3283                                 laststart = buf_end;
3284                                 PATFETCH(c);
3285                                 /* XEmacs addition */
3286                                 if (c >= 0x80 || syntax_spec_code[c] == 0377) {
3287                                         free_stack();
3288                                         return REG_ESYNTAX;
3289                                 }
3290                                 BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
3291                                 break;
3292
3293 #ifdef MULE
3294 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */
3295                         case 'c':
3296                                 laststart = buf_end;
3297                                 PATFETCH_RAW(c);
3298                                 if (c < 32 || c > 127) {
3299                                         free_stack();
3300                                         return REG_ECATEGORY;
3301                                 }
3302                                 BUF_PUSH_2(categoryspec, c);
3303                                 break;
3304
3305                         case 'C':
3306                                 laststart = buf_end;
3307                                 PATFETCH_RAW(c);
3308                                 if (c < 32 || c > 127) {
3309                                         free_stack();
3310                                         return REG_ECATEGORY;
3311                                 }
3312                                 BUF_PUSH_2(notcategoryspec, c);
3313                                 break;
3314 /* end of category patch */
3315 #endif                          /* MULE */
3316 #endif                          /* emacs */
3317
3318                         case 'w':
3319                                 laststart = buf_end;
3320                                 BUF_PUSH(wordchar);
3321                                 break;
3322
3323                         case 'W':
3324                                 laststart = buf_end;
3325                                 BUF_PUSH(notwordchar);
3326                                 break;
3327
3328                         case '<':
3329                                 BUF_PUSH(wordbeg);
3330                                 break;
3331
3332                         case '>':
3333                                 BUF_PUSH(wordend);
3334                                 break;
3335
3336                         case 'b':
3337                                 BUF_PUSH(wordbound);
3338                                 break;
3339
3340                         case 'B':
3341                                 BUF_PUSH(notwordbound);
3342                                 break;
3343
3344                         case '`':
3345                                 BUF_PUSH(begbuf);
3346                                 break;
3347
3348                         case '\'':
3349                                 BUF_PUSH(endbuf);
3350                                 break;
3351
3352                         case '1':
3353                         case '2':
3354                         case '3':
3355                         case '4':
3356                         case '5':
3357                         case '6':
3358                         case '7':
3359                         case '8':
3360                         case '9': {
3361                                 int reg;
3362
3363                                 if (syntax & RE_NO_BK_REFS) {
3364                                         goto normal_char;
3365                                 }
3366                                 /* External register indexing. */
3367                                 reg = c - '0';
3368
3369                                 if (reg > bufp->re_nsub) {
3370                                         free_stack();
3371                                         return REG_ESUBREG;
3372                                 }
3373
3374                                 /* Convert external to internal as soon
3375                                    as possible. */
3376                                 reg = bufp->external_to_internal_register[reg];
3377
3378                                 /* Can't back reference to a
3379                                    subexpression if inside it. */
3380                                 if (group_in_compile_stack(
3381                                             compile_stack, reg)) {
3382                                         goto normal_char;
3383                                 }
3384                                 laststart = buf_end;
3385                                 BUF_PUSH_2(duplicate, reg);
3386                         }
3387                                 break;
3388
3389                         case '+':
3390                         case '?':
3391                                 if (syntax & RE_BK_PLUS_QM) {
3392                                         goto handle_plus;
3393                                 } else {
3394                                         goto normal_backslash;
3395                                 }
3396                         default:
3397                         normal_backslash:
3398                                 /* You might think it would be useful for \ to
3399                                    mean not to translate; but if we don't
3400                                    translate it, it will never match
3401                                    anything.  */
3402                                 c = TRANSLATE(c);
3403                                 goto normal_char;
3404                         }
3405                         break;
3406
3407                         /* Expects the character in `c'.  */
3408                         /* `p' points to the location after where `c' came
3409                            from. */
3410                 normal_char:
3411                 default: {
3412                         /* XEmacs: modifications here for Mule. */
3413                         /* `q' points to the beginning of the next char. */
3414                         re_char *q = p;
3415
3416                         /* If no exactn currently being built.  */
3417                         if (!pending_exact
3418                             /* If last exactn not at current position.  */
3419                             || pending_exact + *pending_exact + 1 !=
3420                             buf_end
3421                             /* We have only one byte following the exactn for
3422                                the count. */
3423                             || ((unsigned int)(*pending_exact + (q - p)) >=
3424                                 ((unsigned int)(1 << BYTEWIDTH) - 1))
3425
3426                             /* If followed by a repetition operator.  */
3427                             || *q == '*' || *q == '^'
3428                             || ((syntax & RE_BK_PLUS_QM)
3429                                 ? *q == '\\' && (q[1] == '+'
3430                                                  || q[1] == '?')
3431                                 : (*q == '+' || *q == '?'))
3432                             || ((syntax & RE_INTERVALS)
3433                                 && ((syntax & RE_NO_BK_BRACES)
3434                                     ? *q == '{'
3435                                     : (q[0] == '\\' && q[1] == '{')))) {
3436                                 /* Start building a new exactn.  */
3437
3438                                 laststart = buf_end;
3439
3440                                 BUF_PUSH_2(exactn, 0);
3441                                 pending_exact = buf_end - 1;
3442                         }
3443 #ifndef MULE
3444                         BUF_PUSH(c);
3445                         (*pending_exact)++;
3446 #else
3447                         {
3448                                 Bytecount bt_count;
3449                                 Bufbyte tmp_buf[MAX_EMCHAR_LEN];
3450                                 int i;
3451
3452                                 bt_count =
3453                                         set_charptr_emchar(tmp_buf, c);
3454
3455                                 for (i = 0; i < bt_count; i++) {
3456                                         BUF_PUSH(tmp_buf[i]);
3457                                         (*pending_exact)++;
3458                                 }
3459                         }
3460 #endif
3461                         break;
3462                 }
3463                 }               /* switch (c) */
3464         }                       /* while p != pend */
3465
3466         /* Through the pattern now.  */
3467
3468         if (fixup_alt_jump) {
3469                 STORE_JUMP(jump_past_alt, fixup_alt_jump, buf_end);
3470         }
3471         if (!COMPILE_STACK_EMPTY) {
3472                 free_stack();
3473                 return REG_EPAREN;
3474         }
3475         /* If we don't want backtracking, force success
3476            the first time we reach the end of the compiled pattern.  */
3477         if (syntax & RE_NO_POSIX_BACKTRACKING) {
3478                 BUF_PUSH(succeed);
3479         }
3480         xfree(compile_stack.stack);
3481
3482         /* We have succeeded; set the length of the buffer.  */
3483         bufp->used = buf_end - bufp->buffer;
3484
3485 #ifdef REGEX_DEBUG_FLAG
3486         if (debug) {
3487                 DEBUG_PRINT1("\nCompiled pattern: \n");
3488                 print_compiled_pattern(bufp);
3489         }
3490 #endif                          /* DEBUG */
3491
3492 #ifndef MATCH_MAY_ALLOCATE
3493         /* Initialize the failure stack to the largest possible stack.  This
3494            isn't necessary unless we're trying to avoid calling alloca in
3495            the search and match routines.  */
3496         {
3497                 int num_regs = bufp->re_ngroups + 1;
3498
3499                 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
3500                    is strictly greater than re_max_failures, the largest possible stack
3501                    is 2 * re_max_failures failure points.  */
3502                 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) {
3503                         fail_stack.size =
3504                             (2 * re_max_failures * MAX_FAILURE_ITEMS);
3505
3506                         if (!fail_stack.stack) {
3507                                 fail_stack.stack =
3508                                         (fail_stack_elt_t *)
3509                                         xmalloc_atomic(
3510                                                 fail_stack.size *
3511                                                 sizeof(fail_stack_elt_t));
3512                         } else {
3513                                 fail_stack.stack =
3514                                         (fail_stack_elt_t *)
3515                                         xrealloc(fail_stack.stack,
3516                                                  (fail_stack.size *
3517                                                   sizeof(fail_stack_elt_t)));
3518                         }
3519                 }
3520
3521                 regex_grow_registers(num_regs);
3522         }
3523 #endif  /* not MATCH_MAY_ALLOCATE */
3524
3525         return REG_NOERROR;
3526 }
3527 /* regex_compile */
3528 \f
3529 /* Subroutines for `regex_compile'.  */
3530
3531 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
3532
3533 static void store_op1(re_opcode_t op, unsigned char *loc, int arg)
3534 {
3535         *loc = (unsigned char)op;
3536         STORE_NUMBER(loc + 1, arg);
3537 }
3538
3539 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3540
3541 static void store_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2)
3542 {
3543         *loc = (unsigned char)op;
3544         STORE_NUMBER(loc + 1, arg1);
3545         STORE_NUMBER(loc + 3, arg2);
3546 }
3547
3548 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3549    for OP followed by two-byte integer parameter ARG.  */
3550
3551 static void
3552 insert_op1(re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
3553 {
3554         REGISTER unsigned char *pfrom = end;
3555         REGISTER unsigned char *pto = end + 3;
3556
3557         while (pfrom != loc)
3558                 *--pto = *--pfrom;
3559
3560         store_op1(op, loc, arg);
3561 }
3562
3563 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3564
3565 static void
3566 insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2,
3567            unsigned char *end)
3568 {
3569         REGISTER unsigned char *pfrom = end;
3570         REGISTER unsigned char *pto = end + 5;
3571
3572         while (pfrom != loc)
3573                 *--pto = *--pfrom;
3574
3575         store_op2(op, loc, arg1, arg2);
3576 }
3577
3578 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3579    after an alternative or a begin-subexpression.  We assume there is at
3580    least one character before the ^.  */
3581
3582 static re_bool
3583 at_begline_loc_p(re_char *pattern, re_char *p, reg_syntax_t syntax)
3584 {
3585         re_char *prev = p - 2;
3586         re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3587
3588         return
3589             /* After a subexpression?  */
3590             (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3591             /* After an alternative?  */
3592             || (*prev == '|'
3593                 && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3594 }
3595
3596 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3597    at least one character after the $, i.e., `P < PEND'.  */
3598
3599 static re_bool
3600 at_endline_loc_p(re_char *p, re_char *pend, int syntax)
3601 {
3602         re_char *next = p;
3603         re_bool next_backslash = *next == '\\';
3604         re_char *next_next = p + 1 < pend ? p + 1 : 0;
3605
3606         return
3607             /* Before a subexpression?  */
3608             (syntax & RE_NO_BK_PARENS ? *next == ')'
3609              : next_backslash && next_next && *next_next == ')')
3610             /* Before an alternative?  */
3611             || (syntax & RE_NO_BK_VBAR ? *next == '|'
3612                 : next_backslash && next_next && *next_next == '|');
3613 }
3614
3615 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3616    false if it's not.  */
3617
3618 static re_bool
3619 group_in_compile_stack(compile_stack_type compile_stack, regnum_t regnum)
3620 {
3621         int this_element;
3622
3623         for (this_element = compile_stack.avail - 1;
3624              this_element >= 0; this_element--)
3625                 if (compile_stack.stack[this_element].regnum == regnum) {
3626                         return true;
3627                 }
3628         return false;
3629 }
3630
3631 /* Read the ending character of a range (in a bracket expression) from the
3632    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3633    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3634    Then we set the translation of all bits between the starting and
3635    ending characters (inclusive) in the compiled pattern B.
3636
3637    Return an error code.
3638
3639    We use these short variable names so we can use the same macros as
3640    `regex_compile' itself.  */
3641
3642 static reg_errcode_t
3643 compile_range(re_char ** p_ptr, re_char * pend, RE_TRANSLATE_TYPE translate,
3644               reg_syntax_t syntax, unsigned char *buf_end)
3645 {
3646         Element_count this_char;
3647
3648         re_char *p = *p_ptr;
3649         int range_start, range_end;
3650
3651         if (p == pend)
3652                 return REG_ERANGE;
3653
3654         /* Even though the pattern is a signed `char *', we need to fetch
3655            with unsigned char *'s; if the high bit of the pattern character
3656            is set, the range endpoints will be negative if we fetch using a
3657            signed char *.
3658
3659            We also want to fetch the endpoints without translating them; the
3660            appropriate translation is done in the bit-setting loop below.  */
3661         /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3662         range_start = ((const unsigned char *)p)[-2];
3663         range_end = ((const unsigned char *)p)[0];
3664
3665         /* Have to increment the pointer into the pattern string, so the
3666            caller isn't still at the ending character.  */
3667         (*p_ptr)++;
3668
3669         /* If the start is after the end, the range is empty.  */
3670         if (range_start > range_end)
3671                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3672
3673         /* Here we see why `this_char' has to be larger than an `unsigned
3674            char' -- the range is inclusive, so if `range_end' == 0xff
3675            (assuming 8-bit characters), we would otherwise go into an infinite
3676            loop, since all characters <= 0xff.  */
3677         for (this_char = range_start; this_char <= range_end; this_char++) {
3678                 SET_LIST_BIT(TRANSLATE(this_char));
3679         }
3680
3681         return REG_NOERROR;
3682 }
3683
3684 #ifdef MULE
3685
3686 static reg_errcode_t
3687 compile_extended_range(re_char ** p_ptr, re_char * pend,
3688                        RE_TRANSLATE_TYPE translate,
3689                        reg_syntax_t syntax, Lisp_Object rtab)
3690 {
3691         Emchar this_char, range_start, range_end;
3692         const Bufbyte *p;
3693
3694         if (*p_ptr == pend)
3695                 return REG_ERANGE;
3696
3697         p = (const Bufbyte *)*p_ptr;
3698         range_end = charptr_emchar(p);
3699         p--;                    /* back to '-' */
3700         DEC_CHARPTR(p);         /* back to start of range */
3701         /* We also want to fetch the endpoints without translating them; the
3702            appropriate translation is done in the bit-setting loop below.  */
3703         range_start = charptr_emchar(p);
3704         INC_CHARPTR(*p_ptr);
3705
3706         /* If the start is after the end, the range is empty.  */
3707         if (range_start > range_end)
3708                 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3709
3710         /* Can't have ranges spanning different charsets, except maybe for
3711            ranges entirely within the first 256 chars. */
3712
3713         if ((range_start >= 0x100 || range_end >= 0x100)
3714             && CHAR_LEADING_BYTE(range_start) != CHAR_LEADING_BYTE(range_end))
3715                 return REG_ERANGESPAN;
3716
3717         /* As advertised, translations only work over the 0 - 0x7F range.
3718            Making this kind of stuff work generally is much harder.
3719            Iterating over the whole range like this would be way efficient
3720            if the range encompasses 10,000 chars or something.  You'd have
3721            to do something like this:
3722
3723            range_table a;
3724            range_table b;
3725            map over translation table in [range_start, range_end] of
3726            (put the mapped range in a;
3727            put the translation in b)
3728            invert the range in a and truncate to [range_start, range_end]
3729            compute the union of a, b
3730            union the result into rtab
3731          */
3732         for (this_char = range_start;
3733              this_char <= range_end && this_char < 0x80; this_char++) {
3734                 SET_RANGETAB_BIT(TRANSLATE(this_char));
3735         }
3736
3737         if (this_char <= range_end)
3738                 put_range_table(rtab, this_char, range_end, Qt);
3739
3740         return REG_NOERROR;
3741 }
3742
3743 #endif                          /* MULE */
3744 \f
3745 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3746    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3747    characters can start a string that matches the pattern.  This fastmap
3748    is used by re_search to skip quickly over impossible starting points.
3749
3750    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3751    area as BUFP->fastmap.
3752
3753    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3754    the pattern buffer.
3755
3756    Returns 0 if we succeed, -2 if an internal error.   */
3757
3758 int re_compile_fastmap(struct re_pattern_buffer *bufp)
3759 {
3760         int j, k;
3761 #ifdef MATCH_MAY_ALLOCATE
3762         fail_stack_type fail_stack;
3763 #endif
3764         DECLARE_DESTINATION;
3765         /* We don't push any register information onto the failure stack.  */
3766
3767         REGISTER char *fastmap = bufp->fastmap;
3768         unsigned char *pattern = bufp->buffer;
3769         unsigned long size = bufp->used;
3770         /* actually p supposed to carry the const qualifier, however, some
3771            silly mule code below CHANGES p and hence we cant go with the const
3772            qualifier here, leaving an `unfixable' warning on the way */
3773 #ifdef MULE
3774         unsigned char *p = pattern;
3775 #else
3776         re_char *p = pattern;
3777 #endif
3778         REGISTER unsigned char *pend = pattern + size;
3779
3780 #ifdef REL_ALLOC
3781         /* This holds the pointer to the failure stack, when
3782            it is allocated relocatably.  */
3783         fail_stack_elt_t *failure_stack_ptr;
3784 #endif
3785
3786         /* Assume that each path through the pattern can be null until
3787            proven otherwise.  We set this false at the bottom of switch
3788            statement, to which we get only if a particular path doesn't
3789            match the empty string.  */
3790         re_bool path_can_be_null = true;
3791
3792         /* We aren't doing a `succeed_n' to begin with.  */
3793         re_bool succeed_n_p = false;
3794
3795         assert(fastmap != NULL && p != NULL);
3796
3797         INIT_FAIL_STACK();
3798         memset(fastmap, 0, 1 << BYTEWIDTH);     /* Assume nothing's valid.  */
3799         bufp->fastmap_accurate = 1;     /* It will be when we're done.  */
3800         bufp->can_be_null = 0;
3801
3802         while (1) {
3803                 if (p == pend || *p == succeed) {
3804                         /* We have reached the (effective) end of pattern.  */
3805                         if (!FAIL_STACK_EMPTY()) {
3806                                 bufp->can_be_null |= path_can_be_null;
3807
3808                                 /* Reset for next path.  */
3809                                 path_can_be_null = true;
3810
3811                                 /* fuck, p isnt const thanks to that
3812                                  * unified range table function below */
3813 #ifdef MULE
3814                                 p = (unsigned char*)fail_stack.
3815                                     stack[--fail_stack.avail].pointer;
3816 #else
3817                                 p = fail_stack.stack[--fail_stack.avail]
3818                                         .pointer;
3819 #endif
3820
3821                                 continue;
3822                         } else {
3823                                 break;
3824                         }
3825                 }
3826
3827                 /* We should never be about to go beyond the end of the pattern.  */
3828                 assert(p < pend);
3829
3830                 switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
3831
3832                         /* I guess the idea here is to simply not bother with a
3833                            fastmap if a backreference is used, since it's too
3834                            hard to figure out the fastmap for the corresponding
3835                            group.  Setting `can_be_null' stops `re_search_2'
3836                            from using the fastmap, so that is all we do.  */
3837                 case duplicate:
3838                         bufp->can_be_null = 1;
3839                         goto done;
3840
3841                         /* Following are the cases which match a character.
3842                            These end with `break'.  */
3843
3844                 case exactn:
3845                         fastmap[p[1]] = 1;
3846                         break;
3847
3848                 case charset:
3849                         /* XEmacs: Under Mule, these bit vectors will
3850                            only contain values for characters below 0x80. */
3851                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3852                                 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3853                                         fastmap[j] = 1;
3854                         break;
3855
3856                 case charset_not:
3857                         /* Chars beyond end of map must be allowed.  */
3858 #ifdef MULE
3859                         for (j = *p * BYTEWIDTH; j < 0x80; j++)
3860                                 fastmap[j] = 1;
3861                         /* And all extended characters must be allowed, too. */
3862                         for (j = 0x80; j < 0xA0; j++)
3863                                 fastmap[j] = 1;
3864 #else                           /* not MULE */
3865                         for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3866                                 fastmap[j] = 1;
3867 #endif                          /* MULE */
3868
3869                         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3870                                 if (!
3871                                     (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3872                                         fastmap[j] = 1;
3873                         break;
3874
3875 #ifdef MULE
3876                 case charset_mule: {
3877                         int nentries;
3878                         int i;
3879
3880                         nentries = unified_range_table_nentries(p);
3881                         for (i = 0; i < nentries; i++) {
3882                                 EMACS_INT first, last;
3883                                 Lisp_Object dummy_val;
3884                                 int jj;
3885                                 Bufbyte strr[MAX_EMCHAR_LEN];
3886
3887                                 unified_range_table_get_range(p, i,
3888                                                               &first,
3889                                                               &last,
3890                                                               &dummy_val);
3891                                 for (jj = first;
3892                                      jj <= last && jj < 0x80; jj++)
3893                                         fastmap[jj] = 1;
3894                                 /* Ranges below 0x100 can span charsets, but there
3895                                    are only two (Control-1 and Latin-1), and
3896                                    either first or last has to be in them. */
3897                                 set_charptr_emchar(strr, first);
3898                                 fastmap[*strr] = 1;
3899                                 if (last < 0x100) {
3900                                         set_charptr_emchar(strr, last);
3901                                         fastmap[*strr] = 1;
3902                                 }
3903                         }
3904                 }
3905                         break;
3906
3907                 case charset_mule_not: {
3908                         int nentries;
3909                         int i;
3910
3911                         nentries = unified_range_table_nentries(p);
3912                         for (i = 0; i < nentries; i++) {
3913                                 EMACS_INT first, last;
3914                                 Lisp_Object dummy_val;
3915                                 int jj;
3916                                 int smallest_prev = 0;
3917
3918                                 unified_range_table_get_range(p, i,
3919                                                               &first,
3920                                                               &last,
3921                                                               &dummy_val);
3922                                 for (jj = smallest_prev;
3923                                      jj < first && jj < 0x80; jj++)
3924                                         fastmap[jj] = 1;
3925                                 smallest_prev = last + 1;
3926                                 if (smallest_prev >= 0x80)
3927                                         break;
3928                         }
3929                         /* Calculating which leading bytes are actually allowed
3930                            here is rather difficult, so we just punt and allow
3931                            all of them. */
3932                         for (i = 0x80; i < 0xA0; i++)
3933                                 fastmap[i] = 1;
3934                 }
3935                         break;
3936 #endif                          /* MULE */
3937
3938                 case wordchar:
3939 #ifdef emacs
3940                         k = (int)Sword;
3941                         goto matchsyntax;
3942 #else
3943                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3944                                 if (SYNTAX_UNSAFE
3945                                     (XCHAR_TABLE
3946                                      (regex_emacs_buffer->mirror_syntax_table),
3947                                      j) == Sword)
3948                                         fastmap[j] = 1;
3949                         break;
3950 #endif
3951
3952                 case notwordchar:
3953 #ifdef emacs
3954                         k = (int)Sword;
3955                         goto matchnotsyntax;
3956 #else
3957                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3958                                 if (SYNTAX_UNSAFE
3959                                     (XCHAR_TABLE
3960                                      (regex_emacs_buffer->mirror_syntax_table),
3961                                      j) != Sword)
3962                                         fastmap[j] = 1;
3963                         break;
3964 #endif
3965
3966                 case anychar: {
3967                         int fastmap_newline = fastmap['\n'];
3968
3969                         /* `.' matches anything ...  */
3970 #ifdef MULE
3971                         /* "anything" only includes bytes that can be the
3972                            first byte of a character. */
3973                         for (j = 0; j < 0xA0; j++)
3974                                 fastmap[j] = 1;
3975 #else
3976                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3977                                 fastmap[j] = 1;
3978 #endif
3979
3980                         /* ... except perhaps newline.  */
3981                         if (!(bufp->syntax & RE_DOT_NEWLINE))
3982                                 fastmap['\n'] = fastmap_newline;
3983
3984                         /* Return if we have already set `can_be_null'; if we
3985                            have, then the fastmap is irrelevant.  Something's
3986                            wrong here.  */
3987                         else if (bufp->can_be_null)
3988                                 goto done;
3989
3990                         /* Otherwise, have to check alternative paths.  */
3991                         break;
3992                 }
3993
3994 #ifdef emacs
3995                 case wordbound:
3996                 case notwordbound:
3997                 case wordbeg:
3998                 case wordend:
3999                 case notsyntaxspec:
4000                 case syntaxspec:
4001                         /* This match depends on text properties.  These end with
4002                            aborting optimizations.  */
4003                         bufp->can_be_null = 1;
4004                         goto done;
4005
4006 #ifdef emacs
4007                 matchsyntax:
4008 #ifdef MULE
4009                         for (j = 0; j < 0x80; j++)
4010                                 if (SYNTAX_UNSAFE
4011                                     (XCHAR_TABLE
4012                                      (regex_emacs_buffer->mirror_syntax_table),
4013                                      j) == (enum syntaxcode)k)
4014                                         fastmap[j] = 1;
4015                         for (j = 0x80; j < 0xA0; j++) {
4016                                 if (LEADING_BYTE_PREFIX_P(j))
4017                                         /* too complicated to calculate this right */
4018                                         fastmap[j] = 1;
4019                                 else {
4020                                         int multi_p;
4021                                         Lisp_Object cset;
4022
4023                                         cset = CHARSET_BY_LEADING_BYTE(j);
4024                                         if (CHARSETP(cset)) {
4025                                                 if (charset_syntax
4026                                                     (regex_emacs_buffer, cset,
4027                                                      &multi_p)
4028                                                     == Sword || multi_p)
4029                                                         fastmap[j] = 1;
4030                                         }
4031                                 }
4032                         }
4033 #else                           /* not MULE */
4034                         for (j = 0; j < (1 << BYTEWIDTH); j++)
4035                                 if (SYNTAX_UNSAFE
4036                                     (XCHAR_TABLE
4037                                      (regex_emacs_buffer->mirror_syntax_table),
4038                                      j) == (enum syntaxcode)k)
4039                                         fastmap[j] = 1;
4040 #endif                          /* MULE */
4041                         break;
4042
4043                 matchnotsyntax:
4044 #ifdef MULE
4045                         for (j = 0; j < 0x80; j++)
4046                                 if (SYNTAX_UNSAFE
4047                                     (XCHAR_TABLE
4048                                      (regex_emacs_buffer->mirror_syntax_table),
4049                                      j) != (enum syntaxcode)k)
4050                                         fastmap[j] = 1;
4051                         for (j = 0x80; j < 0xA0; j++) {
4052                                 if (LEADING_BYTE_PREFIX_P(j))
4053                                         /* too complicated to calculate this right */
4054                                         fastmap[j] = 1;
4055                                 else {
4056                                         int multi_p;
4057                                         Lisp_Object cset;
4058
4059                                         cset = CHARSET_BY_LEADING_BYTE(j);
4060                                         if (CHARSETP(cset)) {
4061                                                 if (charset_syntax
4062                                                     (regex_emacs_buffer, cset,
4063                                                      &multi_p)
4064                                                     != Sword || multi_p)
4065                                                         fastmap[j] = 1;
4066                                         }
4067                                 }
4068                         }
4069 #else                           /* not MULE */
4070                         for (j = 0; j < (1 << BYTEWIDTH); j++)
4071                                 if (SYNTAX_UNSAFE
4072                                     (XCHAR_TABLE
4073                                      (regex_emacs_buffer->mirror_syntax_table),
4074                                      j) != (enum syntaxcode)k)
4075                                         fastmap[j] = 1;
4076 #endif                          /* MULE */
4077                         break;
4078 #endif                          /* emacs */
4079
4080 #ifdef MULE
4081 /* 97/2/17 jhod category patch */
4082                 case categoryspec:
4083                 case notcategoryspec:
4084                         bufp->can_be_null = 1;
4085                         return 0;
4086 /* end if category patch */
4087 #endif                          /* MULE */
4088
4089                         /* All cases after this match the empty string.  These
4090                            end with `continue'.  */
4091
4092                 case before_dot:
4093                 case at_dot:
4094                 case after_dot:
4095                         continue;
4096 #endif                          /* emacs */
4097
4098                 case no_op:
4099                 case begline:
4100                 case endline:
4101                 case begbuf:
4102                 case endbuf:
4103 #ifndef emacs
4104                 case wordbound:
4105                 case notwordbound:
4106                 case wordbeg:
4107                 case wordend:
4108 #endif
4109                 case push_dummy_failure:
4110                         continue;
4111
4112                 case jump_n:
4113                 case pop_failure_jump:
4114                 case maybe_pop_jump:
4115                 case jump:
4116                 case jump_past_alt:
4117                 case dummy_failure_jump:
4118                         EXTRACT_NUMBER_AND_INCR(j, p);
4119                         p += j;
4120                         if (j > 0)
4121                                 continue;
4122
4123                         /* Jump backward implies we just went through the body
4124                            of a loop and matched nothing.  Opcode jumped to
4125                            should be `on_failure_jump' or `succeed_n'.  Just
4126                            treat it like an ordinary jump.  For a * loop, it has
4127                            pushed its failure point already; if so, discard that
4128                            as redundant.  */
4129                         if ((re_opcode_t) * p != on_failure_jump
4130                             && (re_opcode_t) * p != succeed_n)
4131                                 continue;
4132
4133                         p++;
4134                         EXTRACT_NUMBER_AND_INCR(j, p);
4135                         p += j;
4136
4137                         /* If what's on the stack is where we are now, pop
4138                            it.  */
4139                         if (!FAIL_STACK_EMPTY()
4140                             && fail_stack.stack[fail_stack.avail - 1].pointer ==
4141                             p)
4142                                 fail_stack.avail--;
4143
4144                         continue;
4145
4146                 case on_failure_jump:
4147                 case on_failure_keep_string_jump:
4148                       handle_on_failure_jump:
4149                         EXTRACT_NUMBER_AND_INCR(j, p);
4150
4151                         /* For some patterns, e.g., `(a?)?', `p+j' here points
4152                            to the end of the pattern.  We don't want to push
4153                            such a point, since when we restore it above,
4154                            entering the switch will increment `p' past the end
4155                            of the pattern.  We don't need to push such a point
4156                            since we obviously won't find any more fastmap
4157                            entries beyond `pend'.  Such a pattern can match the
4158                            null string, though.  */
4159                         if (p + j < pend) {
4160                                 if (!PUSH_PATTERN_OP(p + j, fail_stack)) {
4161                                         RESET_FAIL_STACK();
4162                                         return -2;
4163                                 }
4164                         } else
4165                                 bufp->can_be_null = 1;
4166
4167                         if (succeed_n_p) {
4168                                 EXTRACT_NUMBER_AND_INCR(k, p);  /* Skip the n.  */
4169                                 succeed_n_p = false;
4170                         }
4171
4172                         continue;
4173
4174                 case succeed_n:
4175                         /* Get to the number of times to succeed.  */
4176                         p += 2;
4177
4178                         /* Increment p past the n for when k != 0.  */
4179                         EXTRACT_NUMBER_AND_INCR(k, p);
4180                         if (k == 0) {
4181                                 p -= 4;
4182                                 succeed_n_p = true;     /* Spaghetti code alert.  */
4183                                 goto handle_on_failure_jump;
4184                         }
4185                         continue;
4186
4187                 case set_number_at:
4188                         p += 4;
4189                         continue;
4190
4191                 case start_memory:
4192                 case stop_memory:
4193                         p += 2;
4194                         continue;
4195
4196                 case succeed:
4197                 default:
4198                         abort();        /* We have listed all the cases.  */
4199                 }               /* switch *p++ */
4200
4201                 /* Getting here means we have found the possible starting
4202                    characters for one path of the pattern -- and that the empty
4203                    string does not match.  We need not follow this path further.
4204                    Instead, look at the next alternative (remembered on the
4205                    stack), or quit if no more.  The test at the top of the loop
4206                    does these things.  */
4207                 path_can_be_null = false;
4208                 p = pend;
4209         }                       /* while p */
4210
4211         /* Set `can_be_null' for the last path (also the first path, if the
4212            pattern is empty).  */
4213         bufp->can_be_null |= path_can_be_null;
4214
4215 done:
4216         RESET_FAIL_STACK();
4217         return 0;
4218 }       /* re_compile_fastmap */
4219 \f
4220 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4221    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4222    this memory for recording register information.  STARTS and ENDS
4223    must be allocated using the malloc library routine, and must each
4224    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4225
4226    If NUM_REGS == 0, then subsequent matches should allocate their own
4227    register data.
4228
4229    Unless this function is called, the first search or match using
4230    PATTERN_BUFFER will allocate its own register data, without
4231    freeing the old data.  */
4232
4233 void
4234 re_set_registers(struct re_pattern_buffer *bufp, struct re_registers *regs,
4235                  unsigned num_regs, regoff_t * starts, regoff_t * ends)
4236 {
4237         if (num_regs) {
4238                 bufp->regs_allocated = REGS_REALLOCATE;
4239                 regs->num_regs = num_regs;
4240                 regs->start = starts;
4241                 regs->end = ends;
4242         } else {
4243                 bufp->regs_allocated = REGS_UNALLOCATED;
4244                 regs->num_regs = 0;
4245                 regs->start = regs->end = (regoff_t *) 0;
4246         }
4247 }
4248 \f
4249 /* Searching routines.  */
4250
4251 /* Like re_search_2, below, but only one string is specified, and
4252    doesn't let you say where to stop matching. */
4253
4254 int
4255 re_search(struct re_pattern_buffer *bufp, const char *string, int size,
4256           int startpos, int range, struct re_registers *regs)
4257 {
4258         return re_search_2(bufp, NULL, 0, string, size, startpos, range,
4259                            regs, size);
4260 }
4261
4262 #ifndef emacs
4263 /* Snarfed from src/lisp.h, needed for compiling [ce]tags. */
4264 # define bytecount_to_charcount(ptr, len) (len)
4265 # define charcount_to_bytecount(ptr, len) (len)
4266 typedef int Charcount;
4267 #endif
4268
4269 /* Using the compiled pattern in BUFP->buffer, first tries to match the
4270    virtual concatenation of STRING1 and STRING2, starting first at index
4271    STARTPOS, then at STARTPOS + 1, and so on.
4272
4273    With MULE, STARTPOS is a byte position, not a char position.  And the
4274    search will increment STARTPOS by the width of the current leading
4275    character.
4276
4277    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
4278
4279    RANGE is how far to scan while trying to match.  RANGE = 0 means try
4280    only at STARTPOS; in general, the last start tried is STARTPOS +
4281    RANGE.
4282
4283    With MULE, RANGE is a byte position, not a char position.  The last
4284    start tried is the character starting <= STARTPOS + RANGE.
4285
4286    In REGS, return the indices of the virtual concatenation of STRING1
4287    and STRING2 that matched the entire BUFP->buffer and its contained
4288    subexpressions.
4289
4290    Do not consider matching one past the index STOP in the virtual
4291    concatenation of STRING1 and STRING2.
4292
4293    We return either the position in the strings at which the match was
4294    found, -1 if no match, or -2 if error (such as failure
4295    stack overflow).  */
4296
4297 int
4298 re_search_2(struct re_pattern_buffer *bufp, const char *str1,
4299             int size1, const char *str2, int size2, int startpos,
4300             int range, struct re_registers *regs, int stop)
4301 {
4302         int val;
4303         re_char *string1 = (re_char *) str1;
4304         re_char *string2 = (re_char *) str2;
4305         REGISTER char *fastmap = bufp->fastmap;
4306         REGISTER RE_TRANSLATE_TYPE translate = bufp->translate;
4307         int total_size = size1 + size2;
4308         int endpos = startpos + range;
4309 #ifdef REGEX_BEGLINE_CHECK
4310         int anchored_at_begline = 0;
4311 #endif
4312         re_char *d;
4313         Charcount d_size;
4314
4315         /* Check for out-of-range STARTPOS.  */
4316         if (startpos < 0 || startpos > total_size)
4317                 return -1;
4318
4319         /* Fix up RANGE if it might eventually take us outside
4320            the virtual concatenation of STRING1 and STRING2.  */
4321         if (endpos < 0)
4322                 range = 0 - startpos;
4323         else if (endpos > total_size)
4324                 range = total_size - startpos;
4325
4326         /* If the search isn't to be a backwards one, don't waste time in a
4327            search for a pattern that must be anchored.  */
4328         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
4329             && range > 0) {
4330                 if (startpos > 0)
4331                         return -1;
4332                 else {
4333                         d = ((const unsigned char *)
4334                              (startpos >=
4335                               size1 ? string2 - size1 : string1) + startpos);
4336                         range = charcount_to_bytecount(d, 1);
4337                 }
4338         }
4339 #ifdef emacs
4340         /* In a forward search for something that starts with \=.
4341            don't keep searching past point.  */
4342         if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot
4343             && range > 0) {
4344                 range =
4345                     BUF_PT(regex_emacs_buffer) - BUF_BEGV(regex_emacs_buffer)
4346                     - startpos;
4347                 if (range < 0)
4348                         return -1;
4349         }
4350 #endif                          /* emacs */
4351
4352         /* Update the fastmap now if not correct already.  */
4353         if (fastmap && !bufp->fastmap_accurate)
4354                 if (re_compile_fastmap(bufp) == -2)
4355                         return -2;
4356
4357 #ifdef REGEX_BEGLINE_CHECK
4358         {
4359                 unsigned long i = 0;
4360
4361                 while (i < bufp->used) {
4362                         if (bufp->buffer[i] == start_memory ||
4363                             bufp->buffer[i] == stop_memory)
4364                                 i += 2;
4365                         else
4366                                 break;
4367                 }
4368                 anchored_at_begline = i < bufp->used
4369                     && bufp->buffer[i] == begline;
4370         }
4371 #endif
4372
4373 #ifdef emacs
4374         SETUP_SYNTAX_CACHE_FOR_OBJECT(regex_match_object,
4375                                       regex_emacs_buffer,
4376                                       SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR
4377                                       (regex_match_object, regex_emacs_buffer,
4378                                        startpos), 1);
4379 #endif
4380
4381         /* Loop through the string, looking for a place to start matching.  */
4382         for (;;) {
4383 #ifdef REGEX_BEGLINE_CHECK
4384                 /* If the regex is anchored at the beginning of a line (i.e. with a ^),
4385                    then we can speed things up by skipping to the next beginning-of-
4386                    line. */
4387                 if (anchored_at_begline && startpos > 0 && startpos != size1 &&
4388                     range > 0) {
4389                         /* whose stupid idea was it anyway to make this
4390                            function take two strings to match?? */
4391                         int lim = 0;
4392                         int irange = range;
4393
4394                         if (startpos < size1 && startpos + range >= size1)
4395                                 lim = range - (size1 - startpos);
4396
4397                         d = ((const unsigned char *)
4398                              (startpos >=
4399                               size1 ? string2 - size1 : string1) + startpos);
4400                         DEC_CHARPTR(d); /* Ok, since startpos != size1. */
4401                         d_size = charcount_to_bytecount(d, 1);
4402
4403                         if (TRANSLATE_P(translate))
4404                                 while (range > lim && *d != '\n') {
4405                                         d += d_size;    /* Speedier INC_CHARPTR(d) */
4406                                         d_size = charcount_to_bytecount(d, 1);
4407                                         range -= d_size;
4408                         } else
4409                                 while (range > lim && *d != '\n') {
4410                                         d += d_size;    /* Speedier INC_CHARPTR(d) */
4411                                         d_size = charcount_to_bytecount(d, 1);
4412                                         range -= d_size;
4413                                 }
4414
4415                         startpos += irange - range;
4416                 }
4417 #endif                          /* REGEX_BEGLINE_CHECK */
4418
4419                 /* If a fastmap is supplied, skip quickly over characters that
4420                    cannot be the start of a match.  If the pattern can match the
4421                    null string, however, we don't need to skip characters; we want
4422                    the first null string.  */
4423                 if (fastmap && startpos < total_size && !bufp->can_be_null) {
4424                         if (range > 0) {        /* Searching forwards.  */
4425                                 int lim = 0;
4426                                 int irange = range;
4427
4428                                 if (startpos < size1
4429                                     && startpos + range >= size1)
4430                                         lim = range - (size1 - startpos);
4431
4432                                 d = ((const unsigned char *)
4433                                      (startpos >=
4434                                       size1 ? string2 - size1 : string1) +
4435                                      startpos);
4436
4437                                 /* Written out as an if-else to avoid testing `translate'
4438                                    inside the loop.  */
4439                                 if (TRANSLATE_P(translate))
4440                                         while (range > lim) {
4441 #ifdef MULE
4442                                                 Emchar buf_ch;
4443                                                 Bufbyte str[MAX_EMCHAR_LEN];
4444
4445                                                 buf_ch = charptr_emchar(d);
4446                                                 buf_ch = RE_TRANSLATE(buf_ch);
4447                                                 set_charptr_emchar(str, buf_ch);
4448                                                 if (buf_ch >= 0200
4449                                                     || fastmap[(unsigned char)
4450                                                                *str])
4451                                                         break;
4452 #else
4453                                                 if (fastmap
4454                                                     [(unsigned char)
4455                                                      RE_TRANSLATE(*d)])
4456                                                         break;
4457 #endif                          /* MULE */
4458                                                 d_size =
4459                                                     charcount_to_bytecount(d,
4460                                                                            1);
4461                                                 range -= d_size;
4462                                                 d += d_size;    /* Speedier INC_CHARPTR(d) */
4463                                 } else
4464                                         while (range > lim && !fastmap[*d]) {
4465                                                 d_size =
4466                                                     charcount_to_bytecount(d,
4467                                                                            1);
4468                                                 range -= d_size;
4469                                                 d += d_size;    /* Speedier INC_CHARPTR(d) */
4470                                         }
4471
4472                                 startpos += irange - range;
4473                         } else {        /* Searching backwards.  */
4474
4475                                 Emchar c = (size1 == 0 || startpos >= size1
4476                                             ? charptr_emchar(string2 +
4477                                                              startpos - size1)
4478                                             : charptr_emchar(string1 +
4479                                                              startpos));
4480                                 c = TRANSLATE(c);
4481 #ifdef MULE
4482                                 if (!(c >= 0200 || fastmap[(unsigned char)c]))
4483                                         goto advance;
4484 #else
4485                                 if (!fastmap[(unsigned char)c])
4486                                         goto advance;
4487 #endif
4488                         }
4489                 }
4490
4491                 /* If can't match the null string, and that's all we have left, fail.  */
4492                 if (range >= 0 && startpos == total_size && fastmap
4493                     && !bufp->can_be_null)
4494                         return -1;
4495
4496 #ifdef emacs                    /* XEmacs added, w/removal of immediate_quit */
4497                 if (!no_quit_in_re_search)
4498                         QUIT;
4499 #endif
4500                 val = re_match_2_internal(bufp, string1, size1, string2, size2,
4501                                           startpos, regs, stop);
4502 #ifndef REGEX_MALLOC
4503 #ifdef C_ALLOCA
4504                 alloca(0);
4505 #endif
4506 #endif
4507
4508                 if (val >= 0)
4509                         return startpos;
4510
4511                 if (val == -2)
4512                         return -2;
4513
4514               advance:
4515                 if (!range)
4516                         break;
4517                 else if (range > 0) {
4518                         d = ((const unsigned char *)
4519                              (startpos >=
4520                               size1 ? string2 - size1 : string1) + startpos);
4521                         d_size = charcount_to_bytecount(d, 1);
4522                         range -= d_size;
4523                         startpos += d_size;
4524                 } else {
4525                         /* Note startpos > size1 not >=.  If we are on the
4526                            string1/string2 boundary, we want to backup into string1. */
4527                         d = ((const unsigned char *)
4528                              (startpos >
4529                               size1 ? string2 - size1 : string1) + startpos);
4530                         DEC_CHARPTR(d);
4531                         d_size = charcount_to_bytecount(d, 1);
4532                         range += d_size;
4533                         startpos -= d_size;
4534                 }
4535         }
4536         return -1;
4537 }                               /* re_search_2 */
4538 \f
4539 /* Declarations and macros for re_match_2.  */
4540
4541 /* This converts PTR, a pointer into one of the search strings `string1'
4542    and `string2' into an offset from the beginning of that string.  */
4543 #define POINTER_TO_OFFSET(ptr)                  \
4544   (FIRST_STRING_P (ptr)                         \
4545    ? ((regoff_t) ((ptr) - string1))             \
4546    : ((regoff_t) ((ptr) - string2 + size1)))
4547
4548 /* Macros for dealing with the split strings in re_match_2.  */
4549
4550 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
4551
4552 /* Call before fetching a character with *d.  This switches over to
4553    string2 if necessary.  */
4554 #define REGEX_PREFETCH()                                                        \
4555   while (d == dend)                                                     \
4556     {                                                                   \
4557       /* End of string2 => fail.  */                                    \
4558       if (dend == end_match_2)                                          \
4559         goto fail;                                                      \
4560       /* End of string1 => advance to string2.  */                      \
4561       d = string2;                                                      \
4562       dend = end_match_2;                                               \
4563     }
4564
4565 /* Test if at very beginning or at very end of the virtual concatenation
4566    of `string1' and `string2'.  If only one string, it's `string2'.  */
4567 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4568 #define AT_STRINGS_END(d) ((d) == end2)
4569
4570 /* XEmacs change:
4571    If the given position straddles the string gap, return the equivalent
4572    position that is before or after the gap, respectively; otherwise,
4573    return the same position. */
4574 #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d))
4575 #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d))
4576
4577 /* Test if CH is a word-constituent character. (XEmacs change) */
4578 #define WORDCHAR_P_UNSAFE(ch)                                              \
4579   (SYNTAX_UNSAFE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),   \
4580                                ch) == Sword)
4581
4582 /* Free everything we malloc.  */
4583 #ifdef MATCH_MAY_ALLOCATE
4584 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
4585 #define FREE_VARIABLES()                                                \
4586   do {                                                                  \
4587     REGEX_FREE_STACK (fail_stack.stack);                                \
4588     FREE_VAR (regstart);                                                \
4589     FREE_VAR (regend);                                                  \
4590     FREE_VAR (old_regstart);                                            \
4591     FREE_VAR (old_regend);                                              \
4592     FREE_VAR (best_regstart);                                           \
4593     FREE_VAR (best_regend);                                             \
4594     FREE_VAR (reg_info);                                                \
4595     FREE_VAR (reg_dummy);                                               \
4596     FREE_VAR (reg_info_dummy);                                          \
4597   } while (0)
4598 #else                           /* not MATCH_MAY_ALLOCATE */
4599 #define FREE_VARIABLES() ((void)0)      /* Do nothing!  But inhibit gcc warning.  */
4600 #endif                          /* MATCH_MAY_ALLOCATE */
4601
4602 /* These values must meet several constraints.  They must not be valid
4603    register values; since we have a limit of 255 registers (because
4604    we use only one byte in the pattern for the register number), we can
4605    use numbers larger than 255.  They must differ by 1, because of
4606    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
4607    be larger than the value for the highest register, so we do not try
4608    to actually save any registers when none are active.  */
4609 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4610 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4611 \f
4612 /* Matching routines.  */
4613
4614 #ifndef emacs                   /* Emacs never uses this.  */
4615 /* re_match is like re_match_2 except it takes only a single string.  */
4616
4617 int
4618 re_match(struct re_pattern_buffer *bufp, const char *string, int size,
4619          int pos, struct re_registers *regs)
4620 {
4621         int result =
4622             re_match_2_internal(bufp, NULL, 0, (re_char *) string, size,
4623                                 pos, regs, size);
4624         alloca(0);
4625         return result;
4626 }
4627 #endif                          /* not emacs */
4628
4629 /* re_match_2 matches the compiled pattern in BUFP against the
4630    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and
4631    SIZE2, respectively).  We start matching at POS, and stop matching
4632    at STOP.
4633
4634    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4635    store offsets for the substring each group matched in REGS.  See the
4636    documentation for exactly how many groups we fill.
4637
4638    We return -1 if no match, -2 if an internal error (such as the
4639    failure stack overflowing).  Otherwise, we return the length of the
4640    matched substring.  */
4641
4642 int
4643 re_match_2(struct re_pattern_buffer *bufp, const char *string1,
4644            int size1, const char *string2, int size2, int pos,
4645            struct re_registers *regs, int stop)
4646 {
4647         int result;
4648
4649 #ifdef emacs
4650         SETUP_SYNTAX_CACHE_FOR_OBJECT(regex_match_object,
4651                                       regex_emacs_buffer,
4652                                       SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR
4653                                       (regex_match_object, regex_emacs_buffer,
4654                                        pos), 1);
4655 #endif
4656
4657         result = re_match_2_internal(bufp, (re_char *) string1, size1,
4658                                      (re_char *) string2, size2,
4659                                      pos, regs, stop);
4660
4661         alloca(0);
4662         return result;
4663 }
4664
4665 /* This is a separate function so that we can force an alloca cleanup
4666    afterwards.  */
4667 static int
4668 re_match_2_internal(struct re_pattern_buffer *bufp, re_char * string1,
4669                     int size1, re_char * string2, int size2, int pos,
4670                     struct re_registers *regs, int stop)
4671 {
4672         /* General temporaries.  */
4673         int mcnt;
4674         unsigned char *p1;
4675         int should_succeed;     /* XEmacs change */
4676
4677         /* Just past the end of the corresponding string.  */
4678         re_char *end1, *end2;
4679
4680         /* Pointers into string1 and string2, just past the last characters in
4681            each to consider matching.  */
4682         re_char *end_match_1, *end_match_2;
4683
4684         /* Where we are in the data, and the end of the current string.  */
4685         const re_char *d, *dend;
4686
4687         /* Where we are in the pattern, and the end of the pattern.  */
4688         unsigned char *p = bufp->buffer;
4689         REGISTER unsigned char *pend = p + bufp->used;
4690
4691         /* Mark the opcode just after a start_memory, so we can test for an
4692            empty subpattern when we get to the stop_memory.  */
4693         re_char *just_past_start_mem = 0;
4694
4695         /* We use this to map every character in the string.  */
4696         RE_TRANSLATE_TYPE translate = bufp->translate;
4697
4698         /* Failure point stack.  Each place that can handle a failure further
4699            down the line pushes a failure point on this stack.  It consists of
4700            restart, regend, and reg_info for all registers corresponding to
4701            the subexpressions we're currently inside, plus the number of such
4702            registers, and, finally, two char *'s.  The first char * is where
4703            to resume scanning the pattern; the second one is where to resume
4704            scanning the strings.  If the latter is zero, the failure point is
4705            a ``dummy''; if a failure happens and the failure point is a dummy,
4706            it gets discarded and the next one is tried.  */
4707 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, this is global.  */
4708         fail_stack_type fail_stack;
4709 #endif
4710 #ifdef REGEX_DEBUG_FLAG
4711         static unsigned failure_id;
4712         int nfailure_points_pushed = 0, nfailure_points_popped = 0;
4713 #endif
4714
4715 #ifdef REL_ALLOC
4716         /* This holds the pointer to the failure stack, when
4717            it is allocated relocatably.  */
4718         fail_stack_elt_t *failure_stack_ptr;
4719 #endif
4720
4721         /* We fill all the registers internally, independent of what we
4722            return, for use in backreferences.  The number here includes
4723            an element for register zero.  */
4724         int num_regs = bufp->re_ngroups + 1;
4725
4726         /* The currently active registers.  */
4727         int lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4728         int highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4729
4730         /* Information on the contents of registers. These are pointers into
4731            the input strings; they record just what was matched (on this
4732            attempt) by a subexpression part of the pattern, that is, the
4733            regnum-th regstart pointer points to where in the pattern we began
4734            matching and the regnum-th regend points to right after where we
4735            stopped matching the regnum-th subexpression.  (The zeroth register
4736            keeps track of what the whole pattern matches.)  */
4737 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4738         re_char **regstart, **regend;
4739 #endif
4740
4741         /* If a group that's operated upon by a repetition operator fails to
4742            match anything, then the register for its start will need to be
4743            restored because it will have been set to wherever in the string we
4744            are when we last see its open-group operator.  Similarly for a
4745            register's end.  */
4746 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4747         re_char **old_regstart, **old_regend;
4748 #endif
4749
4750         /* The is_active field of reg_info helps us keep track of which (possibly
4751            nested) subexpressions we are currently in. The matched_something
4752            field of reg_info[reg_num] helps us tell whether or not we have
4753            matched any of the pattern so far this time through the reg_num-th
4754            subexpression.  These two fields get reset each time through any
4755            loop their register is in.  */
4756 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, this is global.  */
4757         register_info_type *reg_info;
4758 #endif
4759
4760         /* The following record the register info as found in the above
4761            variables when we find a match better than any we've seen before.
4762            This happens as we backtrack through the failure points, which in
4763            turn happens only if we have not yet matched the entire string. */
4764         unsigned best_regs_set = false;
4765 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4766         re_char **best_regstart, **best_regend;
4767 #endif
4768
4769         /* Logically, this is `best_regend[0]'.  But we don't want to have to
4770            allocate space for that if we're not allocating space for anything
4771            else (see below).  Also, we never need info about register 0 for
4772            any of the other register vectors, and it seems rather a kludge to
4773            treat `best_regend' differently than the rest.  So we keep track of
4774            the end of the best match so far in a separate variable.  We
4775            initialize this to NULL so that when we backtrack the first time
4776            and need to test it, it's not garbage.  */
4777         re_char *match_end = NULL;
4778
4779         /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
4780         int set_regs_matched_done = 0;
4781
4782         /* Used when we pop values we don't care about.  */
4783 #ifdef MATCH_MAY_ALLOCATE       /* otherwise, these are global.  */
4784         re_char **reg_dummy;
4785         register_info_type *reg_info_dummy;
4786 #endif
4787
4788 #ifdef REGEX_DEBUG_FLAG
4789         /* Counts the total number of registers pushed.  */
4790         unsigned num_regs_pushed = 0;
4791 #endif
4792
4793         /* 1 if this match ends in the same string (string1 or string2)
4794            as the best previous match.  */
4795         re_bool same_str_p;
4796
4797         /* 1 if this match is the best seen so far.  */
4798         re_bool best_match_p;
4799
4800         DEBUG_PRINT1("\n\nEntering re_match_2.\n");
4801
4802         INIT_FAIL_STACK();
4803
4804 #ifdef MATCH_MAY_ALLOCATE
4805         /* Do not bother to initialize all the register variables if there are
4806            no groups in the pattern, as it takes a fair amount of time.  If
4807            there are groups, we include space for register 0 (the whole
4808            pattern), even though we never use it, since it simplifies the
4809            array indexing.  We should fix this.  */
4810         if (bufp->re_ngroups) {
4811                 regstart = REGEX_TALLOC(num_regs, re_char *);
4812                 regend = REGEX_TALLOC(num_regs, re_char *);
4813                 old_regstart = REGEX_TALLOC(num_regs, re_char *);
4814                 old_regend = REGEX_TALLOC(num_regs, re_char *);
4815                 best_regstart = REGEX_TALLOC(num_regs, re_char *);
4816                 best_regend = REGEX_TALLOC(num_regs, re_char *);
4817                 reg_info = REGEX_TALLOC(num_regs, register_info_type);
4818                 reg_dummy = REGEX_TALLOC(num_regs, re_char *);
4819                 reg_info_dummy = REGEX_TALLOC(num_regs, register_info_type);
4820
4821                 if (!
4822                     (regstart && regend && old_regstart && old_regend
4823                      && reg_info && best_regstart && best_regend && reg_dummy
4824                      && reg_info_dummy)) {
4825                         FREE_VARIABLES();
4826                         return -2;
4827                 }
4828         } else {
4829                 /* We must initialize all our variables to NULL, so that
4830                    `FREE_VARIABLES' doesn't try to free them.  */
4831                 regstart = regend = old_regstart = old_regend = best_regstart
4832                     = best_regend = reg_dummy = NULL;
4833                 reg_info = reg_info_dummy = (register_info_type *) NULL;
4834         }
4835 #endif                          /* MATCH_MAY_ALLOCATE */
4836
4837         /* The starting position is bogus.  */
4838         if (pos < 0 || pos > size1 + size2) {
4839                 FREE_VARIABLES();
4840                 return -1;
4841         }
4842
4843         /* Initialize subexpression text positions to -1 to mark ones that no
4844            start_memory/stop_memory has been seen for. Also initialize the
4845            register information struct.  */
4846         for (mcnt = 1; mcnt < num_regs; mcnt++) {
4847                 regstart[mcnt] = regend[mcnt]
4848                     = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4849
4850                 REG_MATCH_NULL_STRING_P(reg_info[mcnt]) =
4851                     MATCH_NULL_UNSET_VALUE;
4852                 IS_ACTIVE(reg_info[mcnt]) = 0;
4853                 MATCHED_SOMETHING(reg_info[mcnt]) = 0;
4854                 EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
4855         }
4856         /* We move `string1' into `string2' if the latter's empty -- but not if
4857            `string1' is null.  */
4858         if (size2 == 0 && string1 != NULL) {
4859                 string2 = string1;
4860                 size2 = size1;
4861                 string1 = 0;
4862                 size1 = 0;
4863         }
4864         end1 = string1 + size1;
4865         end2 = string2 + size2;
4866
4867         /* Compute where to stop matching, within the two strings.  */
4868         if (stop <= size1) {
4869                 end_match_1 = string1 + stop;
4870                 end_match_2 = string2;
4871         } else {
4872                 end_match_1 = end1;
4873                 end_match_2 = string2 + stop - size1;
4874         }
4875
4876         /* `p' scans through the pattern as `d' scans through the data.
4877            `dend' is the end of the input string that `d' points within.  `d'
4878            is advanced into the following input string whenever necessary, but
4879            this happens before fetching; therefore, at the beginning of the
4880            loop, `d' can be pointing at the end of a string, but it cannot
4881            equal `string2'.  */
4882         if (size1 > 0 && pos <= size1) {
4883                 d = string1 + pos;
4884                 dend = end_match_1;
4885         } else {
4886                 d = string2 + pos - size1;
4887                 dend = end_match_2;
4888         }
4889
4890         DEBUG_PRINT1("The compiled pattern is: \n");
4891         DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
4892         DEBUG_PRINT1("The string to match is: `");
4893         DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
4894         DEBUG_PRINT1("'\n");
4895
4896         /* This loops over pattern commands.  It exits by returning from the
4897            function if the match is complete, or it drops through if the match
4898            fails at this starting point in the input data.  */
4899         for (;;) {
4900                 DEBUG_PRINT2("\n0x%lx: ", (long)p);
4901 #ifdef emacs                    /* XEmacs added, w/removal of immediate_quit */
4902                 if (!no_quit_in_re_search)
4903                         QUIT;
4904 #endif
4905
4906                 if (p == pend) {
4907                         /* End of pattern means we might have succeeded.  */
4908                         DEBUG_PRINT1("end of pattern ... ");
4909
4910                         /* If we haven't matched the entire string, and we want
4911                            the longest match, try backtracking.  */
4912                         if (d != end_match_2) {
4913                                 same_str_p = (FIRST_STRING_P(match_end)
4914                                               == MATCHING_IN_FIRST_STRING);
4915
4916                                 /* AIX compiler got confused when this was
4917                                    combined with the previous declaration.  */
4918                                 if (same_str_p)
4919                                         best_match_p = d > match_end;
4920                                 else
4921                                         best_match_p =
4922                                             !MATCHING_IN_FIRST_STRING;
4923
4924                                 DEBUG_PRINT1("backtracking.\n");
4925
4926                                 if (!FAIL_STACK_EMPTY()) {
4927                                         /* More failure points to try.  */
4928
4929                                         /* If exceeds best match so far, save
4930                                            it.  */
4931                                         if (!best_regs_set || best_match_p) {
4932                                                 best_regs_set = true;
4933                                                 match_end = d;
4934
4935                                                 DEBUG_PRINT1
4936                                                     ("\nSAVING match as best so far.\n");
4937
4938                                                 for (mcnt = 1; mcnt < num_regs;
4939                                                      mcnt++) {
4940                                                         best_regstart[mcnt] =
4941                                                             regstart[mcnt];
4942                                                         best_regend[mcnt] =
4943                                                             regend[mcnt];
4944                                                 }
4945                                         }
4946                                         goto fail;
4947                                 }
4948
4949                                 /* If no failure points, don't restore garbage.
4950                                    And if last match is real best match, don't
4951                                    restore second best one. */
4952                                 else if (best_regs_set && !best_match_p) {
4953                                       restore_best_regs:
4954                                         /* Restore best match.  It may happen
4955                                            that `dend == end_match_1' while the
4956                                            restored d is in string2.  For
4957                                            example, the pattern `x.*y.*z'
4958                                            against the strings `x-' and `y-z-',
4959                                            if the two strings are not
4960                                            consecutive in memory.  */
4961                                         DEBUG_PRINT1
4962                                             ("Restoring best registers.\n");
4963
4964                                         d = match_end;
4965                                         dend = ((d >= string1 && d <= end1)
4966                                                 ? end_match_1 : end_match_2);
4967
4968                                         for (mcnt = 1; mcnt < num_regs; mcnt++) {
4969                                                 regstart[mcnt] =
4970                                                     best_regstart[mcnt];
4971                                                 regend[mcnt] =
4972                                                     best_regend[mcnt];
4973                                         }
4974                                 }
4975                         }
4976                         /* d != end_match_2 */
4977                 succeed_label:
4978                         DEBUG_PRINT1("Accepting match.\n");
4979                         {
4980                                 int num_nonshy_regs = bufp->re_nsub + 1;
4981                                 /* If caller wants register contents data back,
4982                                    fill REGS.  */
4983                                 if (regs && !bufp->no_sub) {
4984                                         /* Have the register data arrays been
4985                                            allocated?  */
4986                                         if (bufp->regs_allocated == REGS_UNALLOCATED) {
4987                                           /* No.  So allocate them with malloc.
4988                                              We need one extra element beyond
4989                                              `num_regs' for the `-1' marker GNU
4990                                              code uses.  */
4991                                                 regs->num_regs = MAX(RE_NREGS, num_nonshy_regs + 1);
4992                                                 regs->start = TALLOC(regs->num_regs, regoff_t);
4993                                                 regs->end = TALLOC(regs->num_regs, regoff_t);
4994                                                 if (regs->start == NULL || regs->end == NULL) {
4995                                                         FREE_VARIABLES ();
4996                                                         return -2;
4997                                                 }
4998                                                 bufp->regs_allocated = REGS_REALLOCATE;
4999                                         } else if (bufp->regs_allocated == REGS_REALLOCATE) {
5000                                           /* Yes.  If we need more elements than were already
5001                                              allocated, reallocate them.  If we need fewer, just
5002                                              leave it alone.  */
5003                                                 if (regs->num_regs < num_nonshy_regs + 1) {
5004                                                         regs->num_regs = num_nonshy_regs + 1;
5005                                                         RETALLOC(regs->start, regs->num_regs, regoff_t);
5006                                                         RETALLOC(regs->end, regs->num_regs, regoff_t);
5007                                                         if (regs->start == NULL || regs->end == NULL) {
5008                                                                 FREE_VARIABLES();
5009                                                                 return -2;
5010                                                         }
5011                                                 }
5012                                         } else {
5013                                                 /* The braces fend off a "empty body in an else-statement"
5014                                                    warning under GCC when assert expands to nothing.  */
5015                                                 assert (bufp->regs_allocated == REGS_FIXED);
5016                                         }
5017
5018                                         /* Convert the pointer data in `regstart' and `regend' to
5019                                            indices.  Register zero has to be set differently,
5020                                            since we haven't kept track of any info for it.  */
5021                                         if (regs->num_regs > 0) {
5022                                                 regs->start[0] = pos;
5023                                                 regs->end[0] = (MATCHING_IN_FIRST_STRING
5024                                                                 ? ((regoff_t) (d - string1))
5025                                                                 : ((regoff_t) (d - string2 + size1)));
5026                                         }
5027
5028                                         /* Map over the NUM_NONSHY_REGS non-shy internal registers.
5029                                            Copy each into the corresponding external register.
5030                                            N.B. MCNT indexes external registers. */
5031                                         for (mcnt = 1;
5032                                              mcnt < MIN (num_nonshy_regs, regs->num_regs);
5033                                              mcnt++) {
5034                                                 int ireg = bufp->external_to_internal_register[mcnt];
5035
5036                                                 if (REG_UNSET (regstart[ireg]) || REG_UNSET (regend[ireg]))
5037                                                         regs->start[mcnt] = regs->end[mcnt] = -1;
5038                                                 else {
5039                                                         regs->start[mcnt]
5040                                                                 = (regoff_t) POINTER_TO_OFFSET (regstart[ireg]);
5041                                                         regs->end[mcnt]
5042                                                                 = (regoff_t) POINTER_TO_OFFSET (regend[ireg]);
5043                                                 }
5044                                         }
5045                                 } /* regs && !bufp->no_sub */
5046
5047                                 /* If we have regs and the regs structure has
5048                                    more elements than were in the pattern, set
5049                                    the extra elements to -1.  If we
5050                                    (re)allocated the registers, this is the
5051                                    case, because we always allocate enough to
5052                                    have at least one -1 at the end.
5053
5054                                    We do this even when no_sub is set because
5055                                    some applications (XEmacs) reuse register
5056                                    structures which may contain stale
5057                                    information, and permit attempts to access
5058                                    those registers.
5059
5060                                    It would be possible to require the caller to
5061                                    do this, but we'd have to change the API for
5062                                    this function to reflect that, and audit all
5063                                    callers. */
5064                                 if (regs && regs->num_regs > 0)
5065                                         for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++)
5066                                                 regs->start[mcnt] = regs->end[mcnt] = -1;
5067                         }
5068
5069                         DEBUG_PRINT4("%u failure points pushed, %u popped (%u remain).\n",
5070                                       nfailure_points_pushed, nfailure_points_popped,
5071                                       nfailure_points_pushed - nfailure_points_popped);
5072                         DEBUG_PRINT2("%u registers pushed.\n", num_regs_pushed);
5073
5074                         mcnt = d - pos - (MATCHING_IN_FIRST_STRING
5075                                           ? string1
5076                                           : string2 - size1);
5077
5078                         DEBUG_PRINT2("Returning %d from re_match_2.\n", mcnt);
5079
5080                         FREE_VARIABLES();
5081                         return mcnt;
5082                 }
5083
5084                 /* Otherwise match next pattern command.  */
5085                 switch (SWITCH_ENUM_CAST((re_opcode_t) *p++)) {
5086                         /* Ignore these.  Used to ignore the n of succeed_n's
5087                            which currently have n == 0.  */
5088                 case no_op:
5089                         DEBUG_PRINT1("EXECUTING no_op.\n");
5090                         break;
5091
5092                 case succeed:
5093                         DEBUG_PRINT1("EXECUTING succeed.\n");
5094                         goto succeed_label;
5095
5096                         /* Match the next n pattern characters exactly.  The
5097                            following byte in the pattern defines n, and the n
5098                            bytes after that are the characters to match.  */
5099                 case exactn:
5100                         mcnt = *p++;
5101                         DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
5102
5103                         /* This is written out as an if-else so we don't waste
5104                            time testing `translate' inside the loop.  */
5105                         if (TRANSLATE_P(translate)) {
5106                                 do {
5107 #ifdef MULE
5108                                         Emchar pat_ch, buf_ch;
5109                                         Bytecount pat_len;
5110
5111                                         REGEX_PREFETCH();
5112                                         pat_ch = charptr_emchar(p);
5113                                         buf_ch = charptr_emchar(d);
5114                                         if (RE_TRANSLATE(buf_ch) != pat_ch)
5115                                                 goto fail;
5116
5117                                         pat_len = charcount_to_bytecount(p, 1);
5118                                         p += pat_len;
5119                                         INC_CHARPTR(d);
5120
5121                                         mcnt -= pat_len;
5122 #else                           /* not MULE */
5123                                         REGEX_PREFETCH();
5124                                         if ((unsigned char)RE_TRANSLATE(*d++) !=
5125                                             *p++)
5126                                                 goto fail;
5127                                         mcnt--;
5128 #endif
5129                                 }
5130                                 while (mcnt > 0);
5131                         } else {
5132                                 do {
5133                                         REGEX_PREFETCH();
5134                                         if (*d++ != *p++)
5135                                                 goto fail;
5136                                 }
5137                                 while (--mcnt);
5138                         }
5139                         SET_REGS_MATCHED();
5140                         break;
5141
5142                         /* Match any character except possibly a newline or a
5143                            null.  */
5144                 case anychar:
5145                         DEBUG_PRINT1("EXECUTING anychar.\n");
5146
5147                         REGEX_PREFETCH();
5148
5149                         if ((!(bufp->syntax & RE_DOT_NEWLINE)
5150                              && TRANSLATE(*d) == '\n')
5151                             || (bufp->syntax & RE_DOT_NOT_NULL
5152                                 && TRANSLATE(*d) == '\000'))
5153                                 goto fail;
5154
5155                         SET_REGS_MATCHED();
5156                         DEBUG_PRINT2("  Matched `%d'.\n", *d);
5157                         INC_CHARPTR(d); /* XEmacs change */
5158                         break;
5159
5160                 case charset:
5161                 case charset_not: {
5162                         REGISTER unsigned char c;
5163                         re_bool not_p =
5164                                 (re_opcode_t) * (p - 1) == charset_not;
5165
5166                         DEBUG_PRINT2("EXECUTING charset%s.\n",
5167                                      not_p ? "_not" : "");
5168
5169                         REGEX_PREFETCH();
5170                         /* The character to match.  */
5171                         c = TRANSLATE(*d);
5172
5173                         /* Cast to `unsigned' instead of `unsigned char'
5174                            in case the bit list is a full 32 bytes
5175                            long.  */
5176                         if (c < (unsigned)(*p * BYTEWIDTH)
5177                             && p[1 +
5178                                  c / BYTEWIDTH] & (1 << (c %
5179                                                          BYTEWIDTH)))
5180                                 not_p = !not_p;
5181
5182                         p += 1 + *p;
5183
5184                         if (!not_p)
5185                                 goto fail;
5186
5187                         SET_REGS_MATCHED();
5188                         INC_CHARPTR(d); /* XEmacs change */
5189                         break;
5190                 }
5191
5192 #ifdef MULE
5193                 case charset_mule:
5194                 case charset_mule_not: {
5195                         REGISTER Emchar c;
5196                         re_bool not_p =
5197                                 (re_opcode_t) * (p - 1) == charset_mule_not;
5198
5199                         DEBUG_PRINT2("EXECUTING charset_mule%s.\n",
5200                                      not_p ? "_not" : "");
5201
5202                         REGEX_PREFETCH();
5203                         c = charptr_emchar((const Bufbyte *)d);
5204                         /* The character to match.  */
5205                         c = TRANSLATE(c);
5206
5207                         if (EQ
5208                             (Qt,
5209                              unified_range_table_lookup(p, c, Qnil)))
5210                                 not_p = !not_p;
5211
5212                         p += unified_range_table_bytes_used(p);
5213
5214                         if (!not_p)
5215                                 goto fail;
5216
5217                         SET_REGS_MATCHED();
5218                         INC_CHARPTR(d);
5219                         break;
5220                 }
5221 #endif                          /* MULE */
5222
5223                         /* The beginning of a group is represented by
5224                            start_memory.  The arguments are the register number
5225                            in the next byte, and the number of groups inner to
5226                            this one in the next.  The text matched within the
5227                            group is recorded (in the internal registers data
5228                            structure) under the register number.  */
5229                 case start_memory:
5230                         DEBUG_PRINT3("EXECUTING start_memory %d (%d):\n", *p,
5231                                      p[1]);
5232
5233                         /* Find out if this group can match the empty string.  */
5234                         p1 = p; /* To send to group_match_null_string_p.  */
5235
5236                         if (REG_MATCH_NULL_STRING_P(reg_info[*p]) ==
5237                             MATCH_NULL_UNSET_VALUE)
5238                                 REG_MATCH_NULL_STRING_P(reg_info[*p])
5239                                     = group_match_null_string_p(&p1, pend,
5240                                                                 reg_info);
5241
5242                         /* Save the position in the string where we were the
5243                            last time we were at this open-group operator in case
5244                            the group is operated upon by a repetition operator,
5245                            e.g., with `(a*)*b' against `ab'; then we want to
5246                            ignore where we are now in the string in case this
5247                            attempt to match fails.  */
5248                         old_regstart[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
5249                             ? REG_UNSET(regstart[*p]) ? d : regstart[*p]
5250                             : regstart[*p];
5251                         DEBUG_PRINT2("  old_regstart: %d\n",
5252                                      POINTER_TO_OFFSET(old_regstart[*p]));
5253
5254                         regstart[*p] = d;
5255                         DEBUG_PRINT2("  regstart: %d\n",
5256                                      POINTER_TO_OFFSET(regstart[*p]));
5257
5258                         IS_ACTIVE(reg_info[*p]) = 1;
5259                         MATCHED_SOMETHING(reg_info[*p]) = 0;
5260
5261                         /* Clear this whenever we change the register activity
5262                            status.  */
5263                         set_regs_matched_done = 0;
5264
5265                         /* This is the new highest active register.  */
5266                         highest_active_reg = *p;
5267
5268                         /* If nothing was active before, this is the new lowest
5269                            active register.  */
5270                         if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5271                                 lowest_active_reg = *p;
5272
5273                         /* Move past the register number and inner group
5274                            count.  */
5275                         p += 2;
5276                         just_past_start_mem = p;
5277
5278                         break;
5279
5280                         /* The stop_memory opcode represents the end of a group.
5281                            Its arguments are the same as start_memory's: the
5282                            register number, and the number of inner groups.  */
5283                 case stop_memory:
5284                         DEBUG_PRINT3("EXECUTING stop_memory %d (%d):\n", *p,
5285                                      p[1]);
5286
5287                         /* We need to save the string position the last time we
5288                            were at this close-group operator in case the group
5289                            is operated upon by a repetition operator, e.g., with
5290                            `((a*)*(b*)*)*' against `aba'; then we want to ignore
5291                            where we are now in the string in case this attempt
5292                            to match fails.  */
5293                         old_regend[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
5294                             ? REG_UNSET(regend[*p]) ? d : regend[*p]
5295                             : regend[*p];
5296                         DEBUG_PRINT2("      old_regend: %d\n",
5297                                      POINTER_TO_OFFSET(old_regend[*p]));
5298
5299                         regend[*p] = d;
5300                         DEBUG_PRINT2("      regend: %d\n",
5301                                      POINTER_TO_OFFSET(regend[*p]));
5302
5303                         /* This register isn't active anymore.  */
5304                         IS_ACTIVE(reg_info[*p]) = 0;
5305
5306                         /* Clear this whenever we change the register activity
5307                            status.  */
5308                         set_regs_matched_done = 0;
5309
5310                         /* If this was the only register active, nothing is
5311                            active anymore.  */
5312                         if (lowest_active_reg == highest_active_reg) {
5313                                 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5314                                 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5315                         } else {        /* We must scan for the new highest
5316                                            active register, since it isn't
5317                                            necessarily one less than now:
5318                                            consider (a(b)c(d(e)f)g).  When group
5319                                            3 ends, after the f), the new highest
5320                                            active register is 1.  */
5321                                 unsigned char r = *p - 1;
5322                                 while (r > 0 && !IS_ACTIVE(reg_info[r]))
5323                                         r--;
5324
5325                                 /* If we end up at register zero, that means
5326                                    that we saved the registers as the result of
5327                                    an `on_failure_jump', not a `start_memory',
5328                                    and we jumped to past the innermost
5329                                    `stop_memory'.  For example, in ((.)*) we
5330                                    save registers 1 and 2 as a result of the *,
5331                                    but when we pop back to the second ), we are
5332                                    at the stop_memory 1.  Thus, nothing is
5333                                    active.  */
5334                                 if (r == 0) {
5335                                         lowest_active_reg =
5336                                             NO_LOWEST_ACTIVE_REG;
5337                                         highest_active_reg =
5338                                             NO_HIGHEST_ACTIVE_REG;
5339                                 } else {
5340                                         highest_active_reg = r;
5341
5342                                         /* 98/9/21 jhod: We've also gotta set
5343                                            lowest_active_reg, don't we? */
5344                                         r = 1;
5345                                         while (r < highest_active_reg
5346                                                && !IS_ACTIVE(reg_info[r]))
5347                                                 r++;
5348                                         lowest_active_reg = r;
5349                                 }
5350                         }
5351
5352                         /* If just failed to match something this time around
5353                            with a group that's operated on by a repetition
5354                            operator, try to force exit from the ``loop'', and
5355                            restore the register information for this group that
5356                            we had before trying this last match.  */
5357                         if ((!MATCHED_SOMETHING(reg_info[*p])
5358                              || just_past_start_mem == p - 1)
5359                             && (p + 2) < pend) {
5360                                 re_bool is_a_jump_n = false;
5361
5362                                 p1 = p + 2;
5363                                 mcnt = 0;
5364                                 switch ((unsigned int)(re_opcode_t)*p1++) {
5365                                 case jump_n:
5366                                         is_a_jump_n = true;
5367                                 case pop_failure_jump:
5368                                 case maybe_pop_jump:
5369                                 case jump:
5370                                 case dummy_failure_jump:
5371                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
5372                                         if (is_a_jump_n)
5373                                                 p1 += 2;
5374                                         break;
5375
5376                                 default:
5377                                         /* do nothing */ ;
5378                                 }
5379                                 p1 += mcnt;
5380
5381                                 /* If the next operation is a jump backwards in
5382                                    the pattern to an on_failure_jump right
5383                                    before the start_memory corresponding to this
5384                                    stop_memory, exit from the loop by forcing a
5385                                    failure after pushing on the stack the
5386                                    on_failure_jump's jump in the pattern, and
5387                                    d.  */
5388                                 if (mcnt < 0
5389                                     && (re_opcode_t) * p1 == on_failure_jump
5390                                     && (re_opcode_t) p1[3] == start_memory
5391                                     && p1[4] == *p) {
5392                                         /* If this group ever matched anything,
5393                                            then restore what its registers were
5394                                            before trying this last failed match,
5395                                            e.g., with `(a*)*b' against `ab' for
5396                                            regstart[1], and, e.g., with
5397                                            `((a*)*(b*)*)*' against `aba' for
5398                                            regend[3].
5399
5400                                            Also restore the registers for inner
5401                                            groups for, e.g., `((a*)(b*))*'
5402                                            against `aba' (register 3 would
5403                                            otherwise get trashed).  */
5404
5405                                         if (EVER_MATCHED_SOMETHING
5406                                             (reg_info[*p])) {
5407                                                 int r;
5408
5409                                                 EVER_MATCHED_SOMETHING(reg_info
5410                                                                        [*p]) =
5411                                                     0;
5412
5413                                                 /* Restore this and inner
5414                                                    groups' (if any)
5415                                                    registers.  */
5416                                                 for (r = *p; r < *p + *(p + 1);
5417                                                      r++) {
5418                                                         regstart[r] =
5419                                                             old_regstart[r];
5420
5421                                                         /* xx why this test?  */
5422                                                         if (old_regend[r] >=
5423                                                             regstart[r])
5424                                                                 regend[r] =
5425                                                                     old_regend
5426                                                                     [r];
5427                                                 }
5428                                         }
5429                                         p1++;
5430                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
5431                                         PUSH_FAILURE_POINT(p1 + mcnt, d, -2);
5432
5433                                         goto fail;
5434                                 }
5435                         }
5436
5437                         /* Move past the register number and the inner group
5438                            count.  */
5439                         p += 2;
5440                         break;
5441
5442                         /* \<digit> has been turned into a `duplicate' command
5443                            which is followed by the numeric value of <digit> as
5444                            the register number.  (Already passed through
5445                            external-to-internal-register mapping, so it refers
5446                            to the actual group number, not the non-shy-only
5447                            numbering used in the external world.) */
5448                 case duplicate:
5449                         {
5450                                 REGISTER re_char *d2, *dend2;
5451                                 /* Get which register to match against.  */
5452                                 int regno = *p++;
5453                                 DEBUG_PRINT2("EXECUTING duplicate %d.\n",
5454                                              regno);
5455
5456                                 /* Can't back reference a group which we've
5457                                    never matched.  */
5458                                 if (REG_UNSET(regstart[regno])
5459                                     || REG_UNSET(regend[regno]))
5460                                         goto fail;
5461
5462                                 /* Where in input to try to start matching.  */
5463                                 d2 = regstart[regno];
5464
5465                                 /* Where to stop matching; if both the place to
5466                                    start and the place to stop matching are in
5467                                    the same string, then set to the place to
5468                                    stop, otherwise, for now have to use the end
5469                                    of the first string.  */
5470
5471                                 dend2 = ((FIRST_STRING_P(regstart[regno])
5472                                           == FIRST_STRING_P(regend[regno]))
5473                                          ? regend[regno] : end_match_1);
5474                                 for (;;) {
5475                                         /* If necessary, advance to next segment
5476                                            in register contents.  */
5477                                         while (d2 == dend2) {
5478                                                 if (dend2 == end_match_2)
5479                                                         break;
5480                                                 if (dend2 == regend[regno])
5481                                                         break;
5482
5483                                                 /* End of string1 => advance to
5484                                                    string2. */
5485                                                 d2 = string2;
5486                                                 dend2 = regend[regno];
5487                                         }
5488                                         /* At end of register contents =>
5489                                            success */
5490                                         if (d2 == dend2)
5491                                                 break;
5492
5493                                         /* If necessary, advance to next segment
5494                                            in data.  */
5495                                         REGEX_PREFETCH();
5496
5497                                         /* How many characters left in this segment to match.  */
5498                                         mcnt = dend - d;
5499
5500                                         /* Want how many consecutive characters
5501                                            we can match in one shot, so, if
5502                                            necessary, adjust the count.  */
5503                                         if (mcnt > dend2 - d2)
5504                                                 mcnt = dend2 - d2;
5505
5506                                         /* Compare that many; failure if
5507                                            mismatch, else move past them.  */
5508                                         if (TRANSLATE_P(translate)
5509                                             ? bcmp_translate(
5510                                                     (const unsigned char*)d,
5511                                                     (const unsigned char*)d2,
5512                                                     mcnt, translate)
5513                                             : memcmp(d, d2, mcnt)) {
5514                                                 goto fail;
5515                                         }
5516                                         d += mcnt, d2 += mcnt;
5517
5518                                         /* Do this because we've match some
5519                                            characters.  */
5520                                         SET_REGS_MATCHED();
5521                                 }
5522                         }
5523                         break;
5524
5525                         /* begline matches the empty string at the beginning of
5526                            the string (unless `not_bol' is set in `bufp'), and,
5527                            if `newline_anchor' is set, after newlines.  */
5528                 case begline:
5529                         DEBUG_PRINT1("EXECUTING begline.\n");
5530
5531                         if (AT_STRINGS_BEG(d)) {
5532                                 if (!bufp->not_bol)
5533                                         break;
5534                         } else if (d[-1] == '\n' && bufp->newline_anchor) {
5535                                 break;
5536                         }
5537                         /* In all other cases, we fail.  */
5538                         goto fail;
5539
5540                         /* endline is the dual of begline.  */
5541                 case endline:
5542                         DEBUG_PRINT1("EXECUTING endline.\n");
5543
5544                         if (AT_STRINGS_END(d)) {
5545                                 if (!bufp->not_eol)
5546                                         break;
5547                         }
5548
5549                         /* We have to ``prefetch'' the next character.  */
5550                         else if ((d == end1 ? *string2 : *d) == '\n'
5551                                  && bufp->newline_anchor) {
5552                                 break;
5553                         }
5554                         goto fail;
5555
5556                         /* Match at the very beginning of the data.  */
5557                 case begbuf:
5558                         DEBUG_PRINT1("EXECUTING begbuf.\n");
5559                         if (AT_STRINGS_BEG(d))
5560                                 break;
5561                         goto fail;
5562
5563                         /* Match at the very end of the data.  */
5564                 case endbuf:
5565                         DEBUG_PRINT1("EXECUTING endbuf.\n");
5566                         if (AT_STRINGS_END(d))
5567                                 break;
5568                         goto fail;
5569
5570                         /* on_failure_keep_string_jump is used to optimize
5571                            `.*\n'.  It pushes NULL as the value for the string
5572                            on the stack.  Then `pop_failure_point' will keep the
5573                            current value for the string, instead of restoring
5574                            it.  To see why, consider matching `foo\nbar' against
5575                            `.*\n'.  The .* matches the foo; then the . fails
5576                            against the \n.  But the next thing we want to do is
5577                            match the \n against the \n; if we restored the
5578                            string value, we would be back at the foo.
5579
5580                            Because this is used only in specific cases, we don't
5581                            need to check all the things that `on_failure_jump'
5582                            does, to make sure the right things get saved on the
5583                            stack.  Hence we don't share its code.  The only
5584                            reason to push anything on the stack at all is that
5585                            otherwise we would have to change `anychar's code to
5586                            do something besides goto fail in this case; that
5587                            seems worse than this.  */
5588                 case on_failure_keep_string_jump:
5589                         DEBUG_PRINT1("EXECUTING on_failure_keep_string_jump");
5590
5591                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5592                         DEBUG_PRINT3(" %d (to 0x%lx):\n", mcnt,
5593                                      (long)(p + mcnt));
5594
5595                         PUSH_FAILURE_POINT(p + mcnt, (unsigned char *)0, -2);
5596                         break;
5597
5598                         /* Uses of on_failure_jump:
5599
5600                            Each alternative starts with an on_failure_jump that
5601                            points to the beginning of the next alternative.
5602                            Each alternative except the last ends with a jump
5603                            that in effect jumps past the rest of the
5604                            alternatives.  (They really jump to the ending jump
5605                            of the following alternative, because tensioning
5606                            these jumps is a hassle.)
5607
5608                            Repeats start with an on_failure_jump that points
5609                            past both the repetition text and either the
5610                            following jump or pop_failure_jump back to this
5611                            on_failure_jump.  */
5612                 case on_failure_jump:
5613                       on_failure:
5614                         DEBUG_PRINT1("EXECUTING on_failure_jump");
5615
5616                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5617                         DEBUG_PRINT3(" %d (to 0x%lx)", mcnt, (long)(p + mcnt));
5618
5619                         /* If this on_failure_jump comes right before a group
5620                            (i.e., the original * applied to a group), save the
5621                            information for that group and all inner ones, so
5622                            that if we fail back to this point, the group's
5623                            information will be correct.  For example, in
5624                            \(a*\)*\1, we need the preceding group, and in
5625                            \(\(a*\)b*\)\2, we need the inner group.  */
5626
5627                         /* We can't use `p' to check ahead because we push
5628                            a failure point to `p + mcnt' after we do this.  */
5629                         p1 = p;
5630
5631                         /* We need to skip no_op's before we look for the
5632                            start_memory in case this on_failure_jump is
5633                            happening as the result of a completed succeed_n, as
5634                            in \(a\)\{1,3\}b\1 against aba.  */
5635                         while (p1 < pend && (re_opcode_t) * p1 == no_op)
5636                                 p1++;
5637
5638                         if (p1 < pend && (re_opcode_t) * p1 == start_memory) {
5639                                 /* We have a new highest active register now.
5640                                    This will get reset at the start_memory we
5641                                    are about to get to, but we will have saved
5642                                    all the registers relevant to this repetition
5643                                    op, as described above.  */
5644                                 highest_active_reg = *(p1 + 1) + *(p1 + 2);
5645                                 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5646                                         lowest_active_reg = *(p1 + 1);
5647                         }
5648
5649                         DEBUG_PRINT1(":\n");
5650                         PUSH_FAILURE_POINT(p + mcnt, d, -2);
5651                         break;
5652
5653                         /* A smart repeat ends with `maybe_pop_jump'.
5654                            We change it to either `pop_failure_jump' or `jump'.  */
5655                 case maybe_pop_jump:
5656                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5657                         DEBUG_PRINT2("EXECUTING maybe_pop_jump %d.\n", mcnt);
5658                         {
5659                                 REGISTER const unsigned char *p2 = p;
5660
5661                                 /* Compare the beginning of the repeat with what
5662                                    in the pattern follows its end. If we can
5663                                    establish that there is nothing that they
5664                                    would both match, i.e., that we would have to
5665                                    backtrack because of (as in, e.g., `a*a')
5666                                    then we can change to pop_failure_jump,
5667                                    because we'll never have to backtrack.
5668
5669                                    This is not true in the case of alternatives:
5670                                    in `(a|ab)*' we do need to backtrack to the
5671                                    `ab' alternative (e.g., if the string was
5672                                    `ab').  But instead of trying to detect that
5673                                    here, the alternative has put on a dummy
5674                                    failure point which is what we will end up
5675                                    popping.  */
5676
5677                                 /* Skip over open/close-group commands.  If what
5678                                    follows this loop is a ...+ construct, look
5679                                    at what begins its body, since we will have
5680                                    to match at least one of that.  */
5681                                 while (1) {
5682                                         if (p2 + 2 < pend
5683                                             && ((re_opcode_t) * p2 ==
5684                                                 stop_memory
5685                                                 || (re_opcode_t) * p2 ==
5686                                                 start_memory))
5687                                                 p2 += 3;
5688                                         else if (p2 + 6 < pend
5689                                                  && (re_opcode_t) * p2 ==
5690                                                  dummy_failure_jump)
5691                                                 p2 += 6;
5692                                         else
5693                                                 break;
5694                                 }
5695
5696                                 p1 = p + mcnt;
5697                                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5698                                    to the `maybe_finalize_jump' of this case.  Examine what
5699                                    follows.  */
5700
5701                                 /* If we're at the end of the pattern, we can change.  */
5702                                 if (p2 == pend) {
5703                                         /* Consider what happens when matching ":\(.*\)"
5704                                            against ":/".  I don't really understand this code
5705                                            yet.  */
5706                                         p[-3] = (unsigned char)pop_failure_jump;
5707                                         DEBUG_PRINT1
5708                                             ("  End of pattern: change to `pop_failure_jump'.\n");
5709                                 }
5710
5711                                 else if ((re_opcode_t) * p2 == exactn
5712                                          || (bufp->newline_anchor
5713                                              && (re_opcode_t) * p2 ==
5714                                              endline)) {
5715                                         REGISTER unsigned char c =
5716                                             *p2 ==
5717                                             (unsigned char)endline ? '\n' :
5718                                             p2[2];
5719
5720                                         if ((re_opcode_t) p1[3] == exactn
5721                                             && p1[5] != c) {
5722                                                 p[-3] =
5723                                                     (unsigned char)
5724                                                     pop_failure_jump;
5725                                                 DEBUG_PRINT3
5726                                                     ("  %c != %c => pop_failure_jump.\n",
5727                                                      c, p1[5]);
5728                                         }
5729
5730                                         else if ((re_opcode_t) p1[3] == charset
5731                                                  || (re_opcode_t) p1[3] ==
5732                                                  charset_not) {
5733                                                 int not_p =
5734                                                     (re_opcode_t) p1[3] ==
5735                                                     charset_not;
5736
5737                                                 if (c <
5738                                                     (unsigned char)(p1[4] *
5739                                                                     BYTEWIDTH)
5740                                                     && p1[5 +
5741                                                           c /
5742                                                           BYTEWIDTH] & (1 << (c
5743                                                                               %
5744                                                                               BYTEWIDTH)))
5745                                                         not_p = !not_p;
5746
5747                                                 /* `not_p' is equal to 1 if c would match, which means
5748                                                    that we can't change to pop_failure_jump.  */
5749                                                 if (!not_p) {
5750                                                         p[-3] =
5751                                                             (unsigned char)
5752                                                             pop_failure_jump;
5753                                                         DEBUG_PRINT1
5754                                                             ("  No match => pop_failure_jump.\n");
5755                                                 }
5756                                         }
5757                                 } else if ((re_opcode_t) * p2 == charset) {
5758 #ifdef REGEX_DEBUG_FLAG
5759                                         REGISTER unsigned char c
5760                                             =
5761                                             *p2 ==
5762                                             (unsigned char)endline ? '\n' :
5763                                             p2[2];
5764 #endif
5765
5766                                         if ((re_opcode_t) p1[3] == exactn
5767                                             && !((int)p2[1] * BYTEWIDTH >
5768                                                  (int)p1[5]
5769                                                  && (p2[2 + p1[5] / BYTEWIDTH]
5770                                                      & (1 <<
5771                                                         (p1[5] %
5772                                                          BYTEWIDTH))))) {
5773                                                 p[-3] =
5774                                                     (unsigned char)
5775                                                     pop_failure_jump;
5776                                                 DEBUG_PRINT3
5777                                                     ("  %c != %c => pop_failure_jump.\n",
5778                                                      c, p1[5]);
5779                                         }
5780
5781                                         else if ((re_opcode_t) p1[3] ==
5782                                                  charset_not) {
5783                                                 int idx;
5784                                                 /* We win if the charset_not inside the loop
5785                                                    lists every character listed in the charset after.  */
5786                                                 for (idx = 0; idx < (int)p2[1];
5787                                                      idx++)
5788                                                         if (!
5789                                                             (p2[2 + idx] == 0
5790                                                              || (idx <
5791                                                                  (int)p1[4]
5792                                                                  &&
5793                                                                  ((p2[2 + idx] &
5794                                                                    ~p1[5 +
5795                                                                        idx]) ==
5796                                                                   0))))
5797                                                                 break;
5798
5799                                                 if (idx == p2[1]) {
5800                                                         p[-3] =
5801                                                             (unsigned char)
5802                                                             pop_failure_jump;
5803                                                         DEBUG_PRINT1
5804                                                             ("  No match => pop_failure_jump.\n");
5805                                                 }
5806                                         } else if ((re_opcode_t) p1[3] ==
5807                                                    charset) {
5808                                                 int idx;
5809                                                 /* We win if the charset inside the loop
5810                                                    has no overlap with the one after the loop.  */
5811                                                 for (idx = 0;
5812                                                      idx < (int)p2[1]
5813                                                      && idx < (int)p1[4]; idx++)
5814                                                         if ((p2[2 + idx] &
5815                                                              p1[5 + idx]) != 0)
5816                                                                 break;
5817
5818                                                 if (idx == p2[1]
5819                                                     || idx == p1[4]) {
5820                                                         p[-3] =
5821                                                             (unsigned char)
5822                                                             pop_failure_jump;
5823                                                         DEBUG_PRINT1
5824                                                             ("  No match => pop_failure_jump.\n");
5825                                                 }
5826                                         }
5827                                 }
5828                         }
5829                         p -= 2; /* Point at relative address again.  */
5830                         if ((re_opcode_t) p[-1] != pop_failure_jump) {
5831                                 p[-1] = (unsigned char)jump;
5832                                 DEBUG_PRINT1("  Match => jump.\n");
5833                                 goto unconditional_jump;
5834                         }
5835                         /* Note fall through.  */
5836
5837                         /* The end of a simple repeat has a pop_failure_jump
5838                            back to its matching on_failure_jump, where the
5839                            latter will push a failure point.  The
5840                            pop_failure_jump takes off failure points put on by
5841                            this pop_failure_jump's matching on_failure_jump; we
5842                            got through the pattern to here from the matching
5843                            on_failure_jump, so didn't fail.  */
5844                 case pop_failure_jump: {
5845                         /* We need to pass separate storage for the
5846                            lowest and highest registers, even though we
5847                            don't care about the actual values.
5848                            Otherwise, we will restore only one register
5849                            from the stack, since lowest will == highest
5850                            in `pop_failure_point'.  */
5851                         int dummy_low_reg, dummy_high_reg;
5852                         const unsigned char *pdummy = NULL;
5853                         const unsigned char *sdummy = NULL;
5854
5855                         DEBUG_PRINT1("EXECUTING pop_failure_jump.\n");
5856                         POP_FAILURE_POINT(sdummy, pdummy,
5857                                           dummy_low_reg, dummy_high_reg,
5858                                           reg_dummy, reg_dummy,
5859                                           reg_info_dummy);
5860                         SXE_SET_UNUSED(pdummy), SXE_SET_UNUSED(sdummy);
5861                 }
5862                         /* Note fall through.  */
5863
5864                         /* Unconditionally jump (without popping any failure
5865                            points).  */
5866                 case jump:
5867                 unconditional_jump:
5868                         /* Get the amount to jump. */
5869                         EXTRACT_NUMBER_AND_INCR(mcnt, p);
5870                         DEBUG_PRINT2("EXECUTING jump %d ", mcnt);
5871                         p += mcnt;      /* Do the jump.  */
5872                         DEBUG_PRINT2("(to 0x%lx).\n", (long)p);
5873                         break;
5874
5875                         /* We need this opcode so we can detect where alternatives end
5876                            in `group_match_null_string_p' et al.  */
5877                 case jump_past_alt:
5878                         DEBUG_PRINT1("EXECUTING jump_past_alt.\n");
5879                         goto unconditional_jump;
5880
5881                         /* Normally, the on_failure_jump pushes a failure point, which
5882                            then gets popped at pop_failure_jump.  We will end up at
5883                            pop_failure_jump, also, and with a pattern of, say, `a+', we
5884                            are skipping over the on_failure_jump, so we have to push
5885                            something meaningless for pop_failure_jump to pop.  */
5886                 case dummy_failure_jump:
5887                         DEBUG_PRINT1("EXECUTING dummy_failure_jump.\n");
5888                         /* It doesn't matter what we push for the string here.  What
5889                            the code at `fail' tests is the value for the pattern.  */
5890                         PUSH_FAILURE_POINT(NULL, NULL, -2);
5891                         goto unconditional_jump;
5892
5893                         /* At the end of an alternative, we need to push a dummy failure
5894                            point in case we are followed by a `pop_failure_jump', because
5895                            we don't want the failure point for the alternative to be
5896                            popped.  For example, matching `(a|ab)*' against `aab'
5897                            requires that we match the `ab' alternative.  */
5898                 case push_dummy_failure:
5899                         DEBUG_PRINT1("EXECUTING push_dummy_failure.\n");
5900                         /* See comments just above at `dummy_failure_jump' about the
5901                            two zeroes.  */
5902                         PUSH_FAILURE_POINT(NULL, NULL, -2);
5903                         break;
5904
5905                         /* Have to succeed matching what follows at least n times.
5906                            After that, handle like `on_failure_jump'.  */
5907                 case succeed_n:
5908                         EXTRACT_NUMBER(mcnt, p + 2);
5909                         DEBUG_PRINT2("EXECUTING succeed_n %d.\n", mcnt);
5910
5911                         assert(mcnt >= 0);
5912                         /* Originally, this is how many times we HAVE to succeed.  */
5913                         if (mcnt > 0) {
5914                                 mcnt--;
5915                                 p += 2;
5916                                 STORE_NUMBER_AND_INCR(p, mcnt);
5917                                 DEBUG_PRINT3("  Setting 0x%lx to %d.\n",
5918                                              (long)p, mcnt);
5919                         } else if (mcnt == 0) {
5920                                 DEBUG_PRINT2
5921                                     ("  Setting two bytes from 0x%lx to no_op.\n",
5922                                      (long)(p + 2));
5923                                 p[2] = (unsigned char)no_op;
5924                                 p[3] = (unsigned char)no_op;
5925                                 goto on_failure;
5926                         }
5927                         break;
5928
5929                 case jump_n:
5930                         EXTRACT_NUMBER(mcnt, p + 2);
5931                         DEBUG_PRINT2("EXECUTING jump_n %d.\n", mcnt);
5932
5933                         /* Originally, this is how many times we CAN jump.  */
5934                         if (mcnt) {
5935                                 mcnt--;
5936                                 STORE_NUMBER(p + 2, mcnt);
5937                                 goto unconditional_jump;
5938                         }
5939                         /* If don't have to jump any more, skip over the rest of command.  */
5940                         else
5941                                 p += 4;
5942                         break;
5943
5944                 case set_number_at:
5945                         {
5946                                 DEBUG_PRINT1("EXECUTING set_number_at.\n");
5947
5948                                 EXTRACT_NUMBER_AND_INCR(mcnt, p);
5949                                 p1 = p + mcnt;
5950                                 EXTRACT_NUMBER_AND_INCR(mcnt, p);
5951                                 DEBUG_PRINT3("  Setting 0x%lx to %d.\n",
5952                                              (long)p1, mcnt);
5953                                 STORE_NUMBER(p1, mcnt);
5954                                 break;
5955                         }
5956
5957                 case wordbound:
5958                         DEBUG_PRINT1("EXECUTING wordbound.\n");
5959                         should_succeed = 1;
5960                       matchwordbound:
5961                         {
5962                                 /* XEmacs change */
5963                                 /* Straightforward and (I hope) correct implementation.
5964                                    Probably should be optimized by arranging to compute
5965                                    pos only once. */
5966                                 /* emch1 is the character before d, syn1 is the syntax of
5967                                    emch1, emch2 is the character at d, and syn2 is the
5968                                    syntax of emch2. */
5969                                 Emchar emch1, emch2;
5970                                 /* GCC isn't smart enough to see these are initialized if used. */
5971                                 int syn1 = 0, syn2 = 0;
5972                                 re_char *d_before, *d_after;
5973                                 int result,
5974                                     at_beg = AT_STRINGS_BEG(d),
5975                                     at_end = AT_STRINGS_END(d);
5976 #ifdef emacs
5977                                 int xpos;
5978 #endif
5979
5980                                 if (at_beg && at_end) {
5981                                         result = 0;
5982                                 } else {
5983                                         if (!at_beg) {
5984                                                 d_before =
5985                                                     POS_BEFORE_GAP_UNSAFE(d);
5986                                                 DEC_CHARPTR(d_before);
5987                                                 emch1 =
5988                                                     charptr_emchar(d_before);
5989 #ifdef emacs
5990                                                 xpos =
5991                                                     SYNTAX_CACHE_BYTE_TO_CHAR
5992                                                     (PTR_TO_OFFSET(d)) - 1;
5993                                                 UPDATE_SYNTAX_CACHE(xpos);
5994 #endif
5995                                                 syn1 = SYNTAX_FROM_CACHE
5996                                                     (XCHAR_TABLE
5997                                                      (regex_emacs_buffer->
5998                                                       mirror_syntax_table),
5999                                                      emch1);
6000                                         }
6001                                         if (!at_end) {
6002                                                 d_after =
6003                                                     POS_AFTER_GAP_UNSAFE(d);
6004                                                 emch2 = charptr_emchar(d_after);
6005 #ifdef emacs
6006                                                 xpos =
6007                                                     SYNTAX_CACHE_BYTE_TO_CHAR
6008                                                     (PTR_TO_OFFSET(d));
6009                                                 UPDATE_SYNTAX_CACHE_FORWARD(xpos
6010                                                                             +
6011                                                                             1);
6012 #endif
6013                                                 syn2 = SYNTAX_FROM_CACHE
6014                                                     (XCHAR_TABLE
6015                                                      (regex_emacs_buffer->
6016                                                       mirror_syntax_table),
6017                                                      emch2);
6018                                         }
6019
6020                                         if (at_beg)
6021                                                 result = (syn2 == Sword);
6022                                         else if (at_end)
6023                                                 result = (syn1 == Sword);
6024                                         else
6025                                                 result =
6026                                                     ((syn1 == Sword) !=
6027                                                      (syn2 == Sword));
6028                                 }
6029
6030                                 if (result == should_succeed)
6031                                         break;
6032                                 goto fail;
6033                         }
6034
6035                 case notwordbound:
6036                         DEBUG_PRINT1("EXECUTING notwordbound.\n");
6037                         should_succeed = 0;
6038                         goto matchwordbound;
6039
6040                 case wordbeg:
6041                         DEBUG_PRINT1("EXECUTING wordbeg.\n");
6042                         if (AT_STRINGS_END(d))
6043                                 goto fail;
6044                         {
6045                                 /* XEmacs: this originally read:
6046
6047                                    if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
6048                                    break;
6049
6050                                  */
6051                                 re_char *dtmp = POS_AFTER_GAP_UNSAFE(d);
6052                                 Emchar emch = charptr_emchar(dtmp);
6053 #ifdef emacs
6054                                 int charpos =
6055                                     SYNTAX_CACHE_BYTE_TO_CHAR(PTR_TO_OFFSET(d));
6056                                 UPDATE_SYNTAX_CACHE(charpos);
6057 #endif
6058                                 if (SYNTAX_FROM_CACHE
6059                                     (XCHAR_TABLE
6060                                      (regex_emacs_buffer->mirror_syntax_table),
6061                                      emch) != Sword)
6062                                         goto fail;
6063                                 if (AT_STRINGS_BEG(d))
6064                                         break;
6065                                 dtmp = POS_BEFORE_GAP_UNSAFE(d);
6066                                 DEC_CHARPTR(dtmp);
6067                                 emch = charptr_emchar(dtmp);
6068 #ifdef emacs
6069                                 UPDATE_SYNTAX_CACHE_BACKWARD(charpos - 1);
6070 #endif
6071                                 if (SYNTAX_FROM_CACHE
6072                                     (XCHAR_TABLE
6073                                      (regex_emacs_buffer->mirror_syntax_table),
6074                                      emch) != Sword)
6075                                         break;
6076                                 goto fail;
6077                         }
6078
6079                 case wordend:
6080                         DEBUG_PRINT1("EXECUTING wordend.\n");
6081                         if (AT_STRINGS_BEG(d))
6082                                 goto fail;
6083                         {
6084                                 /* XEmacs: this originally read:
6085
6086                                    if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
6087                                    && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
6088                                    break;
6089
6090                                    The or condition is incorrect (reversed).
6091                                  */
6092                                 re_char *dtmp;
6093                                 Emchar emch;
6094 #ifdef emacs
6095                                 int charpos =
6096                                     SYNTAX_CACHE_BYTE_TO_CHAR(PTR_TO_OFFSET(d))
6097                                     - 1;
6098                                 UPDATE_SYNTAX_CACHE(charpos);
6099 #endif
6100                                 dtmp = POS_BEFORE_GAP_UNSAFE(d);
6101                                 DEC_CHARPTR(dtmp);
6102                                 emch = charptr_emchar(dtmp);
6103                                 if (SYNTAX_FROM_CACHE
6104                                     (XCHAR_TABLE
6105                                      (regex_emacs_buffer->mirror_syntax_table),
6106                                      emch) != Sword)
6107                                         goto fail;
6108                                 if (AT_STRINGS_END(d))
6109                                         break;
6110                                 dtmp = POS_AFTER_GAP_UNSAFE(d);
6111                                 emch = charptr_emchar(dtmp);
6112 #ifdef emacs
6113                                 UPDATE_SYNTAX_CACHE_FORWARD(charpos + 1);
6114 #endif
6115                                 if (SYNTAX_FROM_CACHE
6116                                     (XCHAR_TABLE
6117                                      (regex_emacs_buffer->mirror_syntax_table),
6118                                      emch) != Sword)
6119                                         break;
6120                                 goto fail;
6121                         }
6122
6123 #ifdef emacs
6124                 case before_dot:
6125                         DEBUG_PRINT1("EXECUTING before_dot.\n");
6126                         if (!
6127                             (NILP(regex_match_object)
6128                              || BUFFERP(regex_match_object))
6129                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6130                                 >= BUF_PT(regex_emacs_buffer)))
6131                                 goto fail;
6132                         break;
6133
6134                 case at_dot:
6135                         DEBUG_PRINT1("EXECUTING at_dot.\n");
6136                         if (!
6137                             (NILP(regex_match_object)
6138                              || BUFFERP(regex_match_object))
6139                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6140                                 != BUF_PT(regex_emacs_buffer)))
6141                                 goto fail;
6142                         break;
6143
6144                 case after_dot:
6145                         DEBUG_PRINT1("EXECUTING after_dot.\n");
6146                         if (!
6147                             (NILP(regex_match_object)
6148                              || BUFFERP(regex_match_object))
6149                             || (BUF_PTR_BYTE_POS(regex_emacs_buffer, d)
6150                                 <= BUF_PT(regex_emacs_buffer)))
6151                                 goto fail;
6152                         break;
6153 #if 0                           /* not emacs19 */
6154                 case at_dot:
6155                         DEBUG_PRINT1("EXECUTING at_dot.\n");
6156                         if (BUF_PTR_BYTE_POS
6157                             (regex_emacs_buffer,
6158                              (unsigned char *)d) + 1 !=
6159                             BUF_PT(regex_emacs_buffer))
6160                                 goto fail;
6161                         break;
6162 #endif                          /* not emacs19 */
6163
6164                 case syntaxspec:
6165                         DEBUG_PRINT2("EXECUTING syntaxspec %d.\n", mcnt);
6166                         mcnt = *p++;
6167                         goto matchsyntax;
6168
6169                 case wordchar:
6170                         DEBUG_PRINT1("EXECUTING Emacs wordchar.\n");
6171                         mcnt = (int)Sword;
6172                       matchsyntax:
6173                         should_succeed = 1;
6174                       matchornotsyntax:
6175                         {
6176                                 int matches;
6177                                 Emchar emch;
6178
6179                                 REGEX_PREFETCH();
6180 #ifdef emacs
6181                                 {
6182                                         int charpos =
6183                                             SYNTAX_CACHE_BYTE_TO_CHAR
6184                                             (PTR_TO_OFFSET(d));
6185                                         UPDATE_SYNTAX_CACHE(charpos);
6186                                 }
6187 #endif
6188
6189                                 emch = charptr_emchar((const Bufbyte *)d);
6190                                 matches =
6191                                     (SYNTAX_FROM_CACHE
6192                                      (XCHAR_TABLE
6193                                       (regex_emacs_buffer->mirror_syntax_table),
6194                                       emch) == (enum syntaxcode)mcnt);
6195                                 INC_CHARPTR(d);
6196                                 if (matches != should_succeed)
6197                                         goto fail;
6198                                 SET_REGS_MATCHED();
6199                         }
6200                         break;
6201
6202                 case notsyntaxspec:
6203                         DEBUG_PRINT2("EXECUTING notsyntaxspec %d.\n", mcnt);
6204                         mcnt = *p++;
6205                         goto matchnotsyntax;
6206
6207                 case notwordchar:
6208                         DEBUG_PRINT1("EXECUTING Emacs notwordchar.\n");
6209                         mcnt = (int)Sword;
6210                       matchnotsyntax:
6211                         should_succeed = 0;
6212                         goto matchornotsyntax;
6213
6214 #ifdef MULE
6215 /* 97/2/17 jhod Mule category code patch */
6216                 case categoryspec:
6217                         should_succeed = 1;
6218                       matchornotcategory:
6219                         {
6220                                 Emchar emch;
6221
6222                                 mcnt = *p++;
6223                                 REGEX_PREFETCH();
6224                                 emch = charptr_emchar((const Bufbyte *)d);
6225                                 INC_CHARPTR(d);
6226                                 if (check_category_char
6227                                     (emch, regex_emacs_buffer->category_table,
6228                                      mcnt, should_succeed))
6229                                         goto fail;
6230                                 SET_REGS_MATCHED();
6231                         }
6232                         break;
6233
6234                 case notcategoryspec:
6235                         should_succeed = 0;
6236                         goto matchornotcategory;
6237 /* end of category patch */
6238 #endif                          /* MULE */
6239 #else                           /* not emacs */
6240                 case wordchar:
6241                         DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n");
6242                         REGEX_PREFETCH();
6243                         if (!WORDCHAR_P_UNSAFE((int)(*d)))
6244                                 goto fail;
6245                         SET_REGS_MATCHED();
6246                         d++;
6247                         break;
6248
6249                 case notwordchar:
6250                         DEBUG_PRINT1("EXECUTING non-Emacs notwordchar.\n");
6251                         REGEX_PREFETCH();
6252                         if (!WORDCHAR_P_UNSAFE((int)(*d)))
6253                                 goto fail;
6254                         SET_REGS_MATCHED();
6255                         d++;
6256                         break;
6257 #endif                          /* emacs */
6258
6259                 default:
6260                         abort();
6261                 }
6262                 /* Successfully executed one pattern command; keep going.  */
6263                 continue;
6264
6265                 /* We goto here if a matching operation fails. */
6266         fail:
6267                 if (!FAIL_STACK_EMPTY()) {
6268                         /* A restart point is known.  Restore to that state.  */
6269                         DEBUG_PRINT1("\nFAIL:\n");
6270                         POP_FAILURE_POINT(d, p, lowest_active_reg,
6271                                           highest_active_reg, regstart, regend,
6272                                           reg_info);
6273
6274                         /* If this failure point is a dummy, try the next one.  */
6275                         if (!p)
6276                                 goto fail;
6277
6278                         /* If we failed to the end of the pattern, don't examine
6279                            *p.  */
6280                         assert(p <= pend);
6281                         if (p < pend) {
6282                                 re_bool is_a_jump_n = false;
6283
6284                                 /* If failed to a backwards jump that's part of
6285                                    a repetition loop, need to pop this failure
6286                                    point and use the next one.  */
6287                                 switch ((unsigned int)(re_opcode_t)*p) {
6288                                 case jump_n:
6289                                         is_a_jump_n = true;
6290                                 case maybe_pop_jump:
6291                                 case pop_failure_jump:
6292                                 case jump:
6293                                         p1 = p + 1;
6294                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6295                                         p1 += mcnt;
6296
6297                                         if ((is_a_jump_n
6298                                              && (re_opcode_t) * p1 == succeed_n)
6299                                             || (!is_a_jump_n
6300                                                 && (re_opcode_t) * p1 ==
6301                                                 on_failure_jump))
6302                                                 goto fail;
6303                                         break;
6304                                 default:
6305                                         /* do nothing */ ;
6306                                 }
6307                         }
6308
6309                         if (d >= string1 && d <= end1)
6310                                 dend = end_match_1;
6311                 } else
6312                         break;  /* Matching at this starting point really fails.  */
6313         }                       /* for (;;) */
6314
6315         if (best_regs_set)
6316                 goto restore_best_regs;
6317
6318         FREE_VARIABLES();
6319
6320         return -1;              /* Failure to match.  */
6321 }                               /* re_match_2 */
6322
6323 \f
6324 /* Subroutine definitions for re_match_2.  */
6325
6326 /* We are passed P pointing to a register number after a start_memory.
6327
6328    Return true if the pattern up to the corresponding stop_memory can
6329    match the empty string, and false otherwise.
6330
6331    If we find the matching stop_memory, sets P to point to one past its number.
6332    Otherwise, sets P to an undefined byte less than or equal to END.
6333
6334    We don't handle duplicates properly (yet).  */
6335
6336 static re_bool
6337 group_match_null_string_p(unsigned char **p, unsigned char *end,
6338                           register_info_type * register_info)
6339 {
6340         int mcnt;
6341         /* Point to after the args to the start_memory.  */
6342         unsigned char *p1 = *p + 2;
6343
6344         while (p1 < end) {
6345                 /* Skip over opcodes that can match nothing, and return true or
6346                    false, as appropriate, when we get to one that can't, or to the
6347                    matching stop_memory.  */
6348
6349                 switch ((unsigned int)(re_opcode_t)*p1) {
6350                         /* Could be either a loop or a series of alternatives.  */
6351                 case on_failure_jump:
6352                         p1++;
6353                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6354
6355                         /* If the next operation is not a jump backwards in the
6356                            pattern.  */
6357
6358                         if (mcnt >= 0) {
6359                                 /* Go through the on_failure_jumps of the alternatives,
6360                                    seeing if any of the alternatives cannot match nothing.
6361                                    The last alternative starts with only a jump,
6362                                    whereas the rest start with on_failure_jump and end
6363                                    with a jump, e.g., here is the pattern for `a|b|c':
6364
6365                                    /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
6366                                    /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
6367                                    /exactn/1/c
6368
6369                                    So, we have to first go through the first (n-1)
6370                                    alternatives and then deal with the last one separately.  */
6371
6372                                 /* Deal with the first (n-1) alternatives, which start
6373                                    with an on_failure_jump (see above) that jumps to right
6374                                    past a jump_past_alt.  */
6375
6376                                 while ((re_opcode_t) p1[mcnt - 3] ==
6377                                        jump_past_alt) {
6378                                         /* `mcnt' holds how many bytes long the alternative
6379                                            is, including the ending `jump_past_alt' and
6380                                            its number.  */
6381
6382                                         if (!alt_match_null_string_p
6383                                             (p1, p1 + mcnt - 3, register_info))
6384                                                 return false;
6385
6386                                         /* Move to right after this alternative, including the
6387                                            jump_past_alt.  */
6388                                         p1 += mcnt;
6389
6390                                         /* Break if it's the beginning of an n-th alternative
6391                                            that doesn't begin with an on_failure_jump.  */
6392                                         if ((re_opcode_t) * p1 !=
6393                                             on_failure_jump)
6394                                                 break;
6395
6396                                         /* Still have to check that it's not an n-th
6397                                            alternative that starts with an on_failure_jump.  */
6398                                         p1++;
6399                                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6400                                         if ((re_opcode_t) p1[mcnt - 3] !=
6401                                             jump_past_alt) {
6402                                                 /* Get to the beginning of the n-th alternative.  */
6403                                                 p1 -= 3;
6404                                                 break;
6405                                         }
6406                                 }
6407
6408                                 /* Deal with the last alternative: go back and get number
6409                                    of the `jump_past_alt' just before it.  `mcnt' contains
6410                                    the length of the alternative.  */
6411                                 EXTRACT_NUMBER(mcnt, p1 - 2);
6412
6413                                 if (!alt_match_null_string_p
6414                                     (p1, p1 + mcnt, register_info))
6415                                         return false;
6416
6417                                 p1 += mcnt;     /* Get past the n-th alternative.  */
6418                         }       /* if mcnt > 0 */
6419                         break;
6420
6421                 case stop_memory:
6422                         assert(p1[1] == **p);
6423                         *p = p1 + 2;
6424                         return true;
6425
6426                 default:
6427                         if (!common_op_match_null_string_p(&p1, end, register_info))
6428                                 return false;
6429                 }
6430         }                       /* while p1 < end */
6431
6432         return false;
6433 }                               /* group_match_null_string_p */
6434
6435 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
6436    It expects P to be the first byte of a single alternative and END one
6437    byte past the last. The alternative can contain groups.  */
6438
6439 static re_bool
6440 alt_match_null_string_p(unsigned char *p, unsigned char *end,
6441                         register_info_type * register_info)
6442 {
6443         int mcnt;
6444         unsigned char *p1 = p;
6445
6446         while (p1 < end) {
6447                 /* Skip over opcodes that can match nothing, and break when we get
6448                    to one that can't.  */
6449
6450                 switch ((unsigned int)(re_opcode_t)*p1) {
6451                         /* It's a loop.  */
6452                 case on_failure_jump:
6453                         p1++;
6454                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6455                         p1 += mcnt;
6456                         break;
6457
6458                 default:
6459                         if (!common_op_match_null_string_p(&p1, end, register_info))
6460                                 return false;
6461                 }
6462         }                       /* while p1 < end */
6463
6464         return true;
6465 }                               /* alt_match_null_string_p */
6466
6467 /* Deals with the ops common to group_match_null_string_p and
6468    alt_match_null_string_p.
6469
6470    Sets P to one after the op and its arguments, if any.  */
6471
6472 static re_bool
6473 common_op_match_null_string_p(unsigned char **p, unsigned char *end,
6474                               register_info_type * register_info)
6475 {
6476         int mcnt;
6477         re_bool ret;
6478         int reg_no;
6479         unsigned char *p1 = *p;
6480
6481         switch ((unsigned int)(re_opcode_t)*p1++) {
6482         case no_op:
6483         case begline:
6484         case endline:
6485         case begbuf:
6486         case endbuf:
6487         case wordbeg:
6488         case wordend:
6489         case wordbound:
6490         case notwordbound:
6491 #ifdef emacs
6492         case before_dot:
6493         case at_dot:
6494         case after_dot:
6495 #endif
6496                 break;
6497
6498         case start_memory:
6499                 reg_no = *p1;
6500                 assert(reg_no > 0 && reg_no <= MAX_REGNUM);
6501                 ret = group_match_null_string_p(&p1, end, register_info);
6502
6503                 /* Have to set this here in case we're checking a group which
6504                    contains a group and a back reference to it.  */
6505
6506                 if (REG_MATCH_NULL_STRING_P(register_info[reg_no]) ==
6507                     MATCH_NULL_UNSET_VALUE)
6508                         REG_MATCH_NULL_STRING_P(register_info[reg_no]) = ret;
6509
6510                 if (!ret)
6511                         return false;
6512                 break;
6513
6514                 /* If this is an optimized succeed_n for zero times, make the jump.  */
6515         case jump:
6516                 EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6517                 if (mcnt >= 0)
6518                         p1 += mcnt;
6519                 else
6520                         return false;
6521                 break;
6522
6523         case succeed_n:
6524                 /* Get to the number of times to succeed.  */
6525                 p1 += 2;
6526                 EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6527
6528                 if (mcnt == 0) {
6529                         p1 -= 4;
6530                         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
6531                         p1 += mcnt;
6532                 } else
6533                         return false;
6534                 break;
6535
6536         case duplicate:
6537                 if (!REG_MATCH_NULL_STRING_P(register_info[*p1]))
6538                         return false;
6539                 break;
6540
6541         case set_number_at:
6542                 p1 += 4;
6543                 break;
6544
6545         default:
6546                 /* All other opcodes mean we cannot match the empty string.  */
6547                 return false;
6548         }
6549
6550         *p = p1;
6551         return true;
6552 }                               /* common_op_match_null_string_p */
6553
6554 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
6555    bytes; nonzero otherwise.  */
6556
6557 static int
6558 bcmp_translate(re_char *s1, re_char *s2,
6559                REGISTER int len, RE_TRANSLATE_TYPE translate)
6560 {
6561         REGISTER const unsigned char *p1 = s1, *p2 = s2;
6562 #ifdef MULE
6563         const unsigned char *p1_end = s1 + len;
6564         const unsigned char *p2_end = s2 + len;
6565
6566         while (p1 != p1_end && p2 != p2_end) {
6567                 Emchar p1_ch, p2_ch;
6568
6569                 p1_ch = charptr_emchar(p1);
6570                 p2_ch = charptr_emchar(p2);
6571
6572                 if (RE_TRANSLATE(p1_ch)
6573                     != RE_TRANSLATE(p2_ch))
6574                         return 1;
6575                 INC_CHARPTR(p1);
6576                 INC_CHARPTR(p2);
6577         }
6578 #else                           /* not MULE */
6579         while (len) {
6580                 if (RE_TRANSLATE(*p1++) != RE_TRANSLATE(*p2++))
6581                         return 1;
6582                 len--;
6583         }
6584 #endif                          /* MULE */
6585         return 0;
6586 }
6587 \f
6588 /* Entry points for GNU code.  */
6589
6590 /* re_compile_pattern is the GNU regular expression compiler: it
6591    compiles PATTERN (of length SIZE) and puts the result in BUFP.
6592    Returns 0 if the pattern was valid, otherwise an error string.
6593
6594    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6595    are set in BUFP on entry.
6596
6597    We call regex_compile to do the actual compilation.  */
6598
6599 const char *re_compile_pattern(const char *pattern, int length,
6600                                struct re_pattern_buffer *bufp)
6601 {
6602         reg_errcode_t ret;
6603
6604         /* GNU code is written to assume at least RE_NREGS registers will be set
6605            (and at least one extra will be -1).  */
6606         bufp->regs_allocated = REGS_UNALLOCATED;
6607
6608         /* And GNU code determines whether or not to get register information
6609            by passing null for the REGS argument to re_match, etc., not by
6610            setting no_sub.  */
6611         bufp->no_sub = 0;
6612
6613         /* Match anchors at newline.  */
6614         bufp->newline_anchor = 1;
6615
6616         ret = regex_compile((const unsigned char*)pattern,
6617                             length, re_syntax_options, bufp);
6618
6619         if (!ret)
6620                 return NULL;
6621         return gettext(re_error_msgid[(int)ret]);
6622 }
6623 \f
6624 /* Entry points compatible with 4.2 BSD regex library.  We don't define
6625    them unless specifically requested.  */
6626
6627 #ifdef _REGEX_RE_COMP
6628
6629 /* BSD has one and only one pattern buffer.  */
6630 static struct re_pattern_buffer re_comp_buf;
6631
6632 char *re_comp(const char *s)
6633 {
6634         reg_errcode_t ret;
6635
6636         if (!s) {
6637                 if (!re_comp_buf.buffer)
6638                         return gettext("No previous regular expression");
6639                 return 0;
6640         }
6641
6642         if (!re_comp_buf.buffer) {
6643                 re_comp_buf.buffer = (unsigned char *)xmalloc_atomic(200);
6644                 if (re_comp_buf.buffer == NULL)
6645                         return gettext(re_error_msgid[(int)REG_ESPACE]);
6646                 re_comp_buf.allocated = 200;
6647
6648                 re_comp_buf.fastmap = (char *)xmalloc_atomic(1 << BYTEWIDTH);
6649                 if (re_comp_buf.fastmap == NULL)
6650                         return gettext(re_error_msgid[(int)REG_ESPACE]);
6651         }
6652
6653         /* Since `re_exec' always passes NULL for the `regs' argument, we
6654            don't need to initialize the pattern buffer fields which affect it.  */
6655
6656         /* Match anchors at newlines.  */
6657         re_comp_buf.newline_anchor = 1;
6658
6659         ret =
6660             regex_compile((unsigned char *)s, strlen(s), re_syntax_options,
6661                           &re_comp_buf);
6662
6663         if (!ret)
6664                 return NULL;
6665
6666         /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6667         return (char *)gettext(re_error_msgid[(int)ret]);
6668 }
6669
6670 int re_exec(const char *s)
6671 {
6672         const int len = strlen(s);
6673         return
6674             0 <= re_search(&re_comp_buf, s, len, 0, len,
6675                            (struct re_registers *)0);
6676 }
6677 #endif                          /* _REGEX_RE_COMP */
6678 \f
6679 /* POSIX.2 functions.  Don't define these for Emacs.  */
6680
6681 #ifndef emacs
6682
6683 /* regcomp takes a regular expression as a string and compiles it.
6684
6685    PREG is a regex_t *.  We do not expect any fields to be initialized,
6686    since POSIX says we shouldn't.  Thus, we set
6687
6688      `buffer' to the compiled pattern;
6689      `used' to the length of the compiled pattern;
6690      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6691        REG_EXTENDED bit in CFLAGS is set; otherwise, to
6692        RE_SYNTAX_POSIX_BASIC;
6693      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6694      `fastmap' and `fastmap_accurate' to zero;
6695      `re_nsub' to the number of subexpressions in PATTERN.
6696      (non-shy of course.  POSIX probably doesn't know about
6697      shy ones, and in any case they should be invisible.)
6698
6699    PATTERN is the address of the pattern string.
6700
6701    CFLAGS is a series of bits which affect compilation.
6702
6703      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6704      use POSIX basic syntax.
6705
6706      If REG_NEWLINE is set, then . and [^...] don't match newline.
6707      Also, regexec will try a match beginning after every newline.
6708
6709      If REG_ICASE is set, then we considers upper- and lowercase
6710      versions of letters to be equivalent when matching.
6711
6712      If REG_NOSUB is set, then when PREG is passed to regexec, that
6713      routine will report only success or failure, and nothing about the
6714      registers.
6715
6716    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6717    the return codes and their meanings.)  */
6718
6719 int regcomp(regex_t * preg, const char *pattern, int cflags)
6720 {
6721         reg_errcode_t ret;
6722         unsigned syntax
6723             = (cflags & REG_EXTENDED) ?
6724             RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6725
6726         /* regex_compile will allocate the space for the compiled pattern.  */
6727         preg->buffer = 0;
6728         preg->allocated = 0;
6729         preg->used = 0;
6730
6731         /* Don't bother to use a fastmap when searching.  This simplifies the
6732            REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6733            characters after newlines into the fastmap.  This way, we just try
6734            every character.  */
6735         preg->fastmap = 0;
6736
6737         if (cflags & REG_ICASE) {
6738                 int i;
6739
6740                 preg->translate = (char *)xmalloc_atomic(CHAR_SET_SIZE);
6741                 if (preg->translate == NULL)
6742                         return (int)REG_ESPACE;
6743
6744                 /* Map uppercase characters to corresponding lowercase ones.  */
6745                 for (i = 0; i < CHAR_SET_SIZE; i++)
6746                         preg->translate[i] = ISUPPER(i) ? tolower(i) : i;
6747         } else
6748                 preg->translate = NULL;
6749
6750         /* If REG_NEWLINE is set, newlines are treated differently.  */
6751         if (cflags & REG_NEWLINE) {
6752                 /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6753                 syntax &= ~RE_DOT_NEWLINE;
6754                 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6755                 /* It also changes the matching behavior.  */
6756                 preg->newline_anchor = 1;
6757         } else {
6758                 preg->newline_anchor = 0;
6759         }
6760         preg->no_sub = !!(cflags & REG_NOSUB);
6761
6762         /* POSIX says a null character in the pattern terminates it, so we
6763            can use strlen here in compiling the pattern.  */
6764         ret = regex_compile((const unsigned char*)pattern,
6765                             strlen(pattern), syntax, preg);
6766
6767         /* POSIX doesn't distinguish between an unmatched open-group and an
6768            unmatched close-group: both are REG_EPAREN.  */
6769         if (ret == REG_ERPAREN)
6770                 ret = REG_EPAREN;
6771
6772         return (int)ret;
6773 }
6774
6775 /* regexec searches for a given pattern, specified by PREG, in the
6776    string STRING.
6777
6778    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6779    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6780    least NMATCH elements, and we set them to the offsets of the
6781    corresponding matched substrings.
6782
6783    EFLAGS specifies `execution flags' which affect matching: if
6784    REG_NOTBOL is set, then ^ does not match at the beginning of the
6785    string; if REG_NOTEOL is set, then $ does not match at the end.
6786
6787    We return 0 if we find a match and REG_NOMATCH if not.  */
6788
6789 int
6790 regexec(const regex_t * preg, const char *string, Element_count nmatch,
6791         regmatch_t pmatch[], int eflags)
6792 {
6793         int ret;
6794         struct re_registers regs;
6795         regex_t private_preg;
6796         int len = strlen(string);
6797         re_bool want_reg_info = !preg->no_sub && nmatch > 0;
6798
6799         private_preg = *preg;
6800
6801         private_preg.not_bol = !!(eflags & REG_NOTBOL);
6802         private_preg.not_eol = !!(eflags & REG_NOTEOL);
6803
6804         /* The user has told us exactly how many registers to return
6805            information about, via `nmatch'.  We have to pass that on to the
6806            matching routines.  */
6807         private_preg.regs_allocated = REGS_FIXED;
6808
6809         if (want_reg_info) {
6810                 regs.num_regs = nmatch;
6811                 regs.start = TALLOC(nmatch, regoff_t);
6812                 regs.end = TALLOC(nmatch, regoff_t);
6813                 if (regs.start == NULL || regs.end == NULL)
6814                         return (int)REG_NOMATCH;
6815         }
6816
6817         /* Perform the searching operation.  */
6818         ret = re_search(&private_preg, string, len,
6819                         /* start: */ 0, /* range: */ len,
6820                         want_reg_info ? &regs : (struct re_registers *)0);
6821
6822         /* Copy the register information to the POSIX structure.  */
6823         if (want_reg_info) {
6824                 if (ret >= 0) {
6825                         Element_count r;
6826
6827                         for (r = 0; r < nmatch; r++) {
6828                                 pmatch[r].rm_so = regs.start[r];
6829                                 pmatch[r].rm_eo = regs.end[r];
6830                         }
6831                 }
6832
6833                 /* If we needed the temporary register info, free the space now.  */
6834                 xfree(regs.start);
6835                 xfree(regs.end);
6836         }
6837
6838         /* We want zero return to mean success, unlike `re_search'.  */
6839         return ret >= 0 ? (int)REG_NOERROR : (int)REG_NOMATCH;
6840 }
6841
6842 /* Returns a message corresponding to an error code, ERRCODE, returned
6843    from either regcomp or regexec.   We don't use PREG here.  */
6844
6845 Memory_count
6846 regerror(int errcode, const regex_t * preg, char *errbuf,
6847          Memory_count errbuf_size)
6848 {
6849         const char *msg;
6850         Memory_count msg_size;
6851
6852         if (errcode < 0 || (size_t) errcode >= (sizeof(re_error_msgid)
6853                                                 / sizeof(re_error_msgid[0])))
6854                 /* Only error codes returned by the rest of the code should be passed
6855                    to this routine.  If we are given anything else, or if other regex
6856                    code generates an invalid error code, then the program has a bug.
6857                    Dump core so we can fix it.  */
6858                 abort();
6859
6860         msg = gettext(re_error_msgid[errcode]);
6861
6862         msg_size = strlen(msg) + 1;     /* Includes the null.  */
6863
6864         if (errbuf_size != 0) {
6865                 strncpy(errbuf, msg, errbuf_size - 1);
6866                 errbuf[errbuf_size-1]='\0';
6867         }
6868
6869         return msg_size;
6870 }
6871
6872 /* Free dynamically allocated space used by PREG.  */
6873
6874 void regfree(regex_t * preg)
6875 {
6876         if (preg->buffer != NULL) {
6877                 xfree(preg->buffer);
6878         }
6879         preg->buffer = NULL;
6880
6881         preg->allocated = 0;
6882         preg->used = 0;
6883
6884         if (preg->fastmap != NULL) {
6885                 xfree(preg->fastmap);
6886         }
6887         preg->fastmap = NULL;
6888         preg->fastmap_accurate = 0;
6889
6890         if (preg->translate != NULL) {
6891                 xfree(preg->translate);
6892         }
6893         preg->translate = NULL;
6894 }
6895
6896 #endif                          /* not emacs  */
6897 \f
6898 /*
6899 Local variables:
6900 make-backup-files: t
6901 version-control: t
6902 trim-versions-without-asking: nil
6903 End:
6904 */