Cleanup invalid mirrors
[sxemacs] / src / syntax.c
1 /* SXEmacs routines to deal with syntax tables; also word and list parsing.
2    Copyright (C) 1985-1994 Free Software Foundation, Inc.
3    Copyright (C) 1995 Sun Microsystems, Inc.
4
5 This file is part of SXEmacs
6
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
19
20
21 /* Synched up with: FSF 19.28. */
22
23 /* This file has been Mule-ized. */
24
25 #include <config.h>
26 #include "lisp.h"
27
28 #include "buffer.h"
29 #include "syntax.h"
30 #include "extents.h"
31
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33    explaining the purpose of the Sextword syntax category:
34
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult.  Theoretically it's impossible without
37 using natural language processing techniques.  But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
40 for Japanese.
41
42         A Japanese word is a sequence of characters that consists of
43         zero or more Kanji characters followed by zero or more
44         Hiragana characters.
45
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD.  For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
50
51 So, we introduced Sextword for Japanese letters.  A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters.  */
54
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
56 /* Recovered by tomo */
57
58 #define ST_COMMENT_STYLE 0x101
59 #define ST_STRING_STYLE  0x102
60
61 Lisp_Object Qsyntax_table;
62 int lookup_syntax_properties;
63
64 Lisp_Object Qsyntax_table_p;
65
66 int words_include_escapes;
67
68 int parse_sexp_ignore_comments;
69
70 /* The following two variables are provided to tell additional information
71    to the regex routines.  We do it this way rather than change the
72    arguments to re_search_2() in an attempt to maintain some call
73    compatibility with other versions of the regex code. */
74
75 /* Tell the regex routines not to QUIT.  Normally there is a QUIT
76    each iteration in re_search_2(). */
77 int no_quit_in_re_search;
78
79 /* Tell the regex routines which buffer to access for SYNTAX() lookups
80    and the like. */
81 struct buffer *regex_emacs_buffer;
82
83 /* In Emacs, this is the string or buffer in which we
84    are matching.  It is used for looking up syntax properties.  */
85 Lisp_Object regex_match_object;
86
87 Lisp_Object Vstandard_syntax_table;
88
89 Lisp_Object Vsyntax_designator_chars_string;
90
91 Lisp_Object Qscan_error;
92
93 /* This is the internal form of the parse state used in parse-partial-sexp.  */
94
95 struct lisp_parse_state {
96         int depth;              /* Depth at end of parsing */
97         Emchar instring;        /* -1 if not within string, else desired terminator */
98         int incomment;          /* Nonzero if within a comment at end of parsing */
99         int comstyle;           /* comment style a=0, or b=1, or ST_COMMENT_STYLE */
100         int quoted;             /* Nonzero if just after an escape char at end of
101                                    parsing */
102         Bufpos thislevelstart;  /* Char number of most recent start-of-expression
103                                    at current level */
104         Bufpos prevlevelstart;  /* Char number of start of containing expression */
105         Bufpos location;        /* Char number at which parsing stopped */
106         int mindepth;           /* Minimum depth seen while scanning  */
107         Bufpos comstr_start;    /* Position just after last comment/string starter
108                                    (if the 'syntax-table text property is not
109                                    supported, used only for comment starts) */
110         Lisp_Object levelstarts;        /* Char numbers of starts-of-expression
111                                            of levels (starting from outermost).  */
112 };
113 \f
114 /* These variables are a cache for finding the start of a defun.
115    find_start_pos    is the place for which the defun start was found.
116    find_start_value  is the defun start position found for it.
117    find_start_buffer is the buffer it was found in.
118    find_start_begv   is the BEGV value when it was found.
119    find_start_modiff is the value of MODIFF when it was found.  */
120
121 static Bufpos find_start_pos;
122 static Bufpos find_start_value;
123 static struct buffer *find_start_buffer;
124 static Bufpos find_start_begv;
125 static int find_start_modiff;
126
127 /* Find a defun-start that is the last one before POS (or nearly the last).
128    We record what we find, so that another call in the same area
129    can return the same value right away.  */
130
131 static Bufpos find_defun_start(struct buffer *buf, Bufpos pos)
132 {
133         Bufpos tem;
134
135         /* Use previous finding, if it's valid and applies to this inquiry.  */
136         if (buf == find_start_buffer
137             /* Reuse the defun-start even if POS is a little farther on.
138                POS might be in the next defun, but that's ok.
139                Our value may not be the best possible, but will still be usable.  */
140             && pos <= find_start_pos + 1000
141             && pos >= find_start_value
142             && BUF_BEGV(buf) == find_start_begv
143             && BUF_MODIFF(buf) == find_start_modiff)
144                 return find_start_value;
145
146         /* Back up to start of line.  */
147         tem = find_next_newline(buf, pos, -1);
148
149         SCS_STATISTICS_SET_FUNCTION(scs_find_defun_start);
150         SETUP_SYNTAX_CACHE(tem, 1);
151         while (tem > BUF_BEGV(buf)) {
152                 UPDATE_SYNTAX_CACHE_BACKWARD(tem);
153
154                 /* Open-paren at start of line means we found our defun-start.  */
155                 if (SYNTAX_FROM_CACHE(mirrortab, BUF_FETCH_CHAR(buf, tem)) ==
156                     Sopen)
157                         break;
158                 /* Move to beg of previous line.  */
159                 tem = find_next_newline(buf, tem, -2);
160         }
161
162         /* Record what we found, for the next try.  */
163         find_start_value = tem;
164         find_start_buffer = buf;
165         find_start_modiff = BUF_MODIFF(buf);
166         find_start_begv = BUF_BEGV(buf);
167         find_start_pos = pos;
168
169         return find_start_value;
170 }
171 \f
172 DEFUN("syntax-table-p", Fsyntax_table_p, 1, 1, 0,       /*
173 Return t if OBJECT is a syntax table.
174 Any vector of 256 elements will do.
175 */
176       (object))
177 {
178         return (CHAR_TABLEP(object)
179                 && XCHAR_TABLE_TYPE(object) == CHAR_TABLE_TYPE_SYNTAX)
180             ? Qt : Qnil;
181 }
182
183 static Lisp_Object check_syntax_table(Lisp_Object obj, Lisp_Object default_)
184 {
185         if (NILP(obj))
186                 obj = default_;
187         while (NILP(Fsyntax_table_p(obj)))
188                 obj = wrong_type_argument(Qsyntax_table_p, obj);
189         return obj;
190 }
191
192 DEFUN("syntax-table", Fsyntax_table, 0, 1, 0,   /*
193 Return the current syntax table.
194 This is the one specified by the current buffer, or by BUFFER if it
195 is non-nil.
196 */
197       (buffer))
198 {
199         return decode_buffer(buffer, 0)->syntax_table;
200 }
201
202 DEFUN("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
203 Return the standard syntax table.
204 This is the one used for new buffers.
205 */
206       ())
207 {
208         return Vstandard_syntax_table;
209 }
210
211 DEFUN("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
212 Return a new syntax table which is a copy of SYNTAX-TABLE.
213 SYNTAX-TABLE defaults to the standard syntax table.
214 */
215       (syntax_table))
216 {
217         if (NILP(Vstandard_syntax_table))
218                 return Fmake_char_table(Qsyntax);
219
220         syntax_table = check_syntax_table(syntax_table, Vstandard_syntax_table);
221         return Fcopy_char_table(syntax_table);
222 }
223
224 DEFUN("set-syntax-table", Fset_syntax_table, 1, 2, 0,   /*
225 Select SYNTAX-TABLE as the new syntax table for BUFFER.
226 BUFFER defaults to the current buffer if omitted.
227 */
228       (syntax_table, buffer))
229 {
230         struct buffer *buf = decode_buffer(buffer, 0);
231         syntax_table = check_syntax_table(syntax_table, Qnil);
232         buf->syntax_table = syntax_table;
233         buf->mirror_syntax_table = XCHAR_TABLE(syntax_table)->mirror_table;
234         /* Indicate that this buffer now has a specified syntax table.  */
235         buf->local_var_flags |= XINT(buffer_local_flags.syntax_table);
236         return syntax_table;
237 }
238 \f
239 /* The current syntax state */
240 struct syntax_cache syntax_cache;
241
242 /*
243    Update syntax_cache to an appropriate setting for position POS
244
245    The sign of COUNT gives the relative position of POS wrt the
246    previously valid interval.  (not currently used)
247
248    `syntax_cache.*_change' are the next and previous positions at
249    which syntax_code and c_s_t will need to be recalculated.
250
251    #### Currently this code uses 'get-char-property', which will
252    return the "last smallest" extent at a given position. In cases
253    where overlapping extents are defined, this code will simply use
254    whatever is returned by get-char-property.
255
256    It might be worth it at some point to merge provided syntax tables
257    outward to the current buffer.
258
259    sjt sez:
260    This implementation has to rather inefficient, since it looks at
261    next-extent-change, and a heavily font-locked buffer will be rife
262    with irrelevant extents.  We could do a sledgehammer check on this
263    by looking at the distribution of extent lengths.  Also count up
264    cache hits and misses.
265
266    If we assume that syntax-table is a _text_ property (which also
267    deals with the issue of overlapping syntax-table properties), then
268    the following strategy recommends itself
269      o give the syntax cache a `valid' flag, to be reset whenever a
270        syntax-table property is added, changed, or removed; this could
271        be done by setting syntax_cache's prev_change > next_change
272        (but not compatible with using extents/markers here); if it's a
273        Lisp variable, doing it in Lisp shouldn't be too inefficient
274      o lazily initialize the cache whenever the object being examined
275        differs from the object the cache currently refers to
276      o by using {previous,next-single-property-change} we should be
277        able to get much bigger cache intervals (in most cases, the
278        whole buffer)
279      o cache markers instead of positions so the mere insertion or
280        deletion of text doesn't invalidate the cache, only if it
281        involves a syntax-table property (we could also cache the
282        extents carrying the syntax-table text-property; that gives us
283        another check for invalid cache).
284
285    If I understand this correctly, we need to invalidate the cache in the
286    following cases:
287      o If the referenced object changes (it's a global cache)
288      o If there are insertions or deletions of text (the positions are
289        absolute; fix: use markers or an extent instead?)
290      o If the syntax-table property is altered == added and different or
291        removed and the same (fix: probably computable from range overlap,
292        but is it worth it?  would interact with ins/del); this includes
293        detachment of extents with the same value (but only the boundary
294        extents, as otherwise the range coalesces across the deletion point)
295        and attachment of extents with a different value
296    Note: the above looks a lot like what Ben has implemented in 21.5, but
297    he goes one better by making the cache buffer-local.
298
299    Note: cperl mode uses the text property API, not extents/overlays.
300 */
301
302 #ifdef SYNTAX_CACHE_STATISTICS
303 struct syntax_cache_statistics scs_statistics =
304     { 0, 0, 0, 0, -1, -1, 0.0, 0.0, scs_no_function };
305
306 char *syntax_cache_statistics_function_names[scs_number_of_functions] = {
307         "find_context",
308         "find_defun_start",
309         "scan_words",
310         "Fforward_comment",
311         "scan_lists",
312         "Fbackward_prefix_characters",
313         "scan_sexps_forward"
314 };
315 #endif                          /* SYNTAX_CACHE_STATISTICS */
316
317 void update_syntax_cache(int pos, int count)
318 {
319         Lisp_Object tmp_table;
320
321 #ifdef SYNTAX_CACHE_STATISTICS
322         if (scs_statistics.total_updates == 0) {
323                 int i;
324                 for (i = 0; i < scs_number_of_functions; ++i)
325                         scs_statistics.functions[i] = 0;
326         }
327         if (syntax_cache.prev_change > syntax_cache.next_change)
328                 scs_statistics.inits++;
329         else if (pos < syntax_cache.prev_change)
330                 scs_statistics.misses_lo++;
331         else if (pos >= syntax_cache.next_change)
332                 scs_statistics.misses_hi++;
333 #endif                          /* SYNTAX_CACHE_STATISTICS */
334
335         /* #### Since font-lock undoes any narrowing, maybe the BUF_ZV and
336            BUF_BEGV below should be BUF_Z and BUF_BEG respectively? */
337         if (BUFFERP(syntax_cache.object)) {
338                 int get_change_before = pos + 1;
339
340                 tmp_table = Fget_char_property(make_int(pos), Qsyntax_table,
341                                                syntax_cache.object, Qnil);
342 #if NEXT_SINGLE_PROPERTY_CHANGE
343                 /* #### shouldn't we be using BUF_BEGV here? */
344                 syntax_cache.next_change =
345                     XINT(Fnext_single_property_change
346                          (make_int(pos > 0 ? pos : 1), Qsyntax_table,
347                           syntax_cache.object,
348                           make_int(BUF_ZV(syntax_cache.buffer))));
349 #else
350                 syntax_cache.next_change =
351                     XINT(Fnext_extent_change(make_int(pos > 0 ? pos : 1),
352                                              syntax_cache.object));
353 #endif
354
355                 /* #### shouldn't we be using BUF_BEGV here? */
356                 if (get_change_before < 1)
357                         get_change_before = 1;
358                 else if (get_change_before > BUF_ZV(syntax_cache.buffer))
359                         get_change_before = BUF_ZV(syntax_cache.buffer);
360
361 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
362                 /* #### shouldn't we be using BUF_BEGV here? */
363                 syntax_cache.prev_change =
364                     XINT(Fprevious_single_property_change
365                          (make_int(get_change_before), Qsyntax_table,
366                           syntax_cache.object, make_int(1)));
367 #else
368                 syntax_cache.prev_change =
369                     XINT(Fprevious_extent_change(make_int(get_change_before),
370                                                  syntax_cache.object));
371 #endif
372         } else if (STRINGP(syntax_cache.object)) {
373                 int get_change_before = pos + 1;
374
375                 tmp_table = Fget_char_property(make_int(pos), Qsyntax_table,
376                                                syntax_cache.object, Qnil);
377 #if NEXT_SINGLE_PROPERTY_CHANGE
378                 /* #### shouldn't we be using BUF_BEGV here? */
379                 syntax_cache.next_change =
380                     XINT(Fnext_single_property_change
381                          (make_int(pos >= 0 ? pos : 0), Qsyntax_table,
382                           syntax_cache.object,
383                           make_int(XSTRING_LENGTH(syntax_cache.object))));
384 #else
385                 syntax_cache.next_change =
386                     XINT(Fnext_extent_change(make_int(pos >= 0 ? pos : 0),
387                                              syntax_cache.object));
388 #endif
389
390                 if (get_change_before < 0)
391                         get_change_before = 0;
392                 else if (get_change_before >
393                          XSTRING_LENGTH(syntax_cache.object))
394                         get_change_before = XSTRING_LENGTH(syntax_cache.object);
395
396 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
397                 syntax_cache.prev_change =
398                     XINT(Fprevious_single_property_change
399                          (make_int(get_change_before), Qsyntax_table,
400                           syntax_cache.object, make_int(0)));
401 #else
402                 syntax_cache.prev_change =
403                     XINT(Fprevious_extent_change(make_int(get_change_before),
404                                                  syntax_cache.object));
405 #endif
406         } else {
407                 /* silence compiler */
408                 tmp_table = Qnil;
409                 /* Always aborts.
410                  * #### Is there another sensible thing to do here? */
411                 assert(BUFFERP(syntax_cache.object) ||
412                        STRINGP(syntax_cache.object));
413         }
414
415         if (EQ(Fsyntax_table_p(tmp_table), Qt)) {
416                 syntax_cache.use_code = 0;
417                 syntax_cache.current_syntax_table =
418                         XCHAR_TABLE(tmp_table)->mirror_table;
419         } else if (CONSP(tmp_table) && INTP(XCAR(tmp_table))) {
420                 syntax_cache.use_code = 1;
421                 /* we CANNOT cast the following, XINT goes to long int
422                  * while enums are usually just unsigned ints
423                  * this'll trigger a warning on 64b machines */
424                 syntax_cache.syntax_code = XINT(XCAR(tmp_table));
425         } else {
426                 syntax_cache.use_code = 0;
427                 syntax_cache.current_syntax_table =
428                     syntax_cache.buffer->mirror_syntax_table;
429         }
430
431 #ifdef SYNTAX_CACHE_STATISTICS
432         {
433                 int length =
434                     syntax_cache.next_change - syntax_cache.prev_change;
435                 int misses =
436                     scs_statistics.misses_lo + scs_statistics.misses_hi +
437                     scs_statistics.inits;
438
439                 if (scs_statistics.min_length == -1
440                     || scs_statistics.min_length > length)
441                         scs_statistics.min_length = length;
442                 if (scs_statistics.max_length == -1
443                     || scs_statistics.max_length < length)
444                         scs_statistics.max_length = length;
445                 scs_statistics.mean_length_on_miss =
446                     ((misses - 1) * scs_statistics.mean_length_on_miss +
447                      length) / misses;
448         }
449
450         scs_statistics.mean_length
451             = scs_statistics.total_updates * scs_statistics.mean_length
452             + syntax_cache.next_change - syntax_cache.prev_change;
453         scs_statistics.total_updates++;
454         scs_statistics.mean_length /= scs_statistics.total_updates;
455
456         if (scs_statistics.this_function != scs_no_function) {
457                 scs_statistics.functions[scs_statistics.this_function]++;
458                 scs_statistics.this_function = scs_no_function;
459         }
460
461         if (!
462             (scs_statistics.total_updates %
463              SYNTAX_CACHE_STATISTICS_REPORT_INTERVAL)) {
464                 fprintf(stderr, "Syntax cache stats:\n  ");
465                 fprintf(stderr,
466                         "updates %d, inits %d, misses low %d, misses high %d,",
467                         scs_statistics.total_updates, scs_statistics.inits,
468                         scs_statistics.misses_lo, scs_statistics.misses_hi);
469                 fprintf(stderr, "\n ");
470
471 #define REPORT_FUNCTION(i)                              \
472   fprintf (stderr, " %s %d,",                           \
473            syntax_cache_statistics_function_names[i],   \
474            scs_statistics.functions[i]);
475
476                 REPORT_FUNCTION(scs_find_context);
477                 REPORT_FUNCTION(scs_find_defun_start);
478                 REPORT_FUNCTION(scs_scan_words);
479                 REPORT_FUNCTION(scs_Fforward_comment);
480                 fprintf(stderr, "\n ");
481                 REPORT_FUNCTION(scs_scan_lists);
482                 REPORT_FUNCTION(scs_Fbackward_prefix_characters);
483                 REPORT_FUNCTION(scs_scan_sexps_forward);
484 #undef REPORT_FUNCTION
485
486                 fprintf(stderr, "\n  min length %d, max length %d,",
487                         scs_statistics.min_length, scs_statistics.max_length);
488                 fprintf(stderr,
489                         "\n  mean length %.1f, mean length on miss %.1f\n",
490                         scs_statistics.mean_length,
491                         scs_statistics.mean_length_on_miss);
492         }
493 #endif                          /* SYNTAX_CACHE_STATISTICS */
494 }
495 \f
496 /* Convert a letter which signifies a syntax code
497    into the code it signifies.
498    This is used by modify-syntax-entry, and other things. */
499
500 const unsigned char syntax_spec_code[0400] =
501     { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
502         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
503         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
504         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
505         (char)Swhitespace, 0377, (char)Sstring, 0377,
506         (char)Smath, 0377, 0377, (char)Squote,
507         (char)Sopen, (char)Sclose, 0377, 0377,
508         0377, (char)Swhitespace, (char)Spunct, (char)Scharquote,
509         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
510         0377, 0377, 0377, 0377,
511         (char)Scomment, 0377, (char)Sendcomment, 0377,
512         (char)Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,       /* @, A ... */
513         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
514         0377, 0377, 0377, 0377, 0377, 0377, 0377, (char)Sword,
515         0377, 0377, 0377, 0377, (char)Sescape, 0377, 0377, (char)Ssymbol,
516         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
517         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
518         0377, 0377, 0377, 0377, 0377, 0377, 0377, (char)Sword,
519         0377, 0377, 0377, 0377, (char)Sstring_fence, 0377, 0377, 0377
520 };
521
522 unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@!|";
523
524 DEFUN("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0,     /*
525 Return a string of the recognized syntax designator chars.
526 The chars are ordered by their internal syntax codes, which are
527 numbered starting at 0.
528 */
529       ())
530 {
531         return Vsyntax_designator_chars_string;
532 }
533
534 DEFUN("char-syntax", Fchar_syntax, 1, 2, 0,     /*
535 Return the syntax code of CHARACTER, described by a character.
536 For example, if CHARACTER is a word constituent,
537 the character `?w' is returned.
538 The characters that correspond to various syntax codes
539 are listed in the documentation of `modify-syntax-entry'.
540 Optional second argument SYNTAX-TABLE defaults to the current buffer's
541 syntax table.
542 */
543       (character, syntax_table))
544 {
545         Lisp_Char_Table *mirrortab;
546
547         if (NILP(character)) {
548                 character = make_char('\000');
549         }
550         CHECK_CHAR_COERCE_INT(character);
551         syntax_table =
552             check_syntax_table(syntax_table, current_buffer->syntax_table);
553         mirrortab = XCHAR_TABLE(XCHAR_TABLE(syntax_table)->mirror_table);
554         return
555             make_char(syntax_code_spec
556                       [(int)SYNTAX(mirrortab, XCHAR(character))]);
557 }
558
559 #ifdef MULE
560
561 enum syntaxcode
562 charset_syntax(struct buffer *buf, Lisp_Object charset, int *multi_p_out)
563 {
564         *multi_p_out = 1;
565         /* #### get this right */
566         return Spunct;
567 }
568
569 #endif
570
571 Lisp_Object syntax_match(Lisp_Object syntax_table, Emchar ch)
572 {
573         Lisp_Object code = XCHAR_TABLE_VALUE_UNSAFE(syntax_table, ch);
574         Lisp_Object code2 = code;
575
576         if (CONSP(code))
577                 code2 = XCAR(code);
578         if (SYNTAX_FROM_CODE(XINT(code2)) == Sinherit)
579                 code = XCHAR_TABLE_VALUE_UNSAFE(Vstandard_syntax_table, ch);
580
581         return CONSP(code) ? XCDR(code) : Qnil;
582 }
583
584 DEFUN("matching-paren", Fmatching_paren, 1, 2, 0,       /*
585 Return the matching parenthesis of CHARACTER, or nil if none.
586 Optional second argument SYNTAX-TABLE defaults to the current buffer's
587 syntax table.
588 */
589       (character, syntax_table))
590 {
591         Lisp_Char_Table *mirrortab;
592         int code;
593
594         CHECK_CHAR_COERCE_INT(character);
595         syntax_table =
596             check_syntax_table(syntax_table, current_buffer->syntax_table);
597         mirrortab = XCHAR_TABLE(XCHAR_TABLE(syntax_table)->mirror_table);
598         code = SYNTAX(mirrortab, XCHAR(character));
599         if (code == Sopen || code == Sclose || code == Sstring)
600                 return syntax_match(syntax_table, XCHAR(character));
601         return Qnil;
602 }
603 \f
604 #ifdef MULE
605 /* Return 1 if there is a word boundary between two word-constituent
606    characters C1 and C2 if they appear in this order, else return 0.
607    There is no word boundary between two word-constituent ASCII
608    characters.  */
609 #define WORD_BOUNDARY_P(c1, c2)                 \
610   (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2))    \
611    && word_boundary_p (c1, c2))
612
613 extern int word_boundary_p(Emchar c1, Emchar c2);
614 #endif
615
616 /* Return the position across COUNT words from FROM.
617    If that many words cannot be found before the end of the buffer, return 0.
618    COUNT negative means scan backward and stop at word beginning.  */
619
620 Bufpos scan_words(struct buffer *buf, Bufpos from, int count)
621 {
622         Bufpos limit = count > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
623         Emchar ch0, ch1;
624         enum syntaxcode code;
625
626         SCS_STATISTICS_SET_FUNCTION(scs_scan_words);
627         SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, from, count);
628
629         /* #### is it really worth it to hand expand both cases? JV */
630         while (count > 0) {
631                 QUIT;
632
633                 while (1) {
634                         if (from == limit)
635                                 return 0;
636
637                         UPDATE_SYNTAX_CACHE_FORWARD(from);
638                         ch0 = BUF_FETCH_CHAR(buf, from);
639                         code = SYNTAX_FROM_CACHE(mirrortab, ch0);
640
641                         from++;
642                         if (words_include_escapes
643                             && (code == Sescape || code == Scharquote))
644                                 break;
645                         if (code == Sword)
646                                 break;
647                 }
648
649                 QUIT;
650
651                 while (from != limit) {
652                         UPDATE_SYNTAX_CACHE_FORWARD(from);
653                         ch1 = BUF_FETCH_CHAR(buf, from);
654                         code = SYNTAX_FROM_CACHE(mirrortab, ch1);
655                         if (!(words_include_escapes
656                               && (code == Sescape || code == Scharquote)))
657                                 if (code != Sword
658 #ifdef MULE
659                                     || WORD_BOUNDARY_P(ch0, ch1)
660 #endif
661                                     )
662                                         break;
663 #ifdef MULE
664                         ch0 = ch1;
665 #endif
666                         from++;
667                 }
668                 count--;
669         }
670
671         while (count < 0) {
672                 QUIT;
673
674                 while (1) {
675                         if (from == limit)
676                                 return 0;
677
678                         UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
679                         ch1 = BUF_FETCH_CHAR(buf, from - 1);
680                         code = SYNTAX_FROM_CACHE(mirrortab, ch1);
681                         from--;
682
683                         if (words_include_escapes
684                             && (code == Sescape || code == Scharquote))
685                                 break;
686                         if (code == Sword)
687                                 break;
688                 }
689
690                 QUIT;
691
692                 while (from != limit) {
693                         UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
694                         ch0 = BUF_FETCH_CHAR(buf, from - 1);
695                         code = SYNTAX_FROM_CACHE(mirrortab, ch0);
696
697                         if (!(words_include_escapes
698                               && (code == Sescape || code == Scharquote)))
699                                 if (code != Sword
700 #ifdef MULE
701                                     || WORD_BOUNDARY_P(ch0, ch1)
702 #endif
703                                     )
704                                         break;
705 #ifdef MULE
706                         ch1 = ch0;
707 #endif
708                         from--;
709                 }
710                 count++;
711         }
712
713         return from;
714 }
715
716 DEFUN("forward-word", Fforward_word, 0, 2, "_p",        /*
717 Move point forward COUNT words (backward if COUNT is negative).
718 Normally t is returned, but if an edge of the buffer is reached,
719 point is left there and nil is returned.
720
721 The characters that are moved over may be added to the current selection
722 \(i.e. active region) if the Shift key is held down, a motion key is used
723 to invoke this command, and `shifted-motion-keys-select-region' is t; see
724 the documentation for this variable for more details.
725
726 COUNT defaults to 1, and BUFFER defaults to the current buffer.
727 */
728       (count, buffer))
729 {
730         Bufpos val;
731         struct buffer *buf = decode_buffer(buffer, 0);
732         EMACS_INT n;
733
734         if (NILP(count))
735                 n = 1;
736         else {
737                 CHECK_INT(count);
738                 n = XINT(count);
739         }
740
741         val = scan_words(buf, BUF_PT(buf), n);
742         if (val) {
743                 BUF_SET_PT(buf, val);
744                 return Qt;
745         } else {
746                 BUF_SET_PT(buf, n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf));
747                 return Qnil;
748         }
749 }
750 \f
751 static void scan_sexps_forward(struct buffer *buf,
752                                struct lisp_parse_state *,
753                                Bufpos from, Bufpos end,
754                                int targetdepth, int stopbefore,
755                                Lisp_Object oldstate, int commentstop);
756
757 static int
758 find_start_of_comment(struct buffer *buf, Bufpos from, Bufpos stop,
759                       int comstyle)
760 {
761         Emchar c;
762         enum syntaxcode code;
763
764         /* Look back, counting the parity of string-quotes,
765            and recording the comment-starters seen.
766            When we reach a safe place, assume that's not in a string;
767            then step the main scan to the earliest comment-starter seen
768            an even number of string quotes away from the safe place.
769
770            OFROM[I] is position of the earliest comment-starter seen
771            which is I+2X quotes from the comment-end.
772            PARITY is current parity of quotes from the comment end.  */
773         int parity = 0;
774         Emchar my_stringend = 0;
775         int string_lossage = 0;
776         Bufpos comment_end = from;
777         Bufpos comstart_pos = 0;
778         int comstart_parity = 0;
779         int styles_match_p = 0;
780         /* mask to match comment styles against; for ST_COMMENT_STYLE, this
781            will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
782         int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
783
784         /* At beginning of range to scan, we're outside of strings;
785            that determines quote parity to the comment-end.  */
786         while (from != stop) {
787                 int syncode;
788
789                 /* Move back and examine a character.  */
790                 from--;
791                 UPDATE_SYNTAX_CACHE_BACKWARD(from);
792
793                 c = BUF_FETCH_CHAR(buf, from);
794                 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
795                 code = SYNTAX_FROM_CODE(syncode);
796
797                 /* is this a 1-char comment end sequence? if so, try
798                    to see if style matches previously extracted mask */
799                 if (code == Sendcomment) {
800                         /* MT had SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode) & mask
801                            but (as a Boolean) that's just a complicated way to write: */
802                         styles_match_p =
803                             SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask);
804                 }
805
806                 /* or are we looking at a 1-char comment start sequence
807                    of the style matching mask? */
808                 else if (code == Scomment) {
809                         styles_match_p =
810                             SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask);
811                 }
812
813                 /* otherwise, is this a 2-char comment end or start sequence? */
814                 else if (from > stop)
815                         do {
816                                 /* 2-char comment end sequence? */
817                                 if (SYNTAX_CODE_END_SECOND_P(syncode)) {
818                                         int prev_syncode;
819                                         UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
820                                         prev_syncode =
821                                             SYNTAX_CODE_FROM_CACHE(mirrortab,
822                                                                    BUF_FETCH_CHAR
823                                                                    (buf,
824                                                                     from - 1));
825
826                                         if (SYNTAX_CODES_END_P
827                                             (prev_syncode, syncode)) {
828                                                 code = Sendcomment;
829                                                 styles_match_p =
830                                                     SYNTAX_CODES_MATCH_END_P
831                                                     (prev_syncode, syncode,
832                                                      mask);
833                                                 from--;
834                                                 UPDATE_SYNTAX_CACHE_BACKWARD
835                                                     (from);
836                                                 c = BUF_FETCH_CHAR(buf, from);
837
838                                                 /* Found a comment-end sequence, so skip past the
839                                                    check for a comment-start */
840                                                 break;
841                                         }
842                                 }
843
844                                 /* 2-char comment start sequence? */
845                                 if (SYNTAX_CODE_START_SECOND_P(syncode)) {
846                                         int prev_syncode;
847                                         UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
848                                         prev_syncode =
849                                             SYNTAX_CODE_FROM_CACHE(mirrortab,
850                                                                    BUF_FETCH_CHAR
851                                                                    (buf,
852                                                                     from - 1));
853
854                                         if (SYNTAX_CODES_START_P
855                                             (prev_syncode, syncode)) {
856                                                 code = Scomment;
857                                                 styles_match_p =
858                                                     SYNTAX_CODES_MATCH_START_P
859                                                     (prev_syncode, syncode,
860                                                      mask);
861                                                 from--;
862                                                 UPDATE_SYNTAX_CACHE_BACKWARD
863                                                     (from);
864                                                 c = BUF_FETCH_CHAR(buf, from);
865                                         }
866                                 }
867                         } while (0);
868
869                 /* Ignore escaped characters.  */
870                 if (char_quoted(buf, from))
871                         continue;
872
873                 /* Track parity of quotes.  */
874                 if (code == Sstring) {
875                         parity ^= 1;
876                         if (my_stringend == 0)
877                                 my_stringend = c;
878                         /* If we have two kinds of string delimiters.
879                            There's no way to grok this scanning backwards.  */
880                         else if (my_stringend != c)
881                                 string_lossage = 1;
882                 }
883
884                 if (code == Sstring_fence || code == Scomment_fence) {
885                         parity ^= 1;
886                         if (my_stringend == 0)
887                                 my_stringend =
888                                     code ==
889                                     Sstring_fence ? ST_STRING_STYLE :
890                                     ST_COMMENT_STYLE;
891                         /* If we have two kinds of string delimiters.
892                            There's no way to grok this scanning backwards.  */
893                         else if (my_stringend != (code == Sstring_fence
894                                                   ? ST_STRING_STYLE :
895                                                   ST_COMMENT_STYLE))
896                                 string_lossage = 1;
897                 }
898
899                 /* Record comment-starters according to that
900                    quote-parity to the comment-end.  */
901                 if (code == Scomment && styles_match_p) {
902                         comstart_parity = parity;
903                         comstart_pos = from;
904                 }
905
906                 /* If we find another earlier comment-ender,
907                    any comment-starts earlier than that don't count
908                    (because they go with the earlier comment-ender).  */
909                 if (code == Sendcomment && styles_match_p)
910                         break;
911
912                 /* Assume a defun-start point is outside of strings.  */
913                 if (code == Sopen
914                     && (from == stop || BUF_FETCH_CHAR(buf, from - 1) == '\n'))
915                         break;
916         }
917
918         if (comstart_pos == 0)
919                 from = comment_end;
920         /* If the earliest comment starter
921            is followed by uniform paired string quotes or none,
922            we know it can't be inside a string
923            since if it were then the comment ender would be inside one.
924            So it does start a comment.  Skip back to it.  */
925         else if (comstart_parity == 0 && !string_lossage)
926                 from = comstart_pos;
927         else {
928                 /* We had two kinds of string delimiters mixed up
929                    together.  Decode this going forwards.
930                    Scan fwd from the previous comment ender
931                    to the one in question; this records where we
932                    last passed a comment starter.  */
933
934                 struct lisp_parse_state state;
935                 scan_sexps_forward(buf, &state,
936                                    find_defun_start(buf, comment_end),
937                                    comment_end - 1, -10000, 0, Qnil, 0);
938                 if (state.incomment)
939                         from = state.comstr_start;
940                 else
941                         /* We can't grok this as a comment; scan it normally.  */
942                         from = comment_end;
943                 UPDATE_SYNTAX_CACHE_FORWARD(from - 1);
944         }
945         return from;
946 }
947
948 static Bufpos
949 find_end_of_comment(struct buffer *buf, Bufpos from, Bufpos stop, int comstyle)
950 {
951         int c;
952         int syncode;
953         enum syntaxcode code, next_code;
954         /* mask to match comment styles against; for ST_COMMENT_STYLE, this
955            will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
956         int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
957
958         /* This is only called by functions which have already set up the
959            syntax_cache and are keeping it up-to-date */
960         while (1) {
961                 if (from == stop) {
962                         return -1;
963                 }
964
965                 UPDATE_SYNTAX_CACHE_FORWARD(from);
966                 c = BUF_FETCH_CHAR(buf, from);
967                 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
968                 code = SYNTAX_FROM_CODE(syncode);
969
970                 from++;
971                 UPDATE_SYNTAX_CACHE_FORWARD(from);
972
973                 /* At end of current generic comment? */
974                 if (comstyle == ST_COMMENT_STYLE) {
975                         if (code == Scomment_fence)
976                                 break;  /* matched */
977                         else
978                                 continue;       /* Ignore other styles in generic comments */
979                 }
980                 /* At end of current one-character comment of specified style? */
981                 else if (code == Sendcomment &&
982                          SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask)) {
983                         /* pre-MT code effectively does from-- here, that seems wrong */
984                         break;
985                 }
986
987                 /* At end of current two-character comment of specified style? */
988                 c = BUF_FETCH_CHAR(buf, from);
989                 next_code = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
990                 if (from < stop
991                     && SYNTAX_CODES_MATCH_END_P(syncode, next_code, mask)) {
992                         from++;
993                         UPDATE_SYNTAX_CACHE_FORWARD(from);
994                         break;
995                 }
996         }
997         return from;
998 }
999 \f
1000 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
1001    in this function (and minor changes to find_start_of_comment(),
1002    above, which is part of Fforward_comment() in FSF).  Attempts to port
1003    that logic made this function break, so I'm leaving it out.  If anyone
1004    ever complains about this function not working properly, take a look
1005    at those changes.  --ben */
1006
1007 DEFUN("forward-comment", Fforward_comment, 0, 2, 0,     /*
1008 Move forward across up to COUNT comments, or backwards if COUNT is negative.
1009 Stop scanning if we find something other than a comment or whitespace.
1010 Set point to where scanning stops.
1011 If COUNT comments are found as expected, with nothing except whitespace
1012 between them, return t; otherwise return nil.
1013 Point is set in either case.
1014 COUNT defaults to 1, and BUFFER defaults to the current buffer.
1015 */
1016       (count, buffer))
1017 {
1018         Bufpos from;
1019         Bufpos stop;
1020         Emchar c;
1021         enum syntaxcode code;
1022         int syncode;
1023         EMACS_INT n;
1024         struct buffer *buf = decode_buffer(buffer, 0);
1025
1026         if (NILP(count))
1027                 n = 1;
1028         else {
1029                 CHECK_INT(count);
1030                 n = XINT(count);
1031         }
1032
1033         from = BUF_PT(buf);
1034
1035         SCS_STATISTICS_SET_FUNCTION(scs_Fforward_comment);
1036         SETUP_SYNTAX_CACHE(from, n);
1037         while (n > 0) {
1038                 QUIT;
1039
1040                 stop = BUF_ZV(buf);
1041                 while (from < stop) {
1042                         int comstyle = 0;       /* Code for comment style: 0 for A, 1 for B,
1043                                                    or ST_COMMENT_STYLE */
1044
1045                         if (char_quoted(buf, from)) {
1046                                 from++;
1047                                 continue;
1048                         }
1049
1050                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1051                         c = BUF_FETCH_CHAR(buf, from);
1052                         syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1053                         code = SYNTAX_FROM_CODE(syncode);
1054
1055                         if (code == Scomment) {
1056                                 /* we have encountered a single character comment start
1057                                    sequence, and we are ignoring all text inside comments.
1058                                    we must record the comment style this character begins
1059                                    so that later, only a comment end of the same style actually
1060                                    ends the comment section */
1061                                 comstyle =
1062                                     SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1063                                     == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1064                         }
1065
1066                         else if (code == Scomment_fence) {
1067                                 from++;
1068                                 code = Scomment;
1069                                 comstyle = ST_COMMENT_STYLE;
1070                         }
1071
1072                         else if (from < stop
1073                                  && SYNTAX_CODE_START_FIRST_P(syncode)) {
1074                                 int next_syncode;
1075                                 UPDATE_SYNTAX_CACHE_FORWARD(from + 1);
1076                                 next_syncode =
1077                                     SYNTAX_CODE_FROM_CACHE(mirrortab,
1078                                                            BUF_FETCH_CHAR(buf,
1079                                                                           from +
1080                                                                           1));
1081
1082                                 if (SYNTAX_CODES_START_P(syncode, next_syncode)) {
1083                                         /* we have encountered a 2char comment start sequence and we
1084                                            are ignoring all text inside comments. we must record
1085                                            the comment style this sequence begins so that later,
1086                                            only a comment end of the same style actually ends
1087                                            the comment section */
1088                                         code = Scomment;
1089                                         comstyle =
1090                                             SYNTAX_CODES_COMMENT_MASK_START
1091                                             (syncode, next_syncode)
1092                                             == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1093                                         from++;
1094                                 }
1095                         }
1096
1097                         if (code == Scomment) {
1098                                 Bufpos newfrom =
1099                                     find_end_of_comment(buf, from, stop,
1100                                                         comstyle);
1101                                 if (newfrom < 0) {
1102                                         /* we stopped because from==stop */
1103                                         BUF_SET_PT(buf, stop);
1104                                         return Qnil;
1105                                 }
1106                                 from = newfrom;
1107
1108                                 /* We have skipped one comment.  */
1109                                 break;
1110                         } else if (code != Swhitespace
1111                                    && code != Sendcomment && code != Scomment) {
1112                                 BUF_SET_PT(buf, from);
1113                                 return Qnil;
1114                         }
1115                         from++;
1116                 }
1117
1118                 /* End of comment reached */
1119                 n--;
1120         }
1121
1122         while (n < 0) {
1123                 QUIT;
1124
1125                 stop = BUF_BEGV(buf);
1126                 while (from > stop) {
1127                         int comstyle = 0;       /* Code for comment style: 0 for A, 1 for B,
1128                                                    or ST_COMMENT_STYLE */
1129
1130                         from--;
1131                         if (char_quoted(buf, from)) {
1132                                 from--;
1133                                 continue;
1134                         }
1135
1136                         c = BUF_FETCH_CHAR(buf, from);
1137                         syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1138                         code = SYNTAX_FROM_CODE(syncode);
1139
1140                         if (code == Sendcomment) {
1141                                 /* we have found a single char end comment. we must record
1142                                    the comment style encountered so that later, we can match
1143                                    only the proper comment begin sequence of the same style */
1144                                 comstyle =
1145                                     SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1146                                     == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1147                         }
1148
1149                         else if (code == Scomment_fence) {
1150                                 code = Sendcomment;
1151                                 comstyle = ST_COMMENT_STYLE;
1152                         }
1153
1154                         else if (from > stop
1155                                  /* #### This seems logical but it's not in 21.4.9 */
1156                                  /* && !char_quoted (buf, from - 1) */
1157                                  && SYNTAX_CODE_END_SECOND_P(syncode)) {
1158                                 int prev_syncode;
1159                                 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1160                                 prev_syncode =
1161                                     SYNTAX_CODE_FROM_CACHE(mirrortab,
1162                                                            BUF_FETCH_CHAR(buf,
1163                                                                           from -
1164                                                                           1));
1165                                 if (SYNTAX_CODES_END_P(prev_syncode, syncode)) {
1166                                         /* We must record the comment style encountered so that
1167                                            later, we can match only the proper comment begin
1168                                            sequence of the same style.  */
1169                                         code = Sendcomment;
1170                                         comstyle =
1171                                             SYNTAX_CODES_COMMENT_MASK_END
1172                                             (prev_syncode, syncode)
1173                                             == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1174                                         from--;
1175                                 }
1176                         }
1177
1178                         if (code == Sendcomment) {
1179                                 from =
1180                                     find_start_of_comment(buf, from, stop,
1181                                                           comstyle);
1182                                 break;
1183                         }
1184
1185                         else if (code != Swhitespace
1186                                  && code != Scomment && code != Sendcomment) {
1187                                 BUF_SET_PT(buf, from + 1);
1188                                 return Qnil;
1189                         }
1190                 }
1191
1192                 n++;
1193         }
1194
1195         BUF_SET_PT(buf, from);
1196         return Qt;
1197 }
1198 \f
1199 Lisp_Object
1200 scan_lists(struct buffer * buf, Bufpos from, int count, int depth,
1201            int sexpflag, int noerror)
1202 {
1203         Bufpos stop;
1204         Emchar c;
1205         int quoted;
1206         int mathexit = 0;
1207         enum syntaxcode code;
1208         int syncode;
1209         int min_depth = depth;  /* Err out if depth gets less than this. */
1210         Bufpos last_good = from;
1211
1212         if (depth > 0)
1213                 min_depth = 0;
1214
1215         SCS_STATISTICS_SET_FUNCTION(scs_scan_lists);
1216         SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, from, count);
1217         while (count > 0) {
1218                 QUIT;
1219
1220                 stop = BUF_ZV(buf);
1221                 while (from < stop) {
1222                         int comstyle = 0;       /* mask for finding matching comment style */
1223                         Emchar stringterm = '\0';       /* Used by Sstring case in switch */
1224
1225                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1226                         c = BUF_FETCH_CHAR(buf, from);
1227                         syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1228                         code = SYNTAX_FROM_CODE(syncode);
1229                         if (depth == min_depth)
1230                                 last_good = from;
1231                         from++;
1232
1233                         /* a 1-char comment start sequence */
1234                         if (code == Scomment && parse_sexp_ignore_comments) {
1235                                 comstyle =
1236                                     SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode) ==
1237                                     SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1238                         }
1239
1240                         /* else, a 2-char comment start sequence? */
1241                         else if (from < stop
1242                                  && SYNTAX_CODE_START_FIRST_P(syncode)
1243                                  && parse_sexp_ignore_comments) {
1244                                 int next_syncode;
1245                                 UPDATE_SYNTAX_CACHE_FORWARD(from);
1246                                 next_syncode = SYNTAX_CODE_FROM_CACHE(
1247                                         mirrortab, BUF_FETCH_CHAR(buf, from));
1248
1249                                 if (SYNTAX_CODES_START_P(syncode,
1250                                                          next_syncode)) {
1251                                         /* we have encountered a comment start
1252                                            sequence and we are ignoring all text
1253                                            inside comments. we must record the
1254                                            comment style this sequence begins so
1255                                            that later, only a comment end of the
1256                                            same style actually ends the comment
1257                                            section */
1258                                         code = Scomment;
1259                                         comstyle =
1260                                             SYNTAX_CODES_COMMENT_MASK_START
1261                                             (syncode, next_syncode)
1262                                             == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1263                                         from++;
1264                                 }
1265                         }
1266                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1267
1268                         if (SYNTAX_CODE_PREFIX(syncode))
1269                                 continue;
1270
1271                         switch (code) {
1272                         case Sescape:
1273                         case Scharquote:
1274                                 if (from == stop)
1275                                         goto lose;
1276                                 from++;
1277                                 /* treat following character as a word
1278                                    constituent */
1279                         case Sword:
1280                         case Ssymbol:
1281                                 if (depth || !sexpflag)
1282                                         break;
1283                                 /* This word counts as a sexp; return at end of
1284                                    it. */
1285                                 while (from < stop) {
1286                                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1287                                         switch ((unsigned int)
1288                                                 SYNTAX_FROM_CACHE(
1289                                                         mirrortab,
1290                                                         BUF_FETCH_CHAR(
1291                                                                 buf, from))) {
1292                                         case Scharquote:
1293                                         case Sescape:
1294                                                 from++;
1295                                                 if (from == stop)
1296                                                         goto lose;
1297                                                 break;
1298                                         case Sword:
1299                                         case Ssymbol:
1300                                         case Squote:
1301                                                 break;
1302                                         default:
1303                                                 goto done;
1304                                         }
1305                                         from++;
1306                                 }
1307                                 goto done;
1308
1309                         case Scomment_fence:
1310                                 comstyle = ST_COMMENT_STYLE;
1311                                 /* falls through! */
1312                         case Scomment:
1313                                 if (!parse_sexp_ignore_comments)
1314                                         break;
1315                                 UPDATE_SYNTAX_CACHE_FORWARD(from);
1316                                 {
1317                                         Bufpos newfrom =
1318                                             find_end_of_comment(buf, from, stop,
1319                                                                 comstyle);
1320                                         if (newfrom < 0) {
1321                                                 /* we stopped because from ==
1322                                                    stop in search forward */
1323                                                 from = stop;
1324                                                 if (depth == 0)
1325                                                         goto done;
1326                                                 goto lose;
1327                                         }
1328                                         from = newfrom;
1329                                 }
1330                                 break;
1331
1332                         case Smath:
1333                                 if (!sexpflag)
1334                                         break;
1335                                 if (from != stop
1336                                     && c == BUF_FETCH_CHAR(buf, from))
1337                                         from++;
1338                                 if (mathexit) {
1339                                         mathexit = 0;
1340                                         goto close1;
1341                                 }
1342                                 mathexit = 1;
1343
1344                         case Sopen:
1345                                 if (!++depth)
1346                                         goto done;
1347                                 break;
1348
1349                         case Sclose:
1350                               close1:
1351                                 if (!--depth)
1352                                         goto done;
1353                                 if (depth < min_depth) {
1354                                         if (noerror)
1355                                                 return Qnil;
1356                                         signal_type_error_2(Qscan_error,
1357                                                             "Containing expression "
1358                                                             "ends prematurely",
1359                                                             make_int(last_good),
1360                                                             make_int(from));
1361                                 }
1362                                 break;
1363
1364                         case Sstring:
1365                                 {
1366                                         /* XEmacs change: call syntax_match on
1367                                            character */
1368                                         Emchar ch =
1369                                             BUF_FETCH_CHAR(buf, from - 1);
1370                                         Lisp_Object stermobj =
1371                                             syntax_match(syntax_cache.
1372                                                          current_syntax_table,
1373                                                          ch);
1374
1375                                         if (CHARP(stermobj))
1376                                                 stringterm = XCHAR(stermobj);
1377                                         else
1378                                                 stringterm = ch;
1379                                 }
1380                                 /* falls through! */
1381                         case Sstring_fence:
1382                                 while (1) {
1383                                         if (from >= stop)
1384                                                 goto lose;
1385                                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1386                                         c = BUF_FETCH_CHAR(buf, from);
1387                                         if (code == Sstring
1388                                             ? c == stringterm
1389                                             : SYNTAX_FROM_CACHE(mirrortab,
1390                                                                 c) ==
1391                                             Sstring_fence)
1392                                                 break;
1393
1394                                         switch ((unsigned int)
1395                                                 SYNTAX_FROM_CACHE(
1396                                                         mirrortab, c)) {
1397                                         case Scharquote:
1398                                         case Sescape:
1399                                                 from++;
1400                                                 break;
1401                                         default:
1402                                                 break;
1403                                         }
1404                                         from++;
1405                                 }
1406                                 from++;
1407                                 if (!depth && sexpflag)
1408                                         goto done;
1409                                 break;
1410
1411                                 /* list them all here */
1412                         case Squote:
1413                         case Swhitespace:
1414                         case Spunct:
1415                         case Sendcomment:
1416                         case Sinherit:
1417                         case Smax:
1418
1419                         default:
1420                                 break;
1421                         }
1422                 }
1423
1424                 /* Reached end of buffer.  Error if within object,
1425                    return nil if between */
1426                 if (depth)
1427                         goto lose;
1428
1429                 return Qnil;
1430
1431                 /* End of object reached */
1432               done:
1433                 count--;
1434         }
1435
1436         while (count < 0) {
1437                 QUIT;
1438
1439                 stop = BUF_BEGV(buf);
1440                 while (from > stop) {
1441                         /* mask for finding matching comment style */
1442                         int comstyle = 0;
1443                         /* used by case Sstring in switch below */
1444                         Emchar stringterm = '\0';
1445
1446                         from--;
1447                         UPDATE_SYNTAX_CACHE_BACKWARD(from);
1448                         quoted = char_quoted(buf, from);
1449                         if (quoted) {
1450                                 from--;
1451                                 UPDATE_SYNTAX_CACHE_BACKWARD(from);
1452                         }
1453
1454                         c = BUF_FETCH_CHAR(buf, from);
1455                         syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1456                         code = SYNTAX_FROM_CODE(syncode);
1457
1458                         if (code == Sendcomment && parse_sexp_ignore_comments) {
1459                                 /* we have found a single char end comment. we
1460                                    must record the comment style encountered so
1461                                    that later, we can match only the proper
1462                                    comment begin sequence of the same style */
1463                                 comstyle =
1464                                     SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1465                                     == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1466                         }
1467
1468                         else if (from > stop
1469                                  && SYNTAX_CODE_END_SECOND_P(syncode)
1470                                  && !char_quoted(buf, from - 1)
1471                                  && parse_sexp_ignore_comments) {
1472                                 int prev_syncode;
1473                                 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1474                                 prev_syncode = SYNTAX_CODE_FROM_CACHE
1475                                     (mirrortab, BUF_FETCH_CHAR(buf, from - 1));
1476
1477                                 if (SYNTAX_CODES_END_P(prev_syncode, syncode)) {
1478                                         /* we must record the comment style
1479                                            encountered so that later, we can
1480                                            match only the proper comment begin
1481                                            sequence of the same style */
1482                                         code = Sendcomment;
1483                                         comstyle =
1484                                             SYNTAX_CODES_COMMENT_MASK_END
1485                                             (prev_syncode, syncode)
1486                                             == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1487                                         from--;
1488                                 }
1489                         }
1490
1491                         if (SYNTAX_CODE_PREFIX(syncode)) {
1492                                 continue;
1493                         }
1494
1495                         switch (quoted ? Sword : code) {
1496                         case Sword:
1497                         case Ssymbol:
1498                                 if (depth || !sexpflag)
1499                                         break;
1500                                 /* This word counts as a sexp; count object
1501                                    finished after passing it. */
1502                                 while (from > stop) {
1503                                         /* enum syntaxcode syncode; */
1504                                         UPDATE_SYNTAX_CACHE_BACKWARD(from);
1505                                         quoted = char_quoted(buf, from - 1);
1506
1507                                         if (quoted)
1508                                                 from--;
1509                                         if (!(quoted
1510                                               || (syncode =
1511                                                   SYNTAX_FROM_CACHE(mirrortab,
1512                                                                     BUF_FETCH_CHAR
1513                                                                     (buf,
1514                                                                      from - 1)))
1515                                               == Sword || syncode == Ssymbol
1516                                               || syncode == Squote))
1517                                                 goto done2;
1518                                         from--;
1519                                 }
1520                                 goto done2;
1521
1522                         case Smath:
1523                                 if (!sexpflag)
1524                                         break;
1525                                 if (from != stop
1526                                     && c == BUF_FETCH_CHAR(buf, from - 1))
1527                                         from--;
1528                                 if (mathexit) {
1529                                         mathexit = 0;
1530                                         goto open2;
1531                                 }
1532                                 mathexit = 1;
1533
1534                         case Sclose:
1535                                 if (!++depth)
1536                                         goto done2;
1537                                 break;
1538
1539                         case Sopen:
1540                               open2:
1541                                 if (!--depth)
1542                                         goto done2;
1543                                 if (depth < min_depth) {
1544                                         if (noerror) {
1545                                                 return Qnil;
1546                                         }
1547                                         signal_type_error_2(Qscan_error,
1548                                                             "Containing expression "
1549                                                             "ends premeturely",
1550                                                             make_int(last_good),
1551                                                             make_int(from));
1552                                 }
1553                                 break;
1554
1555                         case Scomment_fence:
1556                                 comstyle = ST_COMMENT_STYLE;
1557                                 /* falls through! */
1558                         case Sendcomment:
1559                                 if (parse_sexp_ignore_comments)
1560                                         from =
1561                                             find_start_of_comment(buf, from,
1562                                                                   stop,
1563                                                                   comstyle);
1564                                 break;
1565
1566                         case Sstring: {
1567                                 /* XEmacs change: call syntax_match() on
1568                                    character */
1569                                 Emchar ch = BUF_FETCH_CHAR(buf, from);
1570                                 Lisp_Object stermobj =
1571                                         syntax_match(syntax_cache.
1572                                                      current_syntax_table, ch);
1573                                 if (CHARP(stermobj)) {
1574                                         stringterm = XCHAR(stermobj);
1575                                 } else {
1576                                         stringterm = ch;
1577                                 }
1578                         }
1579
1580                                 /* falls through! */
1581                         case Sstring_fence:
1582                                 while (1) {
1583                                         if (from == stop)
1584                                                 goto lose;
1585
1586                                         UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1587                                         c = BUF_FETCH_CHAR(buf, from - 1);
1588                                         if ((code == Sstring
1589                                              ? c == stringterm
1590                                              : SYNTAX_FROM_CACHE(mirrortab,
1591                                                                  c) ==
1592                                              Sstring_fence)
1593                                             && !char_quoted(buf, from - 1)) {
1594                                                 break;
1595                                         }
1596
1597                                         from--;
1598                                 }
1599                                 from--;
1600                                 if (!depth && sexpflag)
1601                                         goto done2;
1602                                 break;
1603
1604                         default:
1605                                 /* shouldnt happen */
1606                                 break;
1607                         }
1608                 }
1609
1610                 /* Reached start of buffer.  Error if within object,
1611                    return nil if between */
1612                 if (depth)
1613                         goto lose;
1614
1615                 return Qnil;
1616
1617               done2:
1618                 count++;
1619         }
1620
1621         return (make_int(from));
1622
1623       lose:
1624         if (!noerror)
1625                 signal_type_error_2(Qscan_error, "Unbalanced parentheses",
1626                                     make_int(last_good), make_int(from));
1627         return Qnil;
1628 }
1629
1630 int char_quoted(struct buffer *buf, Bufpos pos)
1631 {
1632         enum syntaxcode code;
1633         Bufpos beg = BUF_BEGV(buf);
1634         int quoted = 0;
1635         Bufpos startpos = pos;
1636
1637         while (pos > beg) {
1638                 UPDATE_SYNTAX_CACHE_BACKWARD(pos - 1);
1639                 code =
1640                     SYNTAX_FROM_CACHE(mirrortab, BUF_FETCH_CHAR(buf, pos - 1));
1641
1642                 if (code != Scharquote && code != Sescape)
1643                         break;
1644                 pos--;
1645                 quoted = !quoted;
1646         }
1647
1648         UPDATE_SYNTAX_CACHE(startpos);
1649         return quoted;
1650 }
1651
1652 DEFUN("scan-lists", Fscan_lists, 3, 5, 0,       /*
1653 Scan from character number FROM by COUNT lists.
1654 Returns the character number of the position thus found.
1655
1656 If DEPTH is nonzero, paren depth begins counting from that value,
1657 only places where the depth in parentheses becomes zero
1658 are candidates for stopping; COUNT such places are counted.
1659 Thus, a positive value for DEPTH means go out levels.
1660
1661 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1662
1663 If the beginning or end of (the accessible part of) the buffer is reached
1664 and the depth is wrong, an error is signaled.
1665 If the depth is right but the count is not used up, nil is returned.
1666
1667 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1668 of in the current buffer.
1669
1670 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1671 signalling an error.
1672 */
1673       (from, count, depth, buffer, noerror))
1674 {
1675         struct buffer *buf;
1676
1677         CHECK_INT(from);
1678         CHECK_INT(count);
1679         CHECK_INT(depth);
1680         buf = decode_buffer(buffer, 0);
1681
1682         return scan_lists(buf, XINT(from), XINT(count), XINT(depth), 0,
1683                           !NILP(noerror));
1684 }
1685
1686 DEFUN("scan-sexps", Fscan_sexps, 2, 4, 0,       /*
1687 Scan from character number FROM by COUNT balanced expressions.
1688 If COUNT is negative, scan backwards.
1689 Returns the character number of the position thus found.
1690
1691 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1692
1693 If the beginning or end of (the accessible part of) the buffer is reached
1694 in the middle of a parenthetical grouping, an error is signaled.
1695 If the beginning or end is reached between groupings
1696 but before count is used up, nil is returned.
1697
1698 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1699 of in the current buffer.
1700
1701 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1702 signalling an error.
1703 */
1704       (from, count, buffer, noerror))
1705 {
1706         struct buffer *buf = decode_buffer(buffer, 0);
1707         CHECK_INT(from);
1708         CHECK_INT(count);
1709
1710         return scan_lists(buf, XINT(from), XINT(count), 0, 1, !NILP(noerror));
1711 }
1712
1713 DEFUN("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1714 Move point backward over any number of chars with prefix syntax.
1715 This includes chars with "quote" or "prefix" syntax (' or p).
1716
1717 Optional arg BUFFER defaults to the current buffer.
1718 */
1719       (buffer))
1720 {
1721         struct buffer *buf = decode_buffer(buffer, 0);
1722         Bufpos beg = BUF_BEGV(buf);
1723         Bufpos pos = BUF_PT(buf);
1724 #ifndef emacs
1725         Lisp_Char_Table *mirrortab = XCHAR_TABLE(buf->mirror_syntax_table);
1726 #endif
1727         Emchar c = '\0';        /* initialize to avoid compiler warnings */
1728
1729         SCS_STATISTICS_SET_FUNCTION(scs_Fbackward_prefix_characters);
1730         SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos, -1);
1731
1732         while (pos > beg && !char_quoted(buf, pos - 1)
1733                /* Previous statement updates syntax table.  */
1734                &&
1735                (SYNTAX_FROM_CACHE(mirrortab, c = BUF_FETCH_CHAR(buf, pos - 1))
1736                 == Squote
1737                 /* equivalent to SYNTAX_PREFIX (mirrortab, c) */
1738                 || SYNTAX_CODE_PREFIX(SYNTAX_CODE_FROM_CACHE(mirrortab, c))))
1739                 pos--;
1740
1741         BUF_SET_PT(buf, pos);
1742
1743         return Qnil;
1744 }
1745 \f
1746 /* Parse forward from FROM to END,
1747    assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1748    and return a description of the state of the parse at END.
1749    If STOPBEFORE is nonzero, stop at the start of an atom.
1750    If COMMENTSTOP is 1, stop at the start of a comment; if it is -1,
1751      stop at the start of a comment or a string */
1752
1753 static void
1754 scan_sexps_forward(struct buffer *buf, struct lisp_parse_state *stateptr,
1755                    Bufpos from, Bufpos end,
1756                    int targetdepth, int stopbefore,
1757                    Lisp_Object oldstate, int commentstop)
1758 {
1759         struct lisp_parse_state state;
1760
1761         enum syntaxcode code;
1762         struct level {
1763                 int last, prev;
1764         };
1765         struct level levelstart[100];
1766         struct level *curlevel = levelstart;
1767         struct level *endlevel = levelstart + 100;
1768         int depth;              /* Paren depth of current scanning location.
1769                                    level - levelstart equals this except
1770                                    when the depth becomes negative.  */
1771         int mindepth;           /* Lowest DEPTH value seen.  */
1772         int start_quoted = 0;   /* Nonzero means starting after a char quote */
1773         int boundary_stop = commentstop == -1;
1774         Lisp_Object tem;
1775
1776         SCS_STATISTICS_SET_FUNCTION(scs_scan_sexps_forward);
1777         SETUP_SYNTAX_CACHE(from, 1);
1778         if (NILP(oldstate)) {
1779                 depth = 0;
1780                 state.instring = -1;
1781                 state.incomment = 0;
1782                 state.comstyle = 0;     /* comment style a by default */
1783                 state.comstr_start = -1;        /* no comment/string seen.  */
1784         } else {
1785                 tem = Fcar(oldstate);   /* elt 0, depth */
1786                 if (!NILP(tem))
1787                         depth = XINT(tem);
1788                 else
1789                         depth = 0;
1790
1791                 oldstate = Fcdr(oldstate);
1792                 oldstate = Fcdr(oldstate);
1793                 oldstate = Fcdr(oldstate);
1794                 tem = Fcar(oldstate);   /* elt 3, instring */
1795                 state.instring = (!NILP(tem)
1796                                   ? (INTP(tem) ? XINT(tem) : ST_STRING_STYLE)
1797                                   : -1);
1798
1799                 oldstate = Fcdr(oldstate);
1800                 tem = Fcar(oldstate);   /* elt 4, incomment */
1801                 state.incomment = !NILP(tem);
1802
1803                 oldstate = Fcdr(oldstate);
1804                 tem = Fcar(oldstate);   /* elt 5, follows-quote */
1805                 start_quoted = !NILP(tem);
1806
1807                 /* if the eighth element of the list is nil, we are in comment style
1808                    a; if it is t, we are in comment style b; if it is 'syntax-table,
1809                    we are in a generic comment */
1810                 oldstate = Fcdr(oldstate);
1811                 oldstate = Fcdr(oldstate);
1812                 /* The code below was changed radically for syntax-table properties.
1813                    A reasonable place to look if a bug manifests. */
1814                 tem = Fcar(oldstate);   /* elt 7, comment style a/b/fence */
1815                 state.comstyle = NILP(tem) ? 0 : (EQ(tem, Qsyntax_table)
1816                                                   ? ST_COMMENT_STYLE : 1);
1817
1818                 oldstate = Fcdr(oldstate);      /* elt 8, start of last comment/string */
1819                 tem = Fcar(oldstate);
1820                 state.comstr_start = NILP(tem) ? -1 : XINT(tem);
1821
1822                 /* elt 9, char numbers of starts-of-expression of levels
1823                    (starting from outermost). */
1824                 oldstate = Fcdr(oldstate);
1825                 tem = Fcar(oldstate);   /* elt 9, intermediate data for
1826                                            continuation of parsing (subject
1827                                            to change). */
1828                 while (!NILP(tem)) {    /* >= second enclosing sexps.  */
1829                         curlevel->last = XINT(Fcar(tem));
1830                         if (++curlevel == endlevel)
1831                                 error("Nesting too deep for parser");
1832                         curlevel->prev = -1;
1833                         curlevel->last = -1;
1834                         tem = Fcdr(tem);
1835                 }
1836                 /* end radical change section */
1837         }
1838         state.quoted = 0;
1839         mindepth = depth;
1840
1841         curlevel->prev = -1;
1842         curlevel->last = -1;
1843
1844         /* Enter the loop at a place appropriate for initial state. */
1845
1846         if (state.incomment)
1847                 goto startincomment;
1848         if (state.instring >= 0) {
1849                 if (start_quoted)
1850                         goto startquotedinstring;
1851                 goto startinstring;
1852         }
1853         if (start_quoted)
1854                 goto startquoted;
1855
1856         while (from < end) {
1857                 Emchar c;
1858                 int syncode;
1859
1860                 QUIT;
1861
1862                 UPDATE_SYNTAX_CACHE_FORWARD(from);
1863                 c = BUF_FETCH_CHAR(buf, from);
1864                 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1865                 code = SYNTAX_FROM_CODE(syncode);
1866                 from++;
1867
1868                 /* record the comment style we have entered so that only the
1869                    comment-ender sequence (or single char) of the same style
1870                    actually terminates the comment section. */
1871                 if (code == Scomment) {
1872                         state.comstyle = SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1873                             == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1874                         state.comstr_start = from - 1;
1875                 }
1876
1877                 /* a generic comment delimiter? */
1878                 else if (code == Scomment_fence) {
1879                         state.comstyle = ST_COMMENT_STYLE;
1880                         state.comstr_start = from - 1;
1881                         code = Scomment;
1882                 }
1883
1884                 else if (from < end && SYNTAX_CODE_START_FIRST_P(syncode)) {
1885                         int next_syncode;
1886                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1887                         next_syncode =
1888                             SYNTAX_CODE_FROM_CACHE(mirrortab,
1889                                                    BUF_FETCH_CHAR(buf, from));
1890
1891                         if (SYNTAX_CODES_START_P(syncode, next_syncode)) {
1892                                 code = Scomment;
1893                                 state.comstyle =
1894                                     SYNTAX_CODES_COMMENT_MASK_START(syncode,
1895                                                                     next_syncode)
1896                                     == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1897                                 state.comstr_start = from - 1;
1898                                 from++;
1899                                 UPDATE_SYNTAX_CACHE_FORWARD(from);
1900                         }
1901                 }
1902
1903                 if (SYNTAX_CODE_PREFIX(syncode))
1904                         continue;
1905                 switch (code) {
1906                 case Sescape:
1907                 case Scharquote:
1908                         if (stopbefore)
1909                                 goto stop;      /* this arg means stop at sexp start */
1910                         curlevel->last = from - 1;
1911                       startquoted:
1912                         if (from == end)
1913                                 goto endquoted;
1914                         from++;
1915                         goto symstarted;
1916                         /* treat following character as a word constituent */
1917                 case Sword:
1918                 case Ssymbol:
1919                         if (stopbefore)
1920                                 goto stop;      /* this arg means stop at sexp start */
1921                         curlevel->last = from - 1;
1922                 symstarted:
1923                         while (from < end) {
1924                                 UPDATE_SYNTAX_CACHE_FORWARD(from);
1925                                 switch ((unsigned int)
1926                                         SYNTAX_FROM_CACHE(
1927                                                 mirrortab,
1928                                                 BUF_FETCH_CHAR(buf, from))) {
1929                                 case Scharquote:
1930                                 case Sescape:
1931                                         from++;
1932                                         if (from == end)
1933                                                 goto endquoted;
1934                                         break;
1935                                 case Sword:
1936                                 case Ssymbol:
1937                                 case Squote:
1938                                         break;
1939                                 default:
1940                                         goto symdone;
1941                                 }
1942                                 from++;
1943                         }
1944                       symdone:
1945                         curlevel->prev = curlevel->last;
1946                         break;
1947
1948                 case Scomment:
1949                         state.incomment = 1;
1950                         if (commentstop || boundary_stop)
1951                                 goto done;
1952                       startincomment:
1953                         if (commentstop == 1)
1954                                 goto done;
1955                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1956                         {
1957                                 Bufpos newfrom =
1958                                     find_end_of_comment(buf, from, end,
1959                                                         state.comstyle);
1960                                 if (newfrom < 0) {
1961                                         /* we terminated search because from == end */
1962                                         from = end;
1963                                         goto done;
1964                                 }
1965                                 from = newfrom;
1966                         }
1967                         state.incomment = 0;
1968                         state.comstyle = 0;     /* reset the comment style */
1969                         if (boundary_stop)
1970                                 goto done;
1971                         break;
1972
1973                 case Sopen:
1974                         if (stopbefore)
1975                                 goto stop;      /* this arg means stop at sexp start */
1976                         depth++;
1977                         /* curlevel++->last ran into compiler bug on Apollo */
1978                         curlevel->last = from - 1;
1979                         if (++curlevel == endlevel)
1980                                 error("Nesting too deep for parser");
1981                         curlevel->prev = -1;
1982                         curlevel->last = -1;
1983                         if (targetdepth == depth)
1984                                 goto done;
1985                         break;
1986
1987                 case Sclose:
1988                         depth--;
1989                         if (depth < mindepth)
1990                                 mindepth = depth;
1991                         if (curlevel != levelstart)
1992                                 curlevel--;
1993                         curlevel->prev = curlevel->last;
1994                         if (targetdepth == depth)
1995                                 goto done;
1996                         break;
1997
1998                 case Sstring:
1999                 case Sstring_fence:
2000                         state.comstr_start = from - 1;
2001                         if (stopbefore)
2002                                 goto stop;      /* this arg means stop at sexp start */
2003                         curlevel->last = from - 1;
2004                         if (code == Sstring_fence) {
2005                                 state.instring = ST_STRING_STYLE;
2006                         } else {
2007                                 /* XEmacs change: call syntax_match() on character */
2008                                 Emchar ch = BUF_FETCH_CHAR(buf, from - 1);
2009                                 Lisp_Object stermobj =
2010                                     syntax_match(syntax_cache.
2011                                                  current_syntax_table, ch);
2012
2013                                 if (CHARP(stermobj))
2014                                         state.instring = XCHAR(stermobj);
2015                                 else
2016                                         state.instring = ch;
2017                         }
2018                         if (boundary_stop)
2019                                 goto done;
2020                 startinstring:
2021                         while (1) {
2022                                 enum syntaxcode temp_code;
2023
2024                                 if (from >= end)
2025                                         goto done;
2026
2027                                 UPDATE_SYNTAX_CACHE_FORWARD(from);
2028                                 c = BUF_FETCH_CHAR(buf, from);
2029                                 temp_code = SYNTAX_FROM_CACHE(mirrortab, c);
2030
2031                                 if (state.instring != ST_STRING_STYLE &&
2032                                     temp_code == Sstring && c == state.instring)
2033                                         break;
2034
2035                                 switch ((unsigned int)temp_code) {
2036                                 case Sstring_fence:
2037                                         if (state.instring == ST_STRING_STYLE)
2038                                                 goto string_end;
2039                                         break;
2040                                 case Scharquote:
2041                                 case Sescape:
2042                                         {
2043                                                 from++;
2044                                               startquotedinstring:
2045                                                 if (from >= end)
2046                                                         goto endquoted;
2047                                                 break;
2048                                         }
2049                                 default:
2050                                         break;
2051                                 }
2052                                 from++;
2053                         }
2054                       string_end:
2055                         state.instring = -1;
2056                         curlevel->prev = curlevel->last;
2057                         from++;
2058                         if (boundary_stop)
2059                                 goto done;
2060                         break;
2061
2062                 case Smath:
2063                         break;
2064
2065                 case Swhitespace:
2066                 case Spunct:
2067                 case Squote:
2068                 case Sendcomment:
2069                 case Scomment_fence:
2070                 case Sinherit:
2071                 case Smax:
2072                         /* catch all */
2073                 default:
2074                         break;
2075                 }
2076         }
2077         goto done;
2078
2079       stop:                     /* Here if stopping before start of sexp. */
2080         from--;                 /* We have just fetched the char that starts it; */
2081         goto done;              /* but return the position before it. */
2082
2083       endquoted:
2084         state.quoted = 1;
2085       done:
2086         state.depth = depth;
2087         state.mindepth = mindepth;
2088         state.thislevelstart = curlevel->prev;
2089         state.prevlevelstart
2090             = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2091         state.location = from;
2092         state.levelstarts = Qnil;
2093         while (--curlevel >= levelstart)
2094                 state.levelstarts = Fcons(make_int(curlevel->last),
2095                                           state.levelstarts);
2096
2097         *stateptr = state;
2098 }
2099
2100 DEFUN("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0,       /*
2101 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
2102 Parsing stops at TO or when certain criteria are met;
2103 point is set to where parsing stops.
2104 If fifth arg OLDSTATE is omitted or nil,
2105 parsing assumes that FROM is the beginning of a function.
2106 Value is a list of nine elements describing final state of parsing:
2107 0. depth in parens.
2108 1. character address of start of innermost containing list; nil if none.
2109 2. character address of start of last complete sexp terminated.
2110 3. non-nil if inside a string.
2111 (It is the character that will terminate the string,
2112 or t if the string should be terminated by an explicit
2113 `syntax-table' property.)
2114 4. t if inside a comment.
2115 5. t if following a quote character.
2116 6. the minimum paren-depth encountered during this scan.
2117 7. nil if in comment style a, or not in a comment; t if in comment style b;
2118 `syntax-table' if given by an explicit `syntax-table' property.
2119 8. character address of start of last comment or string; nil if none.
2120 9. Intermediate data for continuation of parsing (subject to change).
2121 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
2122 in parentheses becomes equal to TARGETDEPTH.
2123 Fourth arg STOPBEFORE non-nil means stop when come to
2124 any character that starts a sexp.
2125 Fifth arg OLDSTATE is a nine-element list like what this function returns.
2126 It is used to initialize the state of the parse.  Its second and third
2127 elements are ignored.
2128 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If it
2129 is `syntax-table', stop after the start of a comment or a string, or after
2130 the end of a comment or string.
2131 */
2132       (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
2133 {
2134         struct lisp_parse_state state;
2135         int target;
2136         Bufpos start, end;
2137         struct buffer *buf = decode_buffer(buffer, 0);
2138         Lisp_Object val;
2139
2140         if (!NILP(targetdepth)) {
2141                 CHECK_INT(targetdepth);
2142                 target = XINT(targetdepth);
2143         } else
2144                 target = -100000;       /* We won't reach this depth */
2145
2146         get_buffer_range_char(buf, from, to, &start, &end, 0);
2147         scan_sexps_forward(buf, &state, start, end,
2148                            target, !NILP(stopbefore), oldstate,
2149                            (NILP(commentstop)
2150                             ? 0 : (EQ(commentstop, Qsyntax_table) ? -1 : 1)));
2151         BUF_SET_PT(buf, state.location);
2152
2153         /* reverse order */
2154         val = Qnil;
2155         val = Fcons(state.levelstarts, val);
2156         val = Fcons((state.incomment || (state.instring >= 0))
2157                     ? make_int(state.comstr_start) : Qnil, val);
2158         val = Fcons(state.comstyle ? (state.comstyle == ST_COMMENT_STYLE
2159                                       ? Qsyntax_table : Qt) : Qnil, val);
2160         val = Fcons(make_int(state.mindepth), val);
2161         val = Fcons(state.quoted ? Qt : Qnil, val);
2162         val = Fcons(state.incomment ? Qt : Qnil, val);
2163         val = Fcons(state.instring < 0
2164                     ? Qnil
2165                     : (state.instring == ST_STRING_STYLE
2166                        ? Qt : make_int(state.instring)), val);
2167         val =
2168             Fcons(state.thislevelstart <
2169                   0 ? Qnil : make_int(state.thislevelstart), val);
2170         val =
2171             Fcons(state.prevlevelstart <
2172                   0 ? Qnil : make_int(state.prevlevelstart), val);
2173         val = Fcons(make_int(state.depth), val);
2174
2175         return val;
2176 }
2177
2178 /* Updating of the mirror syntax table.
2179
2180    Each syntax table has a corresponding mirror table in it.
2181    Whenever we make a change to a syntax table, we call
2182    update_syntax_table() on it.
2183
2184    #### We really only need to map over the changed range.
2185
2186    If we change the standard syntax table, we need to map over
2187    all tables because any of them could be inheriting from the
2188    standard syntax table.
2189
2190    When `set-syntax-table' is called, we set the buffer's mirror
2191    syntax table as well.
2192    */
2193
2194 struct cmst_arg {
2195         Lisp_Object mirrortab;
2196         int check_inherit;
2197 };
2198
2199 static int cmst_mapfun(struct chartab_range *range, Lisp_Object val, void *arg)
2200 {
2201         struct cmst_arg *closure = (struct cmst_arg *)arg;
2202
2203         if (CONSP(val))
2204                 val = XCAR(val);
2205         if (SYNTAX_FROM_CODE(XINT(val)) == Sinherit && closure->check_inherit) {
2206                 struct cmst_arg recursive;
2207
2208                 recursive.mirrortab = closure->mirrortab;
2209                 recursive.check_inherit = 0;
2210                 map_char_table(XCHAR_TABLE(Vstandard_syntax_table), range,
2211                                cmst_mapfun, &recursive);
2212         } else
2213                 put_char_table(XCHAR_TABLE(closure->mirrortab), range, val);
2214         return 0;
2215 }
2216
2217 static void update_just_this_syntax_table(Lisp_Char_Table * ct)
2218 {
2219         struct chartab_range range;
2220         struct cmst_arg arg;
2221
2222         arg.mirrortab = ct->mirror_table;
2223         arg.check_inherit = (CHAR_TABLEP(Vstandard_syntax_table)
2224                              && ct != XCHAR_TABLE(Vstandard_syntax_table));
2225         range.type = CHARTAB_RANGE_ALL;
2226         map_char_table(ct, &range, cmst_mapfun, &arg);
2227 }
2228
2229 /* Called from chartab.c when a change is made to a syntax table.
2230    If this is the standard syntax table, we need to recompute
2231    *all* syntax tables (yuck).  Otherwise we just recompute this
2232    one. */
2233
2234 void update_syntax_table(Lisp_Char_Table * ct)
2235 {
2236         /* Don't be stymied at startup. */
2237         if (CHAR_TABLEP(Vstandard_syntax_table)
2238             && ct == XCHAR_TABLE(Vstandard_syntax_table)) {
2239                 Lisp_Object syntab;
2240
2241                 for (syntab = Vall_syntax_tables; !NILP(syntab);
2242                      syntab = XCHAR_TABLE(syntab)->next_table)
2243                         update_just_this_syntax_table(XCHAR_TABLE(syntab));
2244         } else
2245                 update_just_this_syntax_table(ct);
2246 }
2247 \f
2248 /************************************************************************/
2249 /*                            initialization                            */
2250 /************************************************************************/
2251
2252 void syms_of_syntax(void)
2253 {
2254         defsymbol(&Qsyntax_table_p, "syntax-table-p");
2255         defsymbol(&Qsyntax_table, "syntax-table");
2256
2257         DEFSUBR(Fsyntax_table_p);
2258         DEFSUBR(Fsyntax_table);
2259         DEFSUBR(Fstandard_syntax_table);
2260         DEFSUBR(Fcopy_syntax_table);
2261         DEFSUBR(Fset_syntax_table);
2262         DEFSUBR(Fsyntax_designator_chars);
2263         DEFSUBR(Fchar_syntax);
2264         DEFSUBR(Fmatching_paren);
2265         /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
2266         /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
2267
2268         DEFSUBR(Fforward_word);
2269
2270         DEFSUBR(Fforward_comment);
2271         DEFSUBR(Fscan_lists);
2272         DEFSUBR(Fscan_sexps);
2273         DEFSUBR(Fbackward_prefix_chars);
2274         DEFSUBR(Fparse_partial_sexp);
2275
2276         DEFERROR_STANDARD(Qscan_error, Qsyntax_error);
2277 }
2278
2279 void vars_of_syntax(void)
2280 {
2281         DEFVAR_BOOL("parse-sexp-ignore-comments", &parse_sexp_ignore_comments   /*
2282 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
2283                                                                                  */ );
2284         parse_sexp_ignore_comments = 0;
2285
2286         DEFVAR_BOOL("lookup-syntax-properties", &lookup_syntax_properties       /*
2287 Non-nil means `forward-sexp', etc., look up character syntax in the
2288 table that is the value of the `syntax-table' text property, if non-nil.
2289 The value of this property should be either a syntax table, or a cons
2290 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric
2291 syntax code, MATCHCHAR being nil or the character to match (which is
2292 relevant only for open/close type.
2293                                                                                  */ );
2294         lookup_syntax_properties = 0;   /* #### default off until optimized */
2295
2296         DEFVAR_BOOL("words-include-escapes", &words_include_escapes     /*
2297 Non-nil means `forward-word', etc., should treat escape chars part of words.
2298                                                                          */ );
2299         words_include_escapes = 0;
2300
2301         no_quit_in_re_search = 0;
2302 }
2303
2304 static void define_standard_syntax(const char *p, enum syntaxcode syn)
2305 {
2306         for (; *p; p++)
2307                 Fput_char_table(make_char(*p), make_int(syn),
2308                                 Vstandard_syntax_table);
2309 }
2310
2311 void complex_vars_of_syntax(void)
2312 {
2313         Emchar i;
2314         const char *p;
2315         /* Set this now, so first buffer creation can refer to it. */
2316         /* Make it nil before calling copy-syntax-table
2317            so that copy-syntax-table will know not to try to copy from garbage */
2318         Vstandard_syntax_table = Qnil;
2319         Vstandard_syntax_table = Fcopy_syntax_table(Qnil);
2320         staticpro(&Vstandard_syntax_table);
2321
2322         Vsyntax_designator_chars_string = make_string_nocopy(syntax_code_spec,
2323                                                              Smax);
2324         staticpro(&Vsyntax_designator_chars_string);
2325
2326         fill_char_table(XCHAR_TABLE(Vstandard_syntax_table), make_int(Spunct));
2327
2328         for (i = 0; i <= 32; i++)       /* Control 0 plus SPACE */
2329                 Fput_char_table(make_char(i), make_int(Swhitespace),
2330                                 Vstandard_syntax_table);
2331         for (i = 127; i <= 159; i++)    /* DEL plus Control 1 */
2332                 Fput_char_table(make_char(i), make_int(Swhitespace),
2333                                 Vstandard_syntax_table);
2334
2335         define_standard_syntax("abcdefghijklmnopqrstuvwxyz"
2336                                "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2337                                "0123456789" "$%", Sword);
2338         define_standard_syntax("\"", Sstring);
2339         define_standard_syntax("\\", Sescape);
2340         define_standard_syntax("_-+*/&|<>=", Ssymbol);
2341         define_standard_syntax(".,;:?!#@~^'`", Spunct);
2342
2343         for (p = "()[]{}"; *p; p += 2) {
2344                 Fput_char_table(make_char(p[0]),
2345                                 Fcons(make_int(Sopen), make_char(p[1])),
2346                                 Vstandard_syntax_table);
2347                 Fput_char_table(make_char(p[1]),
2348                                 Fcons(make_int(Sclose), make_char(p[0])),
2349                                 Vstandard_syntax_table);
2350         }
2351 }