Initial git import
[sxemacs] / src / syntax.c
1 /* SXEmacs routines to deal with syntax tables; also word and list parsing.
2    Copyright (C) 1985-1994 Free Software Foundation, Inc.
3    Copyright (C) 1995 Sun Microsystems, Inc.
4
5 This file is part of SXEmacs
6
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
19
20
21 /* Synched up with: FSF 19.28. */
22
23 /* This file has been Mule-ized. */
24
25 #include <config.h>
26 #include "lisp.h"
27
28 #include "buffer.h"
29 #include "syntax.h"
30 #include "extents.h"
31
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33    explaining the purpose of the Sextword syntax category:
34
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult.  Theoretically it's impossible without
37 using natural language processing techniques.  But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
40 for Japanese.
41
42         A Japanese word is a sequence of characters that consists of
43         zero or more Kanji characters followed by zero or more
44         Hiragana characters.
45
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD.  For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
50
51 So, we introduced Sextword for Japanese letters.  A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters.  */
54
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
56 /* Recovered by tomo */
57
58 #define ST_COMMENT_STYLE 0x101
59 #define ST_STRING_STYLE  0x102
60
61 Lisp_Object Qsyntax_table;
62 int lookup_syntax_properties;
63
64 Lisp_Object Qsyntax_table_p;
65
66 int words_include_escapes;
67
68 int parse_sexp_ignore_comments;
69
70 /* The following two variables are provided to tell additional information
71    to the regex routines.  We do it this way rather than change the
72    arguments to re_search_2() in an attempt to maintain some call
73    compatibility with other versions of the regex code. */
74
75 /* Tell the regex routines not to QUIT.  Normally there is a QUIT
76    each iteration in re_search_2(). */
77 int no_quit_in_re_search;
78
79 /* Tell the regex routines which buffer to access for SYNTAX() lookups
80    and the like. */
81 struct buffer *regex_emacs_buffer;
82
83 /* In Emacs, this is the string or buffer in which we
84    are matching.  It is used for looking up syntax properties.  */
85 Lisp_Object regex_match_object;
86
87 Lisp_Object Vstandard_syntax_table;
88
89 Lisp_Object Vsyntax_designator_chars_string;
90
91 /* This is the internal form of the parse state used in parse-partial-sexp.  */
92
93 struct lisp_parse_state {
94         int depth;              /* Depth at end of parsing */
95         Emchar instring;        /* -1 if not within string, else desired terminator */
96         int incomment;          /* Nonzero if within a comment at end of parsing */
97         int comstyle;           /* comment style a=0, or b=1, or ST_COMMENT_STYLE */
98         int quoted;             /* Nonzero if just after an escape char at end of
99                                    parsing */
100         Bufpos thislevelstart;  /* Char number of most recent start-of-expression
101                                    at current level */
102         Bufpos prevlevelstart;  /* Char number of start of containing expression */
103         Bufpos location;        /* Char number at which parsing stopped */
104         int mindepth;           /* Minimum depth seen while scanning  */
105         Bufpos comstr_start;    /* Position just after last comment/string starter
106                                    (if the 'syntax-table text property is not
107                                    supported, used only for comment starts) */
108         Lisp_Object levelstarts;        /* Char numbers of starts-of-expression
109                                            of levels (starting from outermost).  */
110 };
111 \f
112 /* These variables are a cache for finding the start of a defun.
113    find_start_pos    is the place for which the defun start was found.
114    find_start_value  is the defun start position found for it.
115    find_start_buffer is the buffer it was found in.
116    find_start_begv   is the BEGV value when it was found.
117    find_start_modiff is the value of MODIFF when it was found.  */
118
119 static Bufpos find_start_pos;
120 static Bufpos find_start_value;
121 static struct buffer *find_start_buffer;
122 static Bufpos find_start_begv;
123 static int find_start_modiff;
124
125 /* Find a defun-start that is the last one before POS (or nearly the last).
126    We record what we find, so that another call in the same area
127    can return the same value right away.  */
128
129 static Bufpos find_defun_start(struct buffer *buf, Bufpos pos)
130 {
131         Bufpos tem;
132
133         /* Use previous finding, if it's valid and applies to this inquiry.  */
134         if (buf == find_start_buffer
135             /* Reuse the defun-start even if POS is a little farther on.
136                POS might be in the next defun, but that's ok.
137                Our value may not be the best possible, but will still be usable.  */
138             && pos <= find_start_pos + 1000
139             && pos >= find_start_value
140             && BUF_BEGV(buf) == find_start_begv
141             && BUF_MODIFF(buf) == find_start_modiff)
142                 return find_start_value;
143
144         /* Back up to start of line.  */
145         tem = find_next_newline(buf, pos, -1);
146
147         SCS_STATISTICS_SET_FUNCTION(scs_find_defun_start);
148         SETUP_SYNTAX_CACHE(tem, 1);
149         while (tem > BUF_BEGV(buf)) {
150                 UPDATE_SYNTAX_CACHE_BACKWARD(tem);
151
152                 /* Open-paren at start of line means we found our defun-start.  */
153                 if (SYNTAX_FROM_CACHE(mirrortab, BUF_FETCH_CHAR(buf, tem)) ==
154                     Sopen)
155                         break;
156                 /* Move to beg of previous line.  */
157                 tem = find_next_newline(buf, tem, -2);
158         }
159
160         /* Record what we found, for the next try.  */
161         find_start_value = tem;
162         find_start_buffer = buf;
163         find_start_modiff = BUF_MODIFF(buf);
164         find_start_begv = BUF_BEGV(buf);
165         find_start_pos = pos;
166
167         return find_start_value;
168 }
169 \f
170 DEFUN("syntax-table-p", Fsyntax_table_p, 1, 1, 0,       /*
171 Return t if OBJECT is a syntax table.
172 Any vector of 256 elements will do.
173 */
174       (object))
175 {
176         return (CHAR_TABLEP(object)
177                 && XCHAR_TABLE_TYPE(object) == CHAR_TABLE_TYPE_SYNTAX)
178             ? Qt : Qnil;
179 }
180
181 static Lisp_Object check_syntax_table(Lisp_Object obj, Lisp_Object default_)
182 {
183         if (NILP(obj))
184                 obj = default_;
185         while (NILP(Fsyntax_table_p(obj)))
186                 obj = wrong_type_argument(Qsyntax_table_p, obj);
187         return obj;
188 }
189
190 DEFUN("syntax-table", Fsyntax_table, 0, 1, 0,   /*
191 Return the current syntax table.
192 This is the one specified by the current buffer, or by BUFFER if it
193 is non-nil.
194 */
195       (buffer))
196 {
197         return decode_buffer(buffer, 0)->syntax_table;
198 }
199
200 DEFUN("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
201 Return the standard syntax table.
202 This is the one used for new buffers.
203 */
204       ())
205 {
206         return Vstandard_syntax_table;
207 }
208
209 DEFUN("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
210 Return a new syntax table which is a copy of SYNTAX-TABLE.
211 SYNTAX-TABLE defaults to the standard syntax table.
212 */
213       (syntax_table))
214 {
215         if (NILP(Vstandard_syntax_table))
216                 return Fmake_char_table(Qsyntax);
217
218         syntax_table = check_syntax_table(syntax_table, Vstandard_syntax_table);
219         return Fcopy_char_table(syntax_table);
220 }
221
222 DEFUN("set-syntax-table", Fset_syntax_table, 1, 2, 0,   /*
223 Select SYNTAX-TABLE as the new syntax table for BUFFER.
224 BUFFER defaults to the current buffer if omitted.
225 */
226       (syntax_table, buffer))
227 {
228         struct buffer *buf = decode_buffer(buffer, 0);
229         syntax_table = check_syntax_table(syntax_table, Qnil);
230         buf->syntax_table = syntax_table;
231         buf->mirror_syntax_table = XCHAR_TABLE(syntax_table)->mirror_table;
232         /* Indicate that this buffer now has a specified syntax table.  */
233         buf->local_var_flags |= XINT(buffer_local_flags.syntax_table);
234         return syntax_table;
235 }
236 \f
237 /* The current syntax state */
238 struct syntax_cache syntax_cache;
239
240 /* 
241    Update syntax_cache to an appropriate setting for position POS
242
243    The sign of COUNT gives the relative position of POS wrt the
244    previously valid interval.  (not currently used)
245
246    `syntax_cache.*_change' are the next and previous positions at
247    which syntax_code and c_s_t will need to be recalculated.
248
249    #### Currently this code uses 'get-char-property', which will
250    return the "last smallest" extent at a given position. In cases
251    where overlapping extents are defined, this code will simply use
252    whatever is returned by get-char-property.
253
254    It might be worth it at some point to merge provided syntax tables
255    outward to the current buffer.
256
257    sjt sez:
258    This implementation has to rather inefficient, since it looks at
259    next-extent-change, and a heavily font-locked buffer will be rife
260    with irrelevant extents.  We could do a sledgehammer check on this
261    by looking at the distribution of extent lengths.  Also count up
262    cache hits and misses.
263
264    If we assume that syntax-table is a _text_ property (which also
265    deals with the issue of overlapping syntax-table properties), then
266    the following strategy recommends itself
267      o give the syntax cache a `valid' flag, to be reset whenever a
268        syntax-table property is added, changed, or removed; this could
269        be done by setting syntax_cache's prev_change > next_change
270        (but not compatible with using extents/markers here); if it's a
271        Lisp variable, doing it in Lisp shouldn't be too inefficient
272      o lazily initialize the cache whenever the object being examined
273        differs from the object the cache currently refers to
274      o by using {previous,next-single-property-change} we should be
275        able to get much bigger cache intervals (in most cases, the
276        whole buffer)
277      o cache markers instead of positions so the mere insertion or
278        deletion of text doesn't invalidate the cache, only if it
279        involves a syntax-table property (we could also cache the
280        extents carrying the syntax-table text-property; that gives us
281        another check for invalid cache).
282
283    If I understand this correctly, we need to invalidate the cache in the
284    following cases:
285      o If the referenced object changes (it's a global cache)
286      o If there are insertions or deletions of text (the positions are
287        absolute; fix: use markers or an extent instead?)
288      o If the syntax-table property is altered == added and different or
289        removed and the same (fix: probably computable from range overlap,
290        but is it worth it?  would interact with ins/del); this includes
291        detachment of extents with the same value (but only the boundary
292        extents, as otherwise the range coalesces across the deletion point)
293        and attachment of extents with a different value
294    Note: the above looks a lot like what Ben has implemented in 21.5, but
295    he goes one better by making the cache buffer-local.
296
297    Note: cperl mode uses the text property API, not extents/overlays.
298 */
299
300 #ifdef SYNTAX_CACHE_STATISTICS
301 struct syntax_cache_statistics scs_statistics =
302     { 0, 0, 0, 0, -1, -1, 0.0, 0.0, scs_no_function };
303
304 char *syntax_cache_statistics_function_names[scs_number_of_functions] = {
305         "find_context",
306         "find_defun_start",
307         "scan_words",
308         "Fforward_comment",
309         "scan_lists",
310         "Fbackward_prefix_characters",
311         "scan_sexps_forward"
312 };
313 #endif                          /* SYNTAX_CACHE_STATISTICS */
314
315 void update_syntax_cache(int pos, int count)
316 {
317         Lisp_Object tmp_table;
318
319 #ifdef SYNTAX_CACHE_STATISTICS
320         if (scs_statistics.total_updates == 0) {
321                 int i;
322                 for (i = 0; i < scs_number_of_functions; ++i)
323                         scs_statistics.functions[i] = 0;
324         }
325         if (syntax_cache.prev_change > syntax_cache.next_change)
326                 scs_statistics.inits++;
327         else if (pos < syntax_cache.prev_change)
328                 scs_statistics.misses_lo++;
329         else if (pos >= syntax_cache.next_change)
330                 scs_statistics.misses_hi++;
331 #endif                          /* SYNTAX_CACHE_STATISTICS */
332
333         /* #### Since font-lock undoes any narrowing, maybe the BUF_ZV and
334            BUF_BEGV below should be BUF_Z and BUF_BEG respectively? */
335         if (BUFFERP(syntax_cache.object)) {
336                 int get_change_before = pos + 1;
337
338                 tmp_table = Fget_char_property(make_int(pos), Qsyntax_table,
339                                                syntax_cache.object, Qnil);
340 #if NEXT_SINGLE_PROPERTY_CHANGE
341                 /* #### shouldn't we be using BUF_BEGV here? */
342                 syntax_cache.next_change =
343                     XINT(Fnext_single_property_change
344                          (make_int(pos > 0 ? pos : 1), Qsyntax_table,
345                           syntax_cache.object,
346                           make_int(BUF_ZV(syntax_cache.buffer))));
347 #else
348                 syntax_cache.next_change =
349                     XINT(Fnext_extent_change(make_int(pos > 0 ? pos : 1),
350                                              syntax_cache.object));
351 #endif
352
353                 /* #### shouldn't we be using BUF_BEGV here? */
354                 if (get_change_before < 1)
355                         get_change_before = 1;
356                 else if (get_change_before > BUF_ZV(syntax_cache.buffer))
357                         get_change_before = BUF_ZV(syntax_cache.buffer);
358
359 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
360                 /* #### shouldn't we be using BUF_BEGV here? */
361                 syntax_cache.prev_change =
362                     XINT(Fprevious_single_property_change
363                          (make_int(get_change_before), Qsyntax_table,
364                           syntax_cache.object, make_int(1)));
365 #else
366                 syntax_cache.prev_change =
367                     XINT(Fprevious_extent_change(make_int(get_change_before),
368                                                  syntax_cache.object));
369 #endif
370         } else if (STRINGP(syntax_cache.object)) {
371                 int get_change_before = pos + 1;
372
373                 tmp_table = Fget_char_property(make_int(pos), Qsyntax_table,
374                                                syntax_cache.object, Qnil);
375 #if NEXT_SINGLE_PROPERTY_CHANGE
376                 /* #### shouldn't we be using BUF_BEGV here? */
377                 syntax_cache.next_change =
378                     XINT(Fnext_single_property_change
379                          (make_int(pos >= 0 ? pos : 0), Qsyntax_table,
380                           syntax_cache.object,
381                           make_int(XSTRING_LENGTH(syntax_cache.object))));
382 #else
383                 syntax_cache.next_change =
384                     XINT(Fnext_extent_change(make_int(pos >= 0 ? pos : 0),
385                                              syntax_cache.object));
386 #endif
387
388                 if (get_change_before < 0)
389                         get_change_before = 0;
390                 else if (get_change_before >
391                          XSTRING_LENGTH(syntax_cache.object))
392                         get_change_before = XSTRING_LENGTH(syntax_cache.object);
393
394 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
395                 syntax_cache.prev_change =
396                     XINT(Fprevious_single_property_change
397                          (make_int(get_change_before), Qsyntax_table,
398                           syntax_cache.object, make_int(0)));
399 #else
400                 syntax_cache.prev_change =
401                     XINT(Fprevious_extent_change(make_int(get_change_before),
402                                                  syntax_cache.object));
403 #endif
404         } else {
405                 /* silence compiler */
406                 tmp_table = Qnil;
407                 /* Always aborts.
408                  * #### Is there another sensible thing to do here? */
409                 assert(BUFFERP(syntax_cache.object) ||
410                        STRINGP(syntax_cache.object));
411         }
412
413         if (EQ(Fsyntax_table_p(tmp_table), Qt)) {
414                 syntax_cache.use_code = 0;
415                 syntax_cache.current_syntax_table =
416                         XCHAR_TABLE(tmp_table)->mirror_table;
417         } else if (CONSP(tmp_table) && INTP(XCAR(tmp_table))) {
418                 syntax_cache.use_code = 1;
419                 /* we CANNOT cast the following, XINT goes to long int
420                  * while enums are usually just unsigned ints
421                  * this'll trigger a warning on 64b machines */
422                 syntax_cache.syntax_code = XINT(XCAR(tmp_table));
423         } else {
424                 syntax_cache.use_code = 0;
425                 syntax_cache.current_syntax_table =
426                     syntax_cache.buffer->mirror_syntax_table;
427         }
428
429 #ifdef SYNTAX_CACHE_STATISTICS
430         {
431                 int length =
432                     syntax_cache.next_change - syntax_cache.prev_change;
433                 int misses =
434                     scs_statistics.misses_lo + scs_statistics.misses_hi +
435                     scs_statistics.inits;
436
437                 if (scs_statistics.min_length == -1
438                     || scs_statistics.min_length > length)
439                         scs_statistics.min_length = length;
440                 if (scs_statistics.max_length == -1
441                     || scs_statistics.max_length < length)
442                         scs_statistics.max_length = length;
443                 scs_statistics.mean_length_on_miss =
444                     ((misses - 1) * scs_statistics.mean_length_on_miss +
445                      length) / misses;
446         }
447
448         scs_statistics.mean_length
449             = scs_statistics.total_updates * scs_statistics.mean_length
450             + syntax_cache.next_change - syntax_cache.prev_change;
451         scs_statistics.total_updates++;
452         scs_statistics.mean_length /= scs_statistics.total_updates;
453
454         if (scs_statistics.this_function != scs_no_function) {
455                 scs_statistics.functions[scs_statistics.this_function]++;
456                 scs_statistics.this_function = scs_no_function;
457         }
458
459         if (!
460             (scs_statistics.total_updates %
461              SYNTAX_CACHE_STATISTICS_REPORT_INTERVAL)) {
462                 fprintf(stderr, "Syntax cache stats:\n  ");
463                 fprintf(stderr,
464                         "updates %d, inits %d, misses low %d, misses high %d,",
465                         scs_statistics.total_updates, scs_statistics.inits,
466                         scs_statistics.misses_lo, scs_statistics.misses_hi);
467                 fprintf(stderr, "\n ");
468
469 #define REPORT_FUNCTION(i)                              \
470   fprintf (stderr, " %s %d,",                           \
471            syntax_cache_statistics_function_names[i],   \
472            scs_statistics.functions[i]);
473
474                 REPORT_FUNCTION(scs_find_context);
475                 REPORT_FUNCTION(scs_find_defun_start);
476                 REPORT_FUNCTION(scs_scan_words);
477                 REPORT_FUNCTION(scs_Fforward_comment);
478                 fprintf(stderr, "\n ");
479                 REPORT_FUNCTION(scs_scan_lists);
480                 REPORT_FUNCTION(scs_Fbackward_prefix_characters);
481                 REPORT_FUNCTION(scs_scan_sexps_forward);
482 #undef REPORT_FUNCTION
483
484                 fprintf(stderr, "\n  min length %d, max length %d,",
485                         scs_statistics.min_length, scs_statistics.max_length);
486                 fprintf(stderr,
487                         "\n  mean length %.1f, mean length on miss %.1f\n",
488                         scs_statistics.mean_length,
489                         scs_statistics.mean_length_on_miss);
490         }
491 #endif                          /* SYNTAX_CACHE_STATISTICS */
492 }
493 \f
494 /* Convert a letter which signifies a syntax code
495    into the code it signifies.
496    This is used by modify-syntax-entry, and other things. */
497
498 const unsigned char syntax_spec_code[0400] =
499     { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
500         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
501         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
502         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
503         (char)Swhitespace, 0377, (char)Sstring, 0377,
504         (char)Smath, 0377, 0377, (char)Squote,
505         (char)Sopen, (char)Sclose, 0377, 0377,
506         0377, (char)Swhitespace, (char)Spunct, (char)Scharquote,
507         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
508         0377, 0377, 0377, 0377,
509         (char)Scomment, 0377, (char)Sendcomment, 0377,
510         (char)Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,       /* @, A ... */
511         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
512         0377, 0377, 0377, 0377, 0377, 0377, 0377, (char)Sword,
513         0377, 0377, 0377, 0377, (char)Sescape, 0377, 0377, (char)Ssymbol,
514         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
515         0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
516         0377, 0377, 0377, 0377, 0377, 0377, 0377, (char)Sword,
517         0377, 0377, 0377, 0377, (char)Sstring_fence, 0377, 0377, 0377
518 };
519
520 unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@!|";
521
522 DEFUN("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0,     /*
523 Return a string of the recognized syntax designator chars.
524 The chars are ordered by their internal syntax codes, which are
525 numbered starting at 0.
526 */
527       ())
528 {
529         return Vsyntax_designator_chars_string;
530 }
531
532 DEFUN("char-syntax", Fchar_syntax, 1, 2, 0,     /*
533 Return the syntax code of CHARACTER, described by a character.
534 For example, if CHARACTER is a word constituent,
535 the character `?w' is returned.
536 The characters that correspond to various syntax codes
537 are listed in the documentation of `modify-syntax-entry'.
538 Optional second argument SYNTAX-TABLE defaults to the current buffer's
539 syntax table.
540 */
541       (character, syntax_table))
542 {
543         Lisp_Char_Table *mirrortab;
544
545         if (NILP(character)) {
546                 character = make_char('\000');
547         }
548         CHECK_CHAR_COERCE_INT(character);
549         syntax_table =
550             check_syntax_table(syntax_table, current_buffer->syntax_table);
551         mirrortab = XCHAR_TABLE(XCHAR_TABLE(syntax_table)->mirror_table);
552         return
553             make_char(syntax_code_spec
554                       [(int)SYNTAX(mirrortab, XCHAR(character))]);
555 }
556
557 #ifdef MULE
558
559 enum syntaxcode
560 charset_syntax(struct buffer *buf, Lisp_Object charset, int *multi_p_out)
561 {
562         *multi_p_out = 1;
563         /* #### get this right */
564         return Spunct;
565 }
566
567 #endif
568
569 Lisp_Object syntax_match(Lisp_Object syntax_table, Emchar ch)
570 {
571         Lisp_Object code = XCHAR_TABLE_VALUE_UNSAFE(syntax_table, ch);
572         Lisp_Object code2 = code;
573
574         if (CONSP(code))
575                 code2 = XCAR(code);
576         if (SYNTAX_FROM_CODE(XINT(code2)) == Sinherit)
577                 code = XCHAR_TABLE_VALUE_UNSAFE(Vstandard_syntax_table, ch);
578
579         return CONSP(code) ? XCDR(code) : Qnil;
580 }
581
582 DEFUN("matching-paren", Fmatching_paren, 1, 2, 0,       /*
583 Return the matching parenthesis of CHARACTER, or nil if none.
584 Optional second argument SYNTAX-TABLE defaults to the current buffer's
585 syntax table.
586 */
587       (character, syntax_table))
588 {
589         Lisp_Char_Table *mirrortab;
590         int code;
591
592         CHECK_CHAR_COERCE_INT(character);
593         syntax_table =
594             check_syntax_table(syntax_table, current_buffer->syntax_table);
595         mirrortab = XCHAR_TABLE(XCHAR_TABLE(syntax_table)->mirror_table);
596         code = SYNTAX(mirrortab, XCHAR(character));
597         if (code == Sopen || code == Sclose || code == Sstring)
598                 return syntax_match(syntax_table, XCHAR(character));
599         return Qnil;
600 }
601 \f
602 #ifdef MULE
603 /* Return 1 if there is a word boundary between two word-constituent
604    characters C1 and C2 if they appear in this order, else return 0.
605    There is no word boundary between two word-constituent ASCII
606    characters.  */
607 #define WORD_BOUNDARY_P(c1, c2)                 \
608   (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2))    \
609    && word_boundary_p (c1, c2))
610
611 extern int word_boundary_p(Emchar c1, Emchar c2);
612 #endif
613
614 /* Return the position across COUNT words from FROM.
615    If that many words cannot be found before the end of the buffer, return 0.
616    COUNT negative means scan backward and stop at word beginning.  */
617
618 Bufpos scan_words(struct buffer *buf, Bufpos from, int count)
619 {
620         Bufpos limit = count > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
621         Emchar ch0, ch1;
622         enum syntaxcode code;
623
624         SCS_STATISTICS_SET_FUNCTION(scs_scan_words);
625         SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, from, count);
626
627         /* #### is it really worth it to hand expand both cases? JV */
628         while (count > 0) {
629                 QUIT;
630
631                 while (1) {
632                         if (from == limit)
633                                 return 0;
634
635                         UPDATE_SYNTAX_CACHE_FORWARD(from);
636                         ch0 = BUF_FETCH_CHAR(buf, from);
637                         code = SYNTAX_FROM_CACHE(mirrortab, ch0);
638
639                         from++;
640                         if (words_include_escapes
641                             && (code == Sescape || code == Scharquote))
642                                 break;
643                         if (code == Sword)
644                                 break;
645                 }
646
647                 QUIT;
648
649                 while (from != limit) {
650                         UPDATE_SYNTAX_CACHE_FORWARD(from);
651                         ch1 = BUF_FETCH_CHAR(buf, from);
652                         code = SYNTAX_FROM_CACHE(mirrortab, ch1);
653                         if (!(words_include_escapes
654                               && (code == Sescape || code == Scharquote)))
655                                 if (code != Sword
656 #ifdef MULE
657                                     || WORD_BOUNDARY_P(ch0, ch1)
658 #endif
659                                     )
660                                         break;
661 #ifdef MULE
662                         ch0 = ch1;
663 #endif
664                         from++;
665                 }
666                 count--;
667         }
668
669         while (count < 0) {
670                 QUIT;
671
672                 while (1) {
673                         if (from == limit)
674                                 return 0;
675
676                         UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
677                         ch1 = BUF_FETCH_CHAR(buf, from - 1);
678                         code = SYNTAX_FROM_CACHE(mirrortab, ch1);
679                         from--;
680
681                         if (words_include_escapes
682                             && (code == Sescape || code == Scharquote))
683                                 break;
684                         if (code == Sword)
685                                 break;
686                 }
687
688                 QUIT;
689
690                 while (from != limit) {
691                         UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
692                         ch0 = BUF_FETCH_CHAR(buf, from - 1);
693                         code = SYNTAX_FROM_CACHE(mirrortab, ch0);
694
695                         if (!(words_include_escapes
696                               && (code == Sescape || code == Scharquote)))
697                                 if (code != Sword
698 #ifdef MULE
699                                     || WORD_BOUNDARY_P(ch0, ch1)
700 #endif
701                                     )
702                                         break;
703 #ifdef MULE
704                         ch1 = ch0;
705 #endif
706                         from--;
707                 }
708                 count++;
709         }
710
711         return from;
712 }
713
714 DEFUN("forward-word", Fforward_word, 0, 2, "_p",        /*
715 Move point forward COUNT words (backward if COUNT is negative).
716 Normally t is returned, but if an edge of the buffer is reached,
717 point is left there and nil is returned.
718
719 The characters that are moved over may be added to the current selection
720 \(i.e. active region) if the Shift key is held down, a motion key is used
721 to invoke this command, and `shifted-motion-keys-select-region' is t; see
722 the documentation for this variable for more details.
723
724 COUNT defaults to 1, and BUFFER defaults to the current buffer.
725 */
726       (count, buffer))
727 {
728         Bufpos val;
729         struct buffer *buf = decode_buffer(buffer, 0);
730         EMACS_INT n;
731
732         if (NILP(count))
733                 n = 1;
734         else {
735                 CHECK_INT(count);
736                 n = XINT(count);
737         }
738
739         val = scan_words(buf, BUF_PT(buf), n);
740         if (val) {
741                 BUF_SET_PT(buf, val);
742                 return Qt;
743         } else {
744                 BUF_SET_PT(buf, n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf));
745                 return Qnil;
746         }
747 }
748 \f
749 static void scan_sexps_forward(struct buffer *buf,
750                                struct lisp_parse_state *,
751                                Bufpos from, Bufpos end,
752                                int targetdepth, int stopbefore,
753                                Lisp_Object oldstate, int commentstop);
754
755 static int
756 find_start_of_comment(struct buffer *buf, Bufpos from, Bufpos stop,
757                       int comstyle)
758 {
759         Emchar c;
760         enum syntaxcode code;
761
762         /* Look back, counting the parity of string-quotes,
763            and recording the comment-starters seen.
764            When we reach a safe place, assume that's not in a string;
765            then step the main scan to the earliest comment-starter seen
766            an even number of string quotes away from the safe place.
767
768            OFROM[I] is position of the earliest comment-starter seen
769            which is I+2X quotes from the comment-end.
770            PARITY is current parity of quotes from the comment end.  */
771         int parity = 0;
772         Emchar my_stringend = 0;
773         int string_lossage = 0;
774         Bufpos comment_end = from;
775         Bufpos comstart_pos = 0;
776         int comstart_parity = 0;
777         int styles_match_p = 0;
778         /* mask to match comment styles against; for ST_COMMENT_STYLE, this
779            will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
780         int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
781
782         /* At beginning of range to scan, we're outside of strings;
783            that determines quote parity to the comment-end.  */
784         while (from != stop) {
785                 int syncode;
786
787                 /* Move back and examine a character.  */
788                 from--;
789                 UPDATE_SYNTAX_CACHE_BACKWARD(from);
790
791                 c = BUF_FETCH_CHAR(buf, from);
792                 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
793                 code = SYNTAX_FROM_CODE(syncode);
794
795                 /* is this a 1-char comment end sequence? if so, try
796                    to see if style matches previously extracted mask */
797                 if (code == Sendcomment) {
798                         /* MT had SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode) & mask
799                            but (as a Boolean) that's just a complicated way to write: */
800                         styles_match_p =
801                             SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask);
802                 }
803
804                 /* or are we looking at a 1-char comment start sequence
805                    of the style matching mask? */
806                 else if (code == Scomment) {
807                         styles_match_p =
808                             SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask);
809                 }
810
811                 /* otherwise, is this a 2-char comment end or start sequence? */
812                 else if (from > stop)
813                         do {
814                                 /* 2-char comment end sequence? */
815                                 if (SYNTAX_CODE_END_SECOND_P(syncode)) {
816                                         int prev_syncode;
817                                         UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
818                                         prev_syncode =
819                                             SYNTAX_CODE_FROM_CACHE(mirrortab,
820                                                                    BUF_FETCH_CHAR
821                                                                    (buf,
822                                                                     from - 1));
823
824                                         if (SYNTAX_CODES_END_P
825                                             (prev_syncode, syncode)) {
826                                                 code = Sendcomment;
827                                                 styles_match_p =
828                                                     SYNTAX_CODES_MATCH_END_P
829                                                     (prev_syncode, syncode,
830                                                      mask);
831                                                 from--;
832                                                 UPDATE_SYNTAX_CACHE_BACKWARD
833                                                     (from);
834                                                 c = BUF_FETCH_CHAR(buf, from);
835
836                                                 /* Found a comment-end sequence, so skip past the
837                                                    check for a comment-start */
838                                                 break;
839                                         }
840                                 }
841
842                                 /* 2-char comment start sequence? */
843                                 if (SYNTAX_CODE_START_SECOND_P(syncode)) {
844                                         int prev_syncode;
845                                         UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
846                                         prev_syncode =
847                                             SYNTAX_CODE_FROM_CACHE(mirrortab,
848                                                                    BUF_FETCH_CHAR
849                                                                    (buf,
850                                                                     from - 1));
851
852                                         if (SYNTAX_CODES_START_P
853                                             (prev_syncode, syncode)) {
854                                                 code = Scomment;
855                                                 styles_match_p =
856                                                     SYNTAX_CODES_MATCH_START_P
857                                                     (prev_syncode, syncode,
858                                                      mask);
859                                                 from--;
860                                                 UPDATE_SYNTAX_CACHE_BACKWARD
861                                                     (from);
862                                                 c = BUF_FETCH_CHAR(buf, from);
863                                         }
864                                 }
865                         } while (0);
866
867                 /* Ignore escaped characters.  */
868                 if (char_quoted(buf, from))
869                         continue;
870
871                 /* Track parity of quotes.  */
872                 if (code == Sstring) {
873                         parity ^= 1;
874                         if (my_stringend == 0)
875                                 my_stringend = c;
876                         /* If we have two kinds of string delimiters.
877                            There's no way to grok this scanning backwards.  */
878                         else if (my_stringend != c)
879                                 string_lossage = 1;
880                 }
881
882                 if (code == Sstring_fence || code == Scomment_fence) {
883                         parity ^= 1;
884                         if (my_stringend == 0)
885                                 my_stringend =
886                                     code ==
887                                     Sstring_fence ? ST_STRING_STYLE :
888                                     ST_COMMENT_STYLE;
889                         /* If we have two kinds of string delimiters.
890                            There's no way to grok this scanning backwards.  */
891                         else if (my_stringend != (code == Sstring_fence
892                                                   ? ST_STRING_STYLE :
893                                                   ST_COMMENT_STYLE))
894                                 string_lossage = 1;
895                 }
896
897                 /* Record comment-starters according to that
898                    quote-parity to the comment-end.  */
899                 if (code == Scomment && styles_match_p) {
900                         comstart_parity = parity;
901                         comstart_pos = from;
902                 }
903
904                 /* If we find another earlier comment-ender,
905                    any comment-starts earlier than that don't count
906                    (because they go with the earlier comment-ender).  */
907                 if (code == Sendcomment && styles_match_p)
908                         break;
909
910                 /* Assume a defun-start point is outside of strings.  */
911                 if (code == Sopen
912                     && (from == stop || BUF_FETCH_CHAR(buf, from - 1) == '\n'))
913                         break;
914         }
915
916         if (comstart_pos == 0)
917                 from = comment_end;
918         /* If the earliest comment starter
919            is followed by uniform paired string quotes or none,
920            we know it can't be inside a string
921            since if it were then the comment ender would be inside one.
922            So it does start a comment.  Skip back to it.  */
923         else if (comstart_parity == 0 && !string_lossage)
924                 from = comstart_pos;
925         else {
926                 /* We had two kinds of string delimiters mixed up
927                    together.  Decode this going forwards.
928                    Scan fwd from the previous comment ender
929                    to the one in question; this records where we
930                    last passed a comment starter.  */
931
932                 struct lisp_parse_state state;
933                 scan_sexps_forward(buf, &state,
934                                    find_defun_start(buf, comment_end),
935                                    comment_end - 1, -10000, 0, Qnil, 0);
936                 if (state.incomment)
937                         from = state.comstr_start;
938                 else
939                         /* We can't grok this as a comment; scan it normally.  */
940                         from = comment_end;
941                 UPDATE_SYNTAX_CACHE_FORWARD(from - 1);
942         }
943         return from;
944 }
945
946 static Bufpos
947 find_end_of_comment(struct buffer *buf, Bufpos from, Bufpos stop, int comstyle)
948 {
949         int c;
950         int syncode;
951         enum syntaxcode code, next_code;
952         /* mask to match comment styles against; for ST_COMMENT_STYLE, this
953            will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
954         int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
955
956         /* This is only called by functions which have already set up the
957            syntax_cache and are keeping it up-to-date */
958         while (1) {
959                 if (from == stop) {
960                         return -1;
961                 }
962
963                 UPDATE_SYNTAX_CACHE_FORWARD(from);
964                 c = BUF_FETCH_CHAR(buf, from);
965                 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
966                 code = SYNTAX_FROM_CODE(syncode);
967
968                 from++;
969                 UPDATE_SYNTAX_CACHE_FORWARD(from);
970
971                 /* At end of current generic comment? */
972                 if (comstyle == ST_COMMENT_STYLE) {
973                         if (code == Scomment_fence)
974                                 break;  /* matched */
975                         else
976                                 continue;       /* Ignore other styles in generic comments */
977                 }
978                 /* At end of current one-character comment of specified style? */
979                 else if (code == Sendcomment &&
980                          SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask)) {
981                         /* pre-MT code effectively does from-- here, that seems wrong */
982                         break;
983                 }
984
985                 /* At end of current two-character comment of specified style? */
986                 c = BUF_FETCH_CHAR(buf, from);
987                 next_code = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
988                 if (from < stop
989                     && SYNTAX_CODES_MATCH_END_P(syncode, next_code, mask)) {
990                         from++;
991                         UPDATE_SYNTAX_CACHE_FORWARD(from);
992                         break;
993                 }
994         }
995         return from;
996 }
997 \f
998 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
999    in this function (and minor changes to find_start_of_comment(),
1000    above, which is part of Fforward_comment() in FSF).  Attempts to port
1001    that logic made this function break, so I'm leaving it out.  If anyone
1002    ever complains about this function not working properly, take a look
1003    at those changes.  --ben */
1004
1005 DEFUN("forward-comment", Fforward_comment, 0, 2, 0,     /*
1006 Move forward across up to COUNT comments, or backwards if COUNT is negative.
1007 Stop scanning if we find something other than a comment or whitespace.
1008 Set point to where scanning stops.
1009 If COUNT comments are found as expected, with nothing except whitespace
1010 between them, return t; otherwise return nil.
1011 Point is set in either case.
1012 COUNT defaults to 1, and BUFFER defaults to the current buffer.
1013 */
1014       (count, buffer))
1015 {
1016         Bufpos from;
1017         Bufpos stop;
1018         Emchar c;
1019         enum syntaxcode code;
1020         int syncode;
1021         EMACS_INT n;
1022         struct buffer *buf = decode_buffer(buffer, 0);
1023
1024         if (NILP(count))
1025                 n = 1;
1026         else {
1027                 CHECK_INT(count);
1028                 n = XINT(count);
1029         }
1030
1031         from = BUF_PT(buf);
1032
1033         SCS_STATISTICS_SET_FUNCTION(scs_Fforward_comment);
1034         SETUP_SYNTAX_CACHE(from, n);
1035         while (n > 0) {
1036                 QUIT;
1037
1038                 stop = BUF_ZV(buf);
1039                 while (from < stop) {
1040                         int comstyle = 0;       /* Code for comment style: 0 for A, 1 for B,
1041                                                    or ST_COMMENT_STYLE */
1042
1043                         if (char_quoted(buf, from)) {
1044                                 from++;
1045                                 continue;
1046                         }
1047
1048                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1049                         c = BUF_FETCH_CHAR(buf, from);
1050                         syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1051                         code = SYNTAX_FROM_CODE(syncode);
1052
1053                         if (code == Scomment) {
1054                                 /* we have encountered a single character comment start
1055                                    sequence, and we are ignoring all text inside comments.
1056                                    we must record the comment style this character begins
1057                                    so that later, only a comment end of the same style actually
1058                                    ends the comment section */
1059                                 comstyle =
1060                                     SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1061                                     == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1062                         }
1063
1064                         else if (code == Scomment_fence) {
1065                                 from++;
1066                                 code = Scomment;
1067                                 comstyle = ST_COMMENT_STYLE;
1068                         }
1069
1070                         else if (from < stop
1071                                  && SYNTAX_CODE_START_FIRST_P(syncode)) {
1072                                 int next_syncode;
1073                                 UPDATE_SYNTAX_CACHE_FORWARD(from + 1);
1074                                 next_syncode =
1075                                     SYNTAX_CODE_FROM_CACHE(mirrortab,
1076                                                            BUF_FETCH_CHAR(buf,
1077                                                                           from +
1078                                                                           1));
1079
1080                                 if (SYNTAX_CODES_START_P(syncode, next_syncode)) {
1081                                         /* we have encountered a 2char comment start sequence and we
1082                                            are ignoring all text inside comments. we must record
1083                                            the comment style this sequence begins so that later,
1084                                            only a comment end of the same style actually ends
1085                                            the comment section */
1086                                         code = Scomment;
1087                                         comstyle =
1088                                             SYNTAX_CODES_COMMENT_MASK_START
1089                                             (syncode, next_syncode)
1090                                             == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1091                                         from++;
1092                                 }
1093                         }
1094
1095                         if (code == Scomment) {
1096                                 Bufpos newfrom =
1097                                     find_end_of_comment(buf, from, stop,
1098                                                         comstyle);
1099                                 if (newfrom < 0) {
1100                                         /* we stopped because from==stop */
1101                                         BUF_SET_PT(buf, stop);
1102                                         return Qnil;
1103                                 }
1104                                 from = newfrom;
1105
1106                                 /* We have skipped one comment.  */
1107                                 break;
1108                         } else if (code != Swhitespace
1109                                    && code != Sendcomment && code != Scomment) {
1110                                 BUF_SET_PT(buf, from);
1111                                 return Qnil;
1112                         }
1113                         from++;
1114                 }
1115
1116                 /* End of comment reached */
1117                 n--;
1118         }
1119
1120         while (n < 0) {
1121                 QUIT;
1122
1123                 stop = BUF_BEGV(buf);
1124                 while (from > stop) {
1125                         int comstyle = 0;       /* Code for comment style: 0 for A, 1 for B,
1126                                                    or ST_COMMENT_STYLE */
1127
1128                         from--;
1129                         if (char_quoted(buf, from)) {
1130                                 from--;
1131                                 continue;
1132                         }
1133
1134                         c = BUF_FETCH_CHAR(buf, from);
1135                         syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1136                         code = SYNTAX_FROM_CODE(syncode);
1137
1138                         if (code == Sendcomment) {
1139                                 /* we have found a single char end comment. we must record
1140                                    the comment style encountered so that later, we can match
1141                                    only the proper comment begin sequence of the same style */
1142                                 comstyle =
1143                                     SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1144                                     == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1145                         }
1146
1147                         else if (code == Scomment_fence) {
1148                                 code = Sendcomment;
1149                                 comstyle = ST_COMMENT_STYLE;
1150                         }
1151
1152                         else if (from > stop
1153                                  /* #### This seems logical but it's not in 21.4.9 */
1154                                  /* && !char_quoted (buf, from - 1) */
1155                                  && SYNTAX_CODE_END_SECOND_P(syncode)) {
1156                                 int prev_syncode;
1157                                 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1158                                 prev_syncode =
1159                                     SYNTAX_CODE_FROM_CACHE(mirrortab,
1160                                                            BUF_FETCH_CHAR(buf,
1161                                                                           from -
1162                                                                           1));
1163                                 if (SYNTAX_CODES_END_P(prev_syncode, syncode)) {
1164                                         /* We must record the comment style encountered so that
1165                                            later, we can match only the proper comment begin
1166                                            sequence of the same style.  */
1167                                         code = Sendcomment;
1168                                         comstyle =
1169                                             SYNTAX_CODES_COMMENT_MASK_END
1170                                             (prev_syncode, syncode)
1171                                             == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1172                                         from--;
1173                                 }
1174                         }
1175
1176                         if (code == Sendcomment) {
1177                                 from =
1178                                     find_start_of_comment(buf, from, stop,
1179                                                           comstyle);
1180                                 break;
1181                         }
1182
1183                         else if (code != Swhitespace
1184                                  && code != Scomment && code != Sendcomment) {
1185                                 BUF_SET_PT(buf, from + 1);
1186                                 return Qnil;
1187                         }
1188                 }
1189
1190                 n++;
1191         }
1192
1193         BUF_SET_PT(buf, from);
1194         return Qt;
1195 }
1196 \f
1197 Lisp_Object
1198 scan_lists(struct buffer * buf, Bufpos from, int count, int depth,
1199            int sexpflag, int noerror)
1200 {
1201         Bufpos stop;
1202         Emchar c;
1203         int quoted;
1204         int mathexit = 0;
1205         enum syntaxcode code;
1206         int syncode;
1207         int min_depth = depth;  /* Err out if depth gets less than this. */
1208
1209         if (depth > 0)
1210                 min_depth = 0;
1211
1212         SCS_STATISTICS_SET_FUNCTION(scs_scan_lists);
1213         SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, from, count);
1214         while (count > 0) {
1215                 QUIT;
1216
1217                 stop = BUF_ZV(buf);
1218                 while (from < stop) {
1219                         int comstyle = 0;       /* mask for finding matching comment style */
1220                         Emchar stringterm = '\0';       /* Used by Sstring case in switch */
1221
1222                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1223                         c = BUF_FETCH_CHAR(buf, from);
1224                         syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1225                         code = SYNTAX_FROM_CODE(syncode);
1226                         from++;
1227
1228                         /* a 1-char comment start sequence */
1229                         if (code == Scomment && parse_sexp_ignore_comments) {
1230                                 comstyle =
1231                                     SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode) ==
1232                                     SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1233                         }
1234
1235                         /* else, a 2-char comment start sequence? */
1236                         else if (from < stop
1237                                  && SYNTAX_CODE_START_FIRST_P(syncode)
1238                                  && parse_sexp_ignore_comments) {
1239                                 int next_syncode;
1240                                 UPDATE_SYNTAX_CACHE_FORWARD(from);
1241                                 next_syncode = SYNTAX_CODE_FROM_CACHE(
1242                                         mirrortab, BUF_FETCH_CHAR(buf, from));
1243
1244                                 if (SYNTAX_CODES_START_P(syncode,
1245                                                          next_syncode)) {
1246                                         /* we have encountered a comment start
1247                                            sequence and we are ignoring all text
1248                                            inside comments. we must record the
1249                                            comment style this sequence begins so
1250                                            that later, only a comment end of the
1251                                            same style actually ends the comment
1252                                            section */
1253                                         code = Scomment;
1254                                         comstyle =
1255                                             SYNTAX_CODES_COMMENT_MASK_START
1256                                             (syncode, next_syncode)
1257                                             == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1258                                         from++;
1259                                 }
1260                         }
1261                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1262
1263                         if (SYNTAX_CODE_PREFIX(syncode))
1264                                 continue;
1265
1266                         switch (code) {
1267                         case Sescape:
1268                         case Scharquote:
1269                                 if (from == stop)
1270                                         goto lose;
1271                                 from++;
1272                                 /* treat following character as a word
1273                                    constituent */
1274                         case Sword:
1275                         case Ssymbol:
1276                                 if (depth || !sexpflag)
1277                                         break;
1278                                 /* This word counts as a sexp; return at end of
1279                                    it. */
1280                                 while (from < stop) {
1281                                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1282                                         switch ((unsigned int)
1283                                                 SYNTAX_FROM_CACHE(
1284                                                         mirrortab,
1285                                                         BUF_FETCH_CHAR(
1286                                                                 buf, from))) {
1287                                         case Scharquote:
1288                                         case Sescape:
1289                                                 from++;
1290                                                 if (from == stop)
1291                                                         goto lose;
1292                                                 break;
1293                                         case Sword:
1294                                         case Ssymbol:
1295                                         case Squote:
1296                                                 break;
1297                                         default:
1298                                                 goto done;
1299                                         }
1300                                         from++;
1301                                 }
1302                                 goto done;
1303
1304                         case Scomment_fence:
1305                                 comstyle = ST_COMMENT_STYLE;
1306                                 /* falls through! */
1307                         case Scomment:
1308                                 if (!parse_sexp_ignore_comments)
1309                                         break;
1310                                 UPDATE_SYNTAX_CACHE_FORWARD(from);
1311                                 {
1312                                         Bufpos newfrom =
1313                                             find_end_of_comment(buf, from, stop,
1314                                                                 comstyle);
1315                                         if (newfrom < 0) {
1316                                                 /* we stopped because from ==
1317                                                    stop in search forward */
1318                                                 from = stop;
1319                                                 if (depth == 0)
1320                                                         goto done;
1321                                                 goto lose;
1322                                         }
1323                                         from = newfrom;
1324                                 }
1325                                 break;
1326
1327                         case Smath:
1328                                 if (!sexpflag)
1329                                         break;
1330                                 if (from != stop
1331                                     && c == BUF_FETCH_CHAR(buf, from))
1332                                         from++;
1333                                 if (mathexit) {
1334                                         mathexit = 0;
1335                                         goto close1;
1336                                 }
1337                                 mathexit = 1;
1338
1339                         case Sopen:
1340                                 if (!++depth)
1341                                         goto done;
1342                                 break;
1343
1344                         case Sclose:
1345                               close1:
1346                                 if (!--depth)
1347                                         goto done;
1348                                 if (depth < min_depth) {
1349                                         if (noerror)
1350                                                 return Qnil;
1351                                         error("Containing expression "
1352                                               "ends prematurely");
1353                                 }
1354                                 break;
1355
1356                         case Sstring:
1357                                 {
1358                                         /* XEmacs change: call syntax_match on
1359                                            character */
1360                                         Emchar ch =
1361                                             BUF_FETCH_CHAR(buf, from - 1);
1362                                         Lisp_Object stermobj =
1363                                             syntax_match(syntax_cache.
1364                                                          current_syntax_table,
1365                                                          ch);
1366
1367                                         if (CHARP(stermobj))
1368                                                 stringterm = XCHAR(stermobj);
1369                                         else
1370                                                 stringterm = ch;
1371                                 }
1372                                 /* falls through! */
1373                         case Sstring_fence:
1374                                 while (1) {
1375                                         if (from >= stop)
1376                                                 goto lose;
1377                                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1378                                         c = BUF_FETCH_CHAR(buf, from);
1379                                         if (code == Sstring
1380                                             ? c == stringterm
1381                                             : SYNTAX_FROM_CACHE(mirrortab,
1382                                                                 c) ==
1383                                             Sstring_fence)
1384                                                 break;
1385
1386                                         switch ((unsigned int)
1387                                                 SYNTAX_FROM_CACHE(
1388                                                         mirrortab, c)) {
1389                                         case Scharquote:
1390                                         case Sescape:
1391                                                 from++;
1392                                                 break;
1393                                         default:
1394                                                 break;
1395                                         }
1396                                         from++;
1397                                 }
1398                                 from++;
1399                                 if (!depth && sexpflag)
1400                                         goto done;
1401                                 break;
1402
1403                                 /* list them all here */
1404                         case Squote:
1405                         case Swhitespace:
1406                         case Spunct:
1407                         case Sendcomment:
1408                         case Sinherit:
1409                         case Smax:
1410
1411                         default:
1412                                 break;
1413                         }
1414                 }
1415
1416                 /* Reached end of buffer.  Error if within object,
1417                    return nil if between */
1418                 if (depth)
1419                         goto lose;
1420
1421                 return Qnil;
1422
1423                 /* End of object reached */
1424               done:
1425                 count--;
1426         }
1427
1428         while (count < 0) {
1429                 QUIT;
1430
1431                 stop = BUF_BEGV(buf);
1432                 while (from > stop) {
1433                         /* mask for finding matching comment style */
1434                         int comstyle = 0;
1435                         /* used by case Sstring in switch below */
1436                         Emchar stringterm = '\0';
1437
1438                         from--;
1439                         UPDATE_SYNTAX_CACHE_BACKWARD(from);
1440                         quoted = char_quoted(buf, from);
1441                         if (quoted) {
1442                                 from--;
1443                                 UPDATE_SYNTAX_CACHE_BACKWARD(from);
1444                         }
1445
1446                         c = BUF_FETCH_CHAR(buf, from);
1447                         syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1448                         code = SYNTAX_FROM_CODE(syncode);
1449
1450                         if (code == Sendcomment && parse_sexp_ignore_comments) {
1451                                 /* we have found a single char end comment. we
1452                                    must record the comment style encountered so
1453                                    that later, we can match only the proper
1454                                    comment begin sequence of the same style */
1455                                 comstyle =
1456                                     SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1457                                     == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1458                         }
1459
1460                         else if (from > stop
1461                                  && SYNTAX_CODE_END_SECOND_P(syncode)
1462                                  && !char_quoted(buf, from - 1)
1463                                  && parse_sexp_ignore_comments) {
1464                                 int prev_syncode;
1465                                 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1466                                 prev_syncode = SYNTAX_CODE_FROM_CACHE
1467                                     (mirrortab, BUF_FETCH_CHAR(buf, from - 1));
1468
1469                                 if (SYNTAX_CODES_END_P(prev_syncode, syncode)) {
1470                                         /* we must record the comment style
1471                                            encountered so that later, we can
1472                                            match only the proper comment begin
1473                                            sequence of the same style */
1474                                         code = Sendcomment;
1475                                         comstyle =
1476                                             SYNTAX_CODES_COMMENT_MASK_END
1477                                             (prev_syncode, syncode)
1478                                             == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1479                                         from--;
1480                                 }
1481                         }
1482
1483                         if (SYNTAX_CODE_PREFIX(syncode)) {
1484                                 continue;
1485                         }
1486
1487                         switch (quoted ? Sword : code) {
1488                         case Sword:
1489                         case Ssymbol:
1490                                 if (depth || !sexpflag)
1491                                         break;
1492                                 /* This word counts as a sexp; count object
1493                                    finished after passing it. */
1494                                 while (from > stop) {
1495                                         /* enum syntaxcode syncode; */
1496                                         UPDATE_SYNTAX_CACHE_BACKWARD(from);
1497                                         quoted = char_quoted(buf, from - 1);
1498
1499                                         if (quoted)
1500                                                 from--;
1501                                         if (!(quoted
1502                                               || (syncode =
1503                                                   SYNTAX_FROM_CACHE(mirrortab,
1504                                                                     BUF_FETCH_CHAR
1505                                                                     (buf,
1506                                                                      from - 1)))
1507                                               == Sword || syncode == Ssymbol
1508                                               || syncode == Squote))
1509                                                 goto done2;
1510                                         from--;
1511                                 }
1512                                 goto done2;
1513
1514                         case Smath:
1515                                 if (!sexpflag)
1516                                         break;
1517                                 if (from != stop
1518                                     && c == BUF_FETCH_CHAR(buf, from - 1))
1519                                         from--;
1520                                 if (mathexit) {
1521                                         mathexit = 0;
1522                                         goto open2;
1523                                 }
1524                                 mathexit = 1;
1525
1526                         case Sclose:
1527                                 if (!++depth)
1528                                         goto done2;
1529                                 break;
1530
1531                         case Sopen:
1532                               open2:
1533                                 if (!--depth)
1534                                         goto done2;
1535                                 if (depth < min_depth) {
1536                                         if (noerror) {
1537                                                 return Qnil;
1538                                         }
1539                                         error("Containing expression "
1540                                               "ends prematurely");
1541                                 }
1542                                 break;
1543
1544                         case Scomment_fence:
1545                                 comstyle = ST_COMMENT_STYLE;
1546                                 /* falls through! */
1547                         case Sendcomment:
1548                                 if (parse_sexp_ignore_comments)
1549                                         from =
1550                                             find_start_of_comment(buf, from,
1551                                                                   stop,
1552                                                                   comstyle);
1553                                 break;
1554
1555                         case Sstring: {
1556                                 /* XEmacs change: call syntax_match() on
1557                                    character */
1558                                 Emchar ch = BUF_FETCH_CHAR(buf, from);
1559                                 Lisp_Object stermobj =
1560                                         syntax_match(syntax_cache.
1561                                                      current_syntax_table, ch);
1562                                 if (CHARP(stermobj)) {
1563                                         stringterm = XCHAR(stermobj);
1564                                 } else {
1565                                         stringterm = ch;
1566                                 }
1567                         }
1568
1569                                 /* falls through! */
1570                         case Sstring_fence:
1571                                 while (1) {
1572                                         if (from == stop)
1573                                                 goto lose;
1574
1575                                         UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1576                                         c = BUF_FETCH_CHAR(buf, from - 1);
1577                                         if ((code == Sstring
1578                                              ? c == stringterm
1579                                              : SYNTAX_FROM_CACHE(mirrortab,
1580                                                                  c) ==
1581                                              Sstring_fence)
1582                                             && !char_quoted(buf, from - 1)) {
1583                                                 break;
1584                                         }
1585
1586                                         from--;
1587                                 }
1588                                 from--;
1589                                 if (!depth && sexpflag)
1590                                         goto done2;
1591                                 break;
1592
1593                         default:
1594                                 /* shouldnt happen */
1595                                 break;
1596                         }
1597                 }
1598
1599                 /* Reached start of buffer.  Error if within object,
1600                    return nil if between */
1601                 if (depth)
1602                         goto lose;
1603
1604                 return Qnil;
1605
1606               done2:
1607                 count++;
1608         }
1609
1610         return (make_int(from));
1611
1612       lose:
1613         if (!noerror)
1614                 error("Unbalanced parentheses");
1615         return Qnil;
1616 }
1617
1618 int char_quoted(struct buffer *buf, Bufpos pos)
1619 {
1620         enum syntaxcode code;
1621         Bufpos beg = BUF_BEGV(buf);
1622         int quoted = 0;
1623         Bufpos startpos = pos;
1624
1625         while (pos > beg) {
1626                 UPDATE_SYNTAX_CACHE_BACKWARD(pos - 1);
1627                 code =
1628                     SYNTAX_FROM_CACHE(mirrortab, BUF_FETCH_CHAR(buf, pos - 1));
1629
1630                 if (code != Scharquote && code != Sescape)
1631                         break;
1632                 pos--;
1633                 quoted = !quoted;
1634         }
1635
1636         UPDATE_SYNTAX_CACHE(startpos);
1637         return quoted;
1638 }
1639
1640 DEFUN("scan-lists", Fscan_lists, 3, 5, 0,       /*
1641 Scan from character number FROM by COUNT lists.
1642 Returns the character number of the position thus found.
1643
1644 If DEPTH is nonzero, paren depth begins counting from that value,
1645 only places where the depth in parentheses becomes zero
1646 are candidates for stopping; COUNT such places are counted.
1647 Thus, a positive value for DEPTH means go out levels.
1648
1649 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1650
1651 If the beginning or end of (the accessible part of) the buffer is reached
1652 and the depth is wrong, an error is signaled.
1653 If the depth is right but the count is not used up, nil is returned.
1654
1655 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1656 of in the current buffer.
1657
1658 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1659 signalling an error.
1660 */
1661       (from, count, depth, buffer, noerror))
1662 {
1663         struct buffer *buf;
1664
1665         CHECK_INT(from);
1666         CHECK_INT(count);
1667         CHECK_INT(depth);
1668         buf = decode_buffer(buffer, 0);
1669
1670         return scan_lists(buf, XINT(from), XINT(count), XINT(depth), 0,
1671                           !NILP(noerror));
1672 }
1673
1674 DEFUN("scan-sexps", Fscan_sexps, 2, 4, 0,       /*
1675 Scan from character number FROM by COUNT balanced expressions.
1676 If COUNT is negative, scan backwards.
1677 Returns the character number of the position thus found.
1678
1679 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1680
1681 If the beginning or end of (the accessible part of) the buffer is reached
1682 in the middle of a parenthetical grouping, an error is signaled.
1683 If the beginning or end is reached between groupings
1684 but before count is used up, nil is returned.
1685
1686 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1687 of in the current buffer.
1688
1689 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1690 signalling an error.
1691 */
1692       (from, count, buffer, noerror))
1693 {
1694         struct buffer *buf = decode_buffer(buffer, 0);
1695         CHECK_INT(from);
1696         CHECK_INT(count);
1697
1698         return scan_lists(buf, XINT(from), XINT(count), 0, 1, !NILP(noerror));
1699 }
1700
1701 DEFUN("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1702 Move point backward over any number of chars with prefix syntax.
1703 This includes chars with "quote" or "prefix" syntax (' or p).
1704
1705 Optional arg BUFFER defaults to the current buffer.
1706 */
1707       (buffer))
1708 {
1709         struct buffer *buf = decode_buffer(buffer, 0);
1710         Bufpos beg = BUF_BEGV(buf);
1711         Bufpos pos = BUF_PT(buf);
1712 #ifndef emacs
1713         Lisp_Char_Table *mirrortab = XCHAR_TABLE(buf->mirror_syntax_table);
1714 #endif
1715         Emchar c = '\0';        /* initialize to avoid compiler warnings */
1716
1717         SCS_STATISTICS_SET_FUNCTION(scs_Fbackward_prefix_characters);
1718         SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos, -1);
1719
1720         while (pos > beg && !char_quoted(buf, pos - 1)
1721                /* Previous statement updates syntax table.  */
1722                &&
1723                (SYNTAX_FROM_CACHE(mirrortab, c = BUF_FETCH_CHAR(buf, pos - 1))
1724                 == Squote
1725                 /* equivalent to SYNTAX_PREFIX (mirrortab, c) */
1726                 || SYNTAX_CODE_PREFIX(SYNTAX_CODE_FROM_CACHE(mirrortab, c))))
1727                 pos--;
1728
1729         BUF_SET_PT(buf, pos);
1730
1731         return Qnil;
1732 }
1733 \f
1734 /* Parse forward from FROM to END,
1735    assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1736    and return a description of the state of the parse at END.
1737    If STOPBEFORE is nonzero, stop at the start of an atom.
1738    If COMMENTSTOP is 1, stop at the start of a comment; if it is -1,
1739      stop at the start of a comment or a string */
1740
1741 static void
1742 scan_sexps_forward(struct buffer *buf, struct lisp_parse_state *stateptr,
1743                    Bufpos from, Bufpos end,
1744                    int targetdepth, int stopbefore,
1745                    Lisp_Object oldstate, int commentstop)
1746 {
1747         struct lisp_parse_state state;
1748
1749         enum syntaxcode code;
1750         struct level {
1751                 int last, prev;
1752         };
1753         struct level levelstart[100];
1754         struct level *curlevel = levelstart;
1755         struct level *endlevel = levelstart + 100;
1756         int depth;              /* Paren depth of current scanning location.
1757                                    level - levelstart equals this except
1758                                    when the depth becomes negative.  */
1759         int mindepth;           /* Lowest DEPTH value seen.  */
1760         int start_quoted = 0;   /* Nonzero means starting after a char quote */
1761         int boundary_stop = commentstop == -1;
1762         Lisp_Object tem;
1763
1764         SCS_STATISTICS_SET_FUNCTION(scs_scan_sexps_forward);
1765         SETUP_SYNTAX_CACHE(from, 1);
1766         if (NILP(oldstate)) {
1767                 depth = 0;
1768                 state.instring = -1;
1769                 state.incomment = 0;
1770                 state.comstyle = 0;     /* comment style a by default */
1771                 state.comstr_start = -1;        /* no comment/string seen.  */
1772         } else {
1773                 tem = Fcar(oldstate);   /* elt 0, depth */
1774                 if (!NILP(tem))
1775                         depth = XINT(tem);
1776                 else
1777                         depth = 0;
1778
1779                 oldstate = Fcdr(oldstate);
1780                 oldstate = Fcdr(oldstate);
1781                 oldstate = Fcdr(oldstate);
1782                 tem = Fcar(oldstate);   /* elt 3, instring */
1783                 state.instring = (!NILP(tem)
1784                                   ? (INTP(tem) ? XINT(tem) : ST_STRING_STYLE)
1785                                   : -1);
1786
1787                 oldstate = Fcdr(oldstate);
1788                 tem = Fcar(oldstate);   /* elt 4, incomment */
1789                 state.incomment = !NILP(tem);
1790
1791                 oldstate = Fcdr(oldstate);
1792                 tem = Fcar(oldstate);   /* elt 5, follows-quote */
1793                 start_quoted = !NILP(tem);
1794
1795                 /* if the eighth element of the list is nil, we are in comment style
1796                    a; if it is t, we are in comment style b; if it is 'syntax-table,
1797                    we are in a generic comment */
1798                 oldstate = Fcdr(oldstate);
1799                 oldstate = Fcdr(oldstate);
1800                 /* The code below was changed radically for syntax-table properties.
1801                    A reasonable place to look if a bug manifests. */
1802                 tem = Fcar(oldstate);   /* elt 7, comment style a/b/fence */
1803                 state.comstyle = NILP(tem) ? 0 : (EQ(tem, Qsyntax_table)
1804                                                   ? ST_COMMENT_STYLE : 1);
1805
1806                 oldstate = Fcdr(oldstate);      /* elt 8, start of last comment/string */
1807                 tem = Fcar(oldstate);
1808                 state.comstr_start = NILP(tem) ? -1 : XINT(tem);
1809
1810                 /* elt 9, char numbers of starts-of-expression of levels
1811                    (starting from outermost). */
1812                 oldstate = Fcdr(oldstate);
1813                 tem = Fcar(oldstate);   /* elt 9, intermediate data for
1814                                            continuation of parsing (subject
1815                                            to change). */
1816                 while (!NILP(tem)) {    /* >= second enclosing sexps.  */
1817                         curlevel->last = XINT(Fcar(tem));
1818                         if (++curlevel == endlevel)
1819                                 error("Nesting too deep for parser");
1820                         curlevel->prev = -1;
1821                         curlevel->last = -1;
1822                         tem = Fcdr(tem);
1823                 }
1824                 /* end radical change section */
1825         }
1826         state.quoted = 0;
1827         mindepth = depth;
1828
1829         curlevel->prev = -1;
1830         curlevel->last = -1;
1831
1832         /* Enter the loop at a place appropriate for initial state. */
1833
1834         if (state.incomment)
1835                 goto startincomment;
1836         if (state.instring >= 0) {
1837                 if (start_quoted)
1838                         goto startquotedinstring;
1839                 goto startinstring;
1840         }
1841         if (start_quoted)
1842                 goto startquoted;
1843
1844         while (from < end) {
1845                 Emchar c;
1846                 int syncode;
1847
1848                 QUIT;
1849
1850                 UPDATE_SYNTAX_CACHE_FORWARD(from);
1851                 c = BUF_FETCH_CHAR(buf, from);
1852                 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1853                 code = SYNTAX_FROM_CODE(syncode);
1854                 from++;
1855
1856                 /* record the comment style we have entered so that only the
1857                    comment-ender sequence (or single char) of the same style
1858                    actually terminates the comment section. */
1859                 if (code == Scomment) {
1860                         state.comstyle = SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1861                             == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1862                         state.comstr_start = from - 1;
1863                 }
1864
1865                 /* a generic comment delimiter? */
1866                 else if (code == Scomment_fence) {
1867                         state.comstyle = ST_COMMENT_STYLE;
1868                         state.comstr_start = from - 1;
1869                         code = Scomment;
1870                 }
1871
1872                 else if (from < end && SYNTAX_CODE_START_FIRST_P(syncode)) {
1873                         int next_syncode;
1874                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1875                         next_syncode =
1876                             SYNTAX_CODE_FROM_CACHE(mirrortab,
1877                                                    BUF_FETCH_CHAR(buf, from));
1878
1879                         if (SYNTAX_CODES_START_P(syncode, next_syncode)) {
1880                                 code = Scomment;
1881                                 state.comstyle =
1882                                     SYNTAX_CODES_COMMENT_MASK_START(syncode,
1883                                                                     next_syncode)
1884                                     == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1885                                 state.comstr_start = from - 1;
1886                                 from++;
1887                                 UPDATE_SYNTAX_CACHE_FORWARD(from);
1888                         }
1889                 }
1890
1891                 if (SYNTAX_CODE_PREFIX(syncode))
1892                         continue;
1893                 switch (code) {
1894                 case Sescape:
1895                 case Scharquote:
1896                         if (stopbefore)
1897                                 goto stop;      /* this arg means stop at sexp start */
1898                         curlevel->last = from - 1;
1899                       startquoted:
1900                         if (from == end)
1901                                 goto endquoted;
1902                         from++;
1903                         goto symstarted;
1904                         /* treat following character as a word constituent */
1905                 case Sword:
1906                 case Ssymbol:
1907                         if (stopbefore)
1908                                 goto stop;      /* this arg means stop at sexp start */
1909                         curlevel->last = from - 1;
1910                 symstarted:
1911                         while (from < end) {
1912                                 UPDATE_SYNTAX_CACHE_FORWARD(from);
1913                                 switch ((unsigned int)
1914                                         SYNTAX_FROM_CACHE(
1915                                                 mirrortab,
1916                                                 BUF_FETCH_CHAR(buf, from))) {
1917                                 case Scharquote:
1918                                 case Sescape:
1919                                         from++;
1920                                         if (from == end)
1921                                                 goto endquoted;
1922                                         break;
1923                                 case Sword:
1924                                 case Ssymbol:
1925                                 case Squote:
1926                                         break;
1927                                 default:
1928                                         goto symdone;
1929                                 }
1930                                 from++;
1931                         }
1932                       symdone:
1933                         curlevel->prev = curlevel->last;
1934                         break;
1935
1936                 case Scomment:
1937                         state.incomment = 1;
1938                         if (commentstop || boundary_stop)
1939                                 goto done;
1940                       startincomment:
1941                         if (commentstop == 1)
1942                                 goto done;
1943                         UPDATE_SYNTAX_CACHE_FORWARD(from);
1944                         {
1945                                 Bufpos newfrom =
1946                                     find_end_of_comment(buf, from, end,
1947                                                         state.comstyle);
1948                                 if (newfrom < 0) {
1949                                         /* we terminated search because from == end */
1950                                         from = end;
1951                                         goto done;
1952                                 }
1953                                 from = newfrom;
1954                         }
1955                         state.incomment = 0;
1956                         state.comstyle = 0;     /* reset the comment style */
1957                         if (boundary_stop)
1958                                 goto done;
1959                         break;
1960
1961                 case Sopen:
1962                         if (stopbefore)
1963                                 goto stop;      /* this arg means stop at sexp start */
1964                         depth++;
1965                         /* curlevel++->last ran into compiler bug on Apollo */
1966                         curlevel->last = from - 1;
1967                         if (++curlevel == endlevel)
1968                                 error("Nesting too deep for parser");
1969                         curlevel->prev = -1;
1970                         curlevel->last = -1;
1971                         if (targetdepth == depth)
1972                                 goto done;
1973                         break;
1974
1975                 case Sclose:
1976                         depth--;
1977                         if (depth < mindepth)
1978                                 mindepth = depth;
1979                         if (curlevel != levelstart)
1980                                 curlevel--;
1981                         curlevel->prev = curlevel->last;
1982                         if (targetdepth == depth)
1983                                 goto done;
1984                         break;
1985
1986                 case Sstring:
1987                 case Sstring_fence:
1988                         state.comstr_start = from - 1;
1989                         if (stopbefore)
1990                                 goto stop;      /* this arg means stop at sexp start */
1991                         curlevel->last = from - 1;
1992                         if (code == Sstring_fence) {
1993                                 state.instring = ST_STRING_STYLE;
1994                         } else {
1995                                 /* XEmacs change: call syntax_match() on character */
1996                                 Emchar ch = BUF_FETCH_CHAR(buf, from - 1);
1997                                 Lisp_Object stermobj =
1998                                     syntax_match(syntax_cache.
1999                                                  current_syntax_table, ch);
2000
2001                                 if (CHARP(stermobj))
2002                                         state.instring = XCHAR(stermobj);
2003                                 else
2004                                         state.instring = ch;
2005                         }
2006                         if (boundary_stop)
2007                                 goto done;
2008                 startinstring:
2009                         while (1) {
2010                                 enum syntaxcode temp_code;
2011
2012                                 if (from >= end)
2013                                         goto done;
2014
2015                                 UPDATE_SYNTAX_CACHE_FORWARD(from);
2016                                 c = BUF_FETCH_CHAR(buf, from);
2017                                 temp_code = SYNTAX_FROM_CACHE(mirrortab, c);
2018
2019                                 if (state.instring != ST_STRING_STYLE &&
2020                                     temp_code == Sstring && c == state.instring)
2021                                         break;
2022
2023                                 switch ((unsigned int)temp_code) {
2024                                 case Sstring_fence:
2025                                         if (state.instring == ST_STRING_STYLE)
2026                                                 goto string_end;
2027                                         break;
2028                                 case Scharquote:
2029                                 case Sescape:
2030                                         {
2031                                                 from++;
2032                                               startquotedinstring:
2033                                                 if (from >= end)
2034                                                         goto endquoted;
2035                                                 break;
2036                                         }
2037                                 default:
2038                                         break;
2039                                 }
2040                                 from++;
2041                         }
2042                       string_end:
2043                         state.instring = -1;
2044                         curlevel->prev = curlevel->last;
2045                         from++;
2046                         if (boundary_stop)
2047                                 goto done;
2048                         break;
2049
2050                 case Smath:
2051                         break;
2052
2053                 case Swhitespace:
2054                 case Spunct:
2055                 case Squote:
2056                 case Sendcomment:
2057                 case Scomment_fence:
2058                 case Sinherit:
2059                 case Smax:
2060                         /* catch all */
2061                 default:
2062                         break;
2063                 }
2064         }
2065         goto done;
2066
2067       stop:                     /* Here if stopping before start of sexp. */
2068         from--;                 /* We have just fetched the char that starts it; */
2069         goto done;              /* but return the position before it. */
2070
2071       endquoted:
2072         state.quoted = 1;
2073       done:
2074         state.depth = depth;
2075         state.mindepth = mindepth;
2076         state.thislevelstart = curlevel->prev;
2077         state.prevlevelstart
2078             = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2079         state.location = from;
2080         state.levelstarts = Qnil;
2081         while (--curlevel >= levelstart)
2082                 state.levelstarts = Fcons(make_int(curlevel->last),
2083                                           state.levelstarts);
2084
2085         *stateptr = state;
2086 }
2087
2088 DEFUN("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0,       /*
2089 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
2090 Parsing stops at TO or when certain criteria are met;
2091 point is set to where parsing stops.
2092 If fifth arg OLDSTATE is omitted or nil,
2093 parsing assumes that FROM is the beginning of a function.
2094 Value is a list of nine elements describing final state of parsing:
2095 0. depth in parens.
2096 1. character address of start of innermost containing list; nil if none.
2097 2. character address of start of last complete sexp terminated.
2098 3. non-nil if inside a string.
2099 (It is the character that will terminate the string,
2100 or t if the string should be terminated by an explicit
2101 `syntax-table' property.)
2102 4. t if inside a comment.
2103 5. t if following a quote character.
2104 6. the minimum paren-depth encountered during this scan.
2105 7. nil if in comment style a, or not in a comment; t if in comment style b;
2106 `syntax-table' if given by an explicit `syntax-table' property.
2107 8. character address of start of last comment or string; nil if none.
2108 9. Intermediate data for continuation of parsing (subject to change).
2109 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
2110 in parentheses becomes equal to TARGETDEPTH.
2111 Fourth arg STOPBEFORE non-nil means stop when come to
2112 any character that starts a sexp.
2113 Fifth arg OLDSTATE is a nine-element list like what this function returns.
2114 It is used to initialize the state of the parse.  Its second and third
2115 elements are ignored.
2116 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If it
2117 is `syntax-table', stop after the start of a comment or a string, or after
2118 the end of a comment or string.
2119 */
2120       (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
2121 {
2122         struct lisp_parse_state state;
2123         int target;
2124         Bufpos start, end;
2125         struct buffer *buf = decode_buffer(buffer, 0);
2126         Lisp_Object val;
2127
2128         if (!NILP(targetdepth)) {
2129                 CHECK_INT(targetdepth);
2130                 target = XINT(targetdepth);
2131         } else
2132                 target = -100000;       /* We won't reach this depth */
2133
2134         get_buffer_range_char(buf, from, to, &start, &end, 0);
2135         scan_sexps_forward(buf, &state, start, end,
2136                            target, !NILP(stopbefore), oldstate,
2137                            (NILP(commentstop)
2138                             ? 0 : (EQ(commentstop, Qsyntax_table) ? -1 : 1)));
2139         BUF_SET_PT(buf, state.location);
2140
2141         /* reverse order */
2142         val = Qnil;
2143         val = Fcons(state.levelstarts, val);
2144         val = Fcons((state.incomment || (state.instring >= 0))
2145                     ? make_int(state.comstr_start) : Qnil, val);
2146         val = Fcons(state.comstyle ? (state.comstyle == ST_COMMENT_STYLE
2147                                       ? Qsyntax_table : Qt) : Qnil, val);
2148         val = Fcons(make_int(state.mindepth), val);
2149         val = Fcons(state.quoted ? Qt : Qnil, val);
2150         val = Fcons(state.incomment ? Qt : Qnil, val);
2151         val = Fcons(state.instring < 0
2152                     ? Qnil
2153                     : (state.instring == ST_STRING_STYLE
2154                        ? Qt : make_int(state.instring)), val);
2155         val =
2156             Fcons(state.thislevelstart <
2157                   0 ? Qnil : make_int(state.thislevelstart), val);
2158         val =
2159             Fcons(state.prevlevelstart <
2160                   0 ? Qnil : make_int(state.prevlevelstart), val);
2161         val = Fcons(make_int(state.depth), val);
2162
2163         return val;
2164 }
2165
2166 /* Updating of the mirror syntax table.
2167
2168    Each syntax table has a corresponding mirror table in it.
2169    Whenever we make a change to a syntax table, we call
2170    update_syntax_table() on it.
2171
2172    #### We really only need to map over the changed range.
2173
2174    If we change the standard syntax table, we need to map over
2175    all tables because any of them could be inheriting from the
2176    standard syntax table.
2177
2178    When `set-syntax-table' is called, we set the buffer's mirror
2179    syntax table as well.
2180    */
2181
2182 struct cmst_arg {
2183         Lisp_Object mirrortab;
2184         int check_inherit;
2185 };
2186
2187 static int cmst_mapfun(struct chartab_range *range, Lisp_Object val, void *arg)
2188 {
2189         struct cmst_arg *closure = (struct cmst_arg *)arg;
2190
2191         if (CONSP(val))
2192                 val = XCAR(val);
2193         if (SYNTAX_FROM_CODE(XINT(val)) == Sinherit && closure->check_inherit) {
2194                 struct cmst_arg recursive;
2195
2196                 recursive.mirrortab = closure->mirrortab;
2197                 recursive.check_inherit = 0;
2198                 map_char_table(XCHAR_TABLE(Vstandard_syntax_table), range,
2199                                cmst_mapfun, &recursive);
2200         } else
2201                 put_char_table(XCHAR_TABLE(closure->mirrortab), range, val);
2202         return 0;
2203 }
2204
2205 static void update_just_this_syntax_table(Lisp_Char_Table * ct)
2206 {
2207         struct chartab_range range;
2208         struct cmst_arg arg;
2209
2210         arg.mirrortab = ct->mirror_table;
2211         arg.check_inherit = (CHAR_TABLEP(Vstandard_syntax_table)
2212                              && ct != XCHAR_TABLE(Vstandard_syntax_table));
2213         range.type = CHARTAB_RANGE_ALL;
2214         map_char_table(ct, &range, cmst_mapfun, &arg);
2215 }
2216
2217 /* Called from chartab.c when a change is made to a syntax table.
2218    If this is the standard syntax table, we need to recompute
2219    *all* syntax tables (yuck).  Otherwise we just recompute this
2220    one. */
2221
2222 void update_syntax_table(Lisp_Char_Table * ct)
2223 {
2224         /* Don't be stymied at startup. */
2225         if (CHAR_TABLEP(Vstandard_syntax_table)
2226             && ct == XCHAR_TABLE(Vstandard_syntax_table)) {
2227                 Lisp_Object syntab;
2228
2229                 for (syntab = Vall_syntax_tables; !NILP(syntab);
2230                      syntab = XCHAR_TABLE(syntab)->next_table)
2231                         update_just_this_syntax_table(XCHAR_TABLE(syntab));
2232         } else
2233                 update_just_this_syntax_table(ct);
2234 }
2235 \f
2236 /************************************************************************/
2237 /*                            initialization                            */
2238 /************************************************************************/
2239
2240 void syms_of_syntax(void)
2241 {
2242         defsymbol(&Qsyntax_table_p, "syntax-table-p");
2243         defsymbol(&Qsyntax_table, "syntax-table");
2244
2245         DEFSUBR(Fsyntax_table_p);
2246         DEFSUBR(Fsyntax_table);
2247         DEFSUBR(Fstandard_syntax_table);
2248         DEFSUBR(Fcopy_syntax_table);
2249         DEFSUBR(Fset_syntax_table);
2250         DEFSUBR(Fsyntax_designator_chars);
2251         DEFSUBR(Fchar_syntax);
2252         DEFSUBR(Fmatching_paren);
2253         /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
2254         /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
2255
2256         DEFSUBR(Fforward_word);
2257
2258         DEFSUBR(Fforward_comment);
2259         DEFSUBR(Fscan_lists);
2260         DEFSUBR(Fscan_sexps);
2261         DEFSUBR(Fbackward_prefix_chars);
2262         DEFSUBR(Fparse_partial_sexp);
2263 }
2264
2265 void vars_of_syntax(void)
2266 {
2267         DEFVAR_BOOL("parse-sexp-ignore-comments", &parse_sexp_ignore_comments   /*
2268 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
2269                                                                                  */ );
2270         parse_sexp_ignore_comments = 0;
2271
2272         DEFVAR_BOOL("lookup-syntax-properties", &lookup_syntax_properties       /*
2273 Non-nil means `forward-sexp', etc., look up character syntax in the
2274 table that is the value of the `syntax-table' text property, if non-nil.
2275 The value of this property should be either a syntax table, or a cons
2276 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric
2277 syntax code, MATCHCHAR being nil or the character to match (which is
2278 relevant only for open/close type.
2279                                                                                  */ );
2280         lookup_syntax_properties = 0;   /* #### default off until optimized */
2281
2282         DEFVAR_BOOL("words-include-escapes", &words_include_escapes     /*
2283 Non-nil means `forward-word', etc., should treat escape chars part of words.
2284                                                                          */ );
2285         words_include_escapes = 0;
2286
2287         no_quit_in_re_search = 0;
2288 }
2289
2290 static void define_standard_syntax(const char *p, enum syntaxcode syn)
2291 {
2292         for (; *p; p++)
2293                 Fput_char_table(make_char(*p), make_int(syn),
2294                                 Vstandard_syntax_table);
2295 }
2296
2297 void complex_vars_of_syntax(void)
2298 {
2299         Emchar i;
2300         const char *p;
2301         /* Set this now, so first buffer creation can refer to it. */
2302         /* Make it nil before calling copy-syntax-table
2303            so that copy-syntax-table will know not to try to copy from garbage */
2304         Vstandard_syntax_table = Qnil;
2305         Vstandard_syntax_table = Fcopy_syntax_table(Qnil);
2306         staticpro(&Vstandard_syntax_table);
2307
2308         Vsyntax_designator_chars_string = make_string_nocopy(syntax_code_spec,
2309                                                              Smax);
2310         staticpro(&Vsyntax_designator_chars_string);
2311
2312         fill_char_table(XCHAR_TABLE(Vstandard_syntax_table), make_int(Spunct));
2313
2314         for (i = 0; i <= 32; i++)       /* Control 0 plus SPACE */
2315                 Fput_char_table(make_char(i), make_int(Swhitespace),
2316                                 Vstandard_syntax_table);
2317         for (i = 127; i <= 159; i++)    /* DEL plus Control 1 */
2318                 Fput_char_table(make_char(i), make_int(Swhitespace),
2319                                 Vstandard_syntax_table);
2320
2321         define_standard_syntax("abcdefghijklmnopqrstuvwxyz"
2322                                "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2323                                "0123456789" "$%", Sword);
2324         define_standard_syntax("\"", Sstring);
2325         define_standard_syntax("\\", Sescape);
2326         define_standard_syntax("_-+*/&|<>=", Ssymbol);
2327         define_standard_syntax(".,;:?!#@~^'`", Spunct);
2328
2329         for (p = "()[]{}"; *p; p += 2) {
2330                 Fput_char_table(make_char(p[0]),
2331                                 Fcons(make_int(Sopen), make_char(p[1])),
2332                                 Vstandard_syntax_table);
2333                 Fput_char_table(make_char(p[1]),
2334                                 Fcons(make_int(Sclose), make_char(p[0])),
2335                                 Vstandard_syntax_table);
2336         }
2337 }