1 /* SXEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of SXEmacs
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* Synched up with: FSF 19.28. */
23 /* This file has been Mule-ized. */
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33 explaining the purpose of the Sextword syntax category:
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult. Theoretically it's impossible without
37 using natural language processing techniques. But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
42 A Japanese word is a sequence of characters that consists of
43 zero or more Kanji characters followed by zero or more
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD. For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
51 So, we introduced Sextword for Japanese letters. A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters. */
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
56 /* Recovered by tomo */
58 #define ST_COMMENT_STYLE 0x101
59 #define ST_STRING_STYLE 0x102
61 Lisp_Object Qsyntax_table;
62 int lookup_syntax_properties;
64 Lisp_Object Qsyntax_table_p;
66 int words_include_escapes;
68 int parse_sexp_ignore_comments;
70 /* The following two variables are provided to tell additional information
71 to the regex routines. We do it this way rather than change the
72 arguments to re_search_2() in an attempt to maintain some call
73 compatibility with other versions of the regex code. */
75 /* Tell the regex routines not to QUIT. Normally there is a QUIT
76 each iteration in re_search_2(). */
77 int no_quit_in_re_search;
79 /* Tell the regex routines which buffer to access for SYNTAX() lookups
81 struct buffer *regex_emacs_buffer;
83 /* In Emacs, this is the string or buffer in which we
84 are matching. It is used for looking up syntax properties. */
85 Lisp_Object regex_match_object;
87 Lisp_Object Vstandard_syntax_table;
89 Lisp_Object Vsyntax_designator_chars_string;
91 /* This is the internal form of the parse state used in parse-partial-sexp. */
93 struct lisp_parse_state {
94 int depth; /* Depth at end of parsing */
95 Emchar instring; /* -1 if not within string, else desired terminator */
96 int incomment; /* Nonzero if within a comment at end of parsing */
97 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE */
98 int quoted; /* Nonzero if just after an escape char at end of
100 Bufpos thislevelstart; /* Char number of most recent start-of-expression
102 Bufpos prevlevelstart; /* Char number of start of containing expression */
103 Bufpos location; /* Char number at which parsing stopped */
104 int mindepth; /* Minimum depth seen while scanning */
105 Bufpos comstr_start; /* Position just after last comment/string starter
106 (if the 'syntax-table text property is not
107 supported, used only for comment starts) */
108 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
109 of levels (starting from outermost). */
112 /* These variables are a cache for finding the start of a defun.
113 find_start_pos is the place for which the defun start was found.
114 find_start_value is the defun start position found for it.
115 find_start_buffer is the buffer it was found in.
116 find_start_begv is the BEGV value when it was found.
117 find_start_modiff is the value of MODIFF when it was found. */
119 static Bufpos find_start_pos;
120 static Bufpos find_start_value;
121 static struct buffer *find_start_buffer;
122 static Bufpos find_start_begv;
123 static int find_start_modiff;
125 /* Find a defun-start that is the last one before POS (or nearly the last).
126 We record what we find, so that another call in the same area
127 can return the same value right away. */
129 static Bufpos find_defun_start(struct buffer *buf, Bufpos pos)
133 /* Use previous finding, if it's valid and applies to this inquiry. */
134 if (buf == find_start_buffer
135 /* Reuse the defun-start even if POS is a little farther on.
136 POS might be in the next defun, but that's ok.
137 Our value may not be the best possible, but will still be usable. */
138 && pos <= find_start_pos + 1000
139 && pos >= find_start_value
140 && BUF_BEGV(buf) == find_start_begv
141 && BUF_MODIFF(buf) == find_start_modiff)
142 return find_start_value;
144 /* Back up to start of line. */
145 tem = find_next_newline(buf, pos, -1);
147 SCS_STATISTICS_SET_FUNCTION(scs_find_defun_start);
148 SETUP_SYNTAX_CACHE(tem, 1);
149 while (tem > BUF_BEGV(buf)) {
150 UPDATE_SYNTAX_CACHE_BACKWARD(tem);
152 /* Open-paren at start of line means we found our defun-start. */
153 if (SYNTAX_FROM_CACHE(mirrortab, BUF_FETCH_CHAR(buf, tem)) ==
156 /* Move to beg of previous line. */
157 tem = find_next_newline(buf, tem, -2);
160 /* Record what we found, for the next try. */
161 find_start_value = tem;
162 find_start_buffer = buf;
163 find_start_modiff = BUF_MODIFF(buf);
164 find_start_begv = BUF_BEGV(buf);
165 find_start_pos = pos;
167 return find_start_value;
170 DEFUN("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
171 Return t if OBJECT is a syntax table.
172 Any vector of 256 elements will do.
176 return (CHAR_TABLEP(object)
177 && XCHAR_TABLE_TYPE(object) == CHAR_TABLE_TYPE_SYNTAX)
181 static Lisp_Object check_syntax_table(Lisp_Object obj, Lisp_Object default_)
185 while (NILP(Fsyntax_table_p(obj)))
186 obj = wrong_type_argument(Qsyntax_table_p, obj);
190 DEFUN("syntax-table", Fsyntax_table, 0, 1, 0, /*
191 Return the current syntax table.
192 This is the one specified by the current buffer, or by BUFFER if it
197 return decode_buffer(buffer, 0)->syntax_table;
200 DEFUN("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
201 Return the standard syntax table.
202 This is the one used for new buffers.
206 return Vstandard_syntax_table;
209 DEFUN("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
210 Return a new syntax table which is a copy of SYNTAX-TABLE.
211 SYNTAX-TABLE defaults to the standard syntax table.
215 if (NILP(Vstandard_syntax_table))
216 return Fmake_char_table(Qsyntax);
218 syntax_table = check_syntax_table(syntax_table, Vstandard_syntax_table);
219 return Fcopy_char_table(syntax_table);
222 DEFUN("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
223 Select SYNTAX-TABLE as the new syntax table for BUFFER.
224 BUFFER defaults to the current buffer if omitted.
226 (syntax_table, buffer))
228 struct buffer *buf = decode_buffer(buffer, 0);
229 syntax_table = check_syntax_table(syntax_table, Qnil);
230 buf->syntax_table = syntax_table;
231 buf->mirror_syntax_table = XCHAR_TABLE(syntax_table)->mirror_table;
232 /* Indicate that this buffer now has a specified syntax table. */
233 buf->local_var_flags |= XINT(buffer_local_flags.syntax_table);
237 /* The current syntax state */
238 struct syntax_cache syntax_cache;
241 Update syntax_cache to an appropriate setting for position POS
243 The sign of COUNT gives the relative position of POS wrt the
244 previously valid interval. (not currently used)
246 `syntax_cache.*_change' are the next and previous positions at
247 which syntax_code and c_s_t will need to be recalculated.
249 #### Currently this code uses 'get-char-property', which will
250 return the "last smallest" extent at a given position. In cases
251 where overlapping extents are defined, this code will simply use
252 whatever is returned by get-char-property.
254 It might be worth it at some point to merge provided syntax tables
255 outward to the current buffer.
258 This implementation has to rather inefficient, since it looks at
259 next-extent-change, and a heavily font-locked buffer will be rife
260 with irrelevant extents. We could do a sledgehammer check on this
261 by looking at the distribution of extent lengths. Also count up
262 cache hits and misses.
264 If we assume that syntax-table is a _text_ property (which also
265 deals with the issue of overlapping syntax-table properties), then
266 the following strategy recommends itself
267 o give the syntax cache a `valid' flag, to be reset whenever a
268 syntax-table property is added, changed, or removed; this could
269 be done by setting syntax_cache's prev_change > next_change
270 (but not compatible with using extents/markers here); if it's a
271 Lisp variable, doing it in Lisp shouldn't be too inefficient
272 o lazily initialize the cache whenever the object being examined
273 differs from the object the cache currently refers to
274 o by using {previous,next-single-property-change} we should be
275 able to get much bigger cache intervals (in most cases, the
277 o cache markers instead of positions so the mere insertion or
278 deletion of text doesn't invalidate the cache, only if it
279 involves a syntax-table property (we could also cache the
280 extents carrying the syntax-table text-property; that gives us
281 another check for invalid cache).
283 If I understand this correctly, we need to invalidate the cache in the
285 o If the referenced object changes (it's a global cache)
286 o If there are insertions or deletions of text (the positions are
287 absolute; fix: use markers or an extent instead?)
288 o If the syntax-table property is altered == added and different or
289 removed and the same (fix: probably computable from range overlap,
290 but is it worth it? would interact with ins/del); this includes
291 detachment of extents with the same value (but only the boundary
292 extents, as otherwise the range coalesces across the deletion point)
293 and attachment of extents with a different value
294 Note: the above looks a lot like what Ben has implemented in 21.5, but
295 he goes one better by making the cache buffer-local.
297 Note: cperl mode uses the text property API, not extents/overlays.
300 #ifdef SYNTAX_CACHE_STATISTICS
301 struct syntax_cache_statistics scs_statistics =
302 { 0, 0, 0, 0, -1, -1, 0.0, 0.0, scs_no_function };
304 char *syntax_cache_statistics_function_names[scs_number_of_functions] = {
310 "Fbackward_prefix_characters",
313 #endif /* SYNTAX_CACHE_STATISTICS */
315 void update_syntax_cache(int pos, int count)
317 Lisp_Object tmp_table;
319 #ifdef SYNTAX_CACHE_STATISTICS
320 if (scs_statistics.total_updates == 0) {
322 for (i = 0; i < scs_number_of_functions; ++i)
323 scs_statistics.functions[i] = 0;
325 if (syntax_cache.prev_change > syntax_cache.next_change)
326 scs_statistics.inits++;
327 else if (pos < syntax_cache.prev_change)
328 scs_statistics.misses_lo++;
329 else if (pos >= syntax_cache.next_change)
330 scs_statistics.misses_hi++;
331 #endif /* SYNTAX_CACHE_STATISTICS */
333 /* #### Since font-lock undoes any narrowing, maybe the BUF_ZV and
334 BUF_BEGV below should be BUF_Z and BUF_BEG respectively? */
335 if (BUFFERP(syntax_cache.object)) {
336 int get_change_before = pos + 1;
338 tmp_table = Fget_char_property(make_int(pos), Qsyntax_table,
339 syntax_cache.object, Qnil);
340 #if NEXT_SINGLE_PROPERTY_CHANGE
341 /* #### shouldn't we be using BUF_BEGV here? */
342 syntax_cache.next_change =
343 XINT(Fnext_single_property_change
344 (make_int(pos > 0 ? pos : 1), Qsyntax_table,
346 make_int(BUF_ZV(syntax_cache.buffer))));
348 syntax_cache.next_change =
349 XINT(Fnext_extent_change(make_int(pos > 0 ? pos : 1),
350 syntax_cache.object));
353 /* #### shouldn't we be using BUF_BEGV here? */
354 if (get_change_before < 1)
355 get_change_before = 1;
356 else if (get_change_before > BUF_ZV(syntax_cache.buffer))
357 get_change_before = BUF_ZV(syntax_cache.buffer);
359 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
360 /* #### shouldn't we be using BUF_BEGV here? */
361 syntax_cache.prev_change =
362 XINT(Fprevious_single_property_change
363 (make_int(get_change_before), Qsyntax_table,
364 syntax_cache.object, make_int(1)));
366 syntax_cache.prev_change =
367 XINT(Fprevious_extent_change(make_int(get_change_before),
368 syntax_cache.object));
370 } else if (STRINGP(syntax_cache.object)) {
371 int get_change_before = pos + 1;
373 tmp_table = Fget_char_property(make_int(pos), Qsyntax_table,
374 syntax_cache.object, Qnil);
375 #if NEXT_SINGLE_PROPERTY_CHANGE
376 /* #### shouldn't we be using BUF_BEGV here? */
377 syntax_cache.next_change =
378 XINT(Fnext_single_property_change
379 (make_int(pos >= 0 ? pos : 0), Qsyntax_table,
381 make_int(XSTRING_LENGTH(syntax_cache.object))));
383 syntax_cache.next_change =
384 XINT(Fnext_extent_change(make_int(pos >= 0 ? pos : 0),
385 syntax_cache.object));
388 if (get_change_before < 0)
389 get_change_before = 0;
390 else if (get_change_before >
391 XSTRING_LENGTH(syntax_cache.object))
392 get_change_before = XSTRING_LENGTH(syntax_cache.object);
394 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
395 syntax_cache.prev_change =
396 XINT(Fprevious_single_property_change
397 (make_int(get_change_before), Qsyntax_table,
398 syntax_cache.object, make_int(0)));
400 syntax_cache.prev_change =
401 XINT(Fprevious_extent_change(make_int(get_change_before),
402 syntax_cache.object));
405 /* silence compiler */
408 * #### Is there another sensible thing to do here? */
409 assert(BUFFERP(syntax_cache.object) ||
410 STRINGP(syntax_cache.object));
413 if (EQ(Fsyntax_table_p(tmp_table), Qt)) {
414 syntax_cache.use_code = 0;
415 syntax_cache.current_syntax_table =
416 XCHAR_TABLE(tmp_table)->mirror_table;
417 } else if (CONSP(tmp_table) && INTP(XCAR(tmp_table))) {
418 syntax_cache.use_code = 1;
419 /* we CANNOT cast the following, XINT goes to long int
420 * while enums are usually just unsigned ints
421 * this'll trigger a warning on 64b machines */
422 syntax_cache.syntax_code = XINT(XCAR(tmp_table));
424 syntax_cache.use_code = 0;
425 syntax_cache.current_syntax_table =
426 syntax_cache.buffer->mirror_syntax_table;
429 #ifdef SYNTAX_CACHE_STATISTICS
432 syntax_cache.next_change - syntax_cache.prev_change;
434 scs_statistics.misses_lo + scs_statistics.misses_hi +
435 scs_statistics.inits;
437 if (scs_statistics.min_length == -1
438 || scs_statistics.min_length > length)
439 scs_statistics.min_length = length;
440 if (scs_statistics.max_length == -1
441 || scs_statistics.max_length < length)
442 scs_statistics.max_length = length;
443 scs_statistics.mean_length_on_miss =
444 ((misses - 1) * scs_statistics.mean_length_on_miss +
448 scs_statistics.mean_length
449 = scs_statistics.total_updates * scs_statistics.mean_length
450 + syntax_cache.next_change - syntax_cache.prev_change;
451 scs_statistics.total_updates++;
452 scs_statistics.mean_length /= scs_statistics.total_updates;
454 if (scs_statistics.this_function != scs_no_function) {
455 scs_statistics.functions[scs_statistics.this_function]++;
456 scs_statistics.this_function = scs_no_function;
460 (scs_statistics.total_updates %
461 SYNTAX_CACHE_STATISTICS_REPORT_INTERVAL)) {
462 fprintf(stderr, "Syntax cache stats:\n ");
464 "updates %d, inits %d, misses low %d, misses high %d,",
465 scs_statistics.total_updates, scs_statistics.inits,
466 scs_statistics.misses_lo, scs_statistics.misses_hi);
467 fprintf(stderr, "\n ");
469 #define REPORT_FUNCTION(i) \
470 fprintf (stderr, " %s %d,", \
471 syntax_cache_statistics_function_names[i], \
472 scs_statistics.functions[i]);
474 REPORT_FUNCTION(scs_find_context);
475 REPORT_FUNCTION(scs_find_defun_start);
476 REPORT_FUNCTION(scs_scan_words);
477 REPORT_FUNCTION(scs_Fforward_comment);
478 fprintf(stderr, "\n ");
479 REPORT_FUNCTION(scs_scan_lists);
480 REPORT_FUNCTION(scs_Fbackward_prefix_characters);
481 REPORT_FUNCTION(scs_scan_sexps_forward);
482 #undef REPORT_FUNCTION
484 fprintf(stderr, "\n min length %d, max length %d,",
485 scs_statistics.min_length, scs_statistics.max_length);
487 "\n mean length %.1f, mean length on miss %.1f\n",
488 scs_statistics.mean_length,
489 scs_statistics.mean_length_on_miss);
491 #endif /* SYNTAX_CACHE_STATISTICS */
494 /* Convert a letter which signifies a syntax code
495 into the code it signifies.
496 This is used by modify-syntax-entry, and other things. */
498 const unsigned char syntax_spec_code[0400] =
499 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
500 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
501 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
502 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
503 (char)Swhitespace, 0377, (char)Sstring, 0377,
504 (char)Smath, 0377, 0377, (char)Squote,
505 (char)Sopen, (char)Sclose, 0377, 0377,
506 0377, (char)Swhitespace, (char)Spunct, (char)Scharquote,
507 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
508 0377, 0377, 0377, 0377,
509 (char)Scomment, 0377, (char)Sendcomment, 0377,
510 (char)Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
511 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
512 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char)Sword,
513 0377, 0377, 0377, 0377, (char)Sescape, 0377, 0377, (char)Ssymbol,
514 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
515 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
516 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char)Sword,
517 0377, 0377, 0377, 0377, (char)Sstring_fence, 0377, 0377, 0377
520 unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@!|";
522 DEFUN("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
523 Return a string of the recognized syntax designator chars.
524 The chars are ordered by their internal syntax codes, which are
525 numbered starting at 0.
529 return Vsyntax_designator_chars_string;
532 DEFUN("char-syntax", Fchar_syntax, 1, 2, 0, /*
533 Return the syntax code of CHARACTER, described by a character.
534 For example, if CHARACTER is a word constituent,
535 the character `?w' is returned.
536 The characters that correspond to various syntax codes
537 are listed in the documentation of `modify-syntax-entry'.
538 Optional second argument SYNTAX-TABLE defaults to the current buffer's
541 (character, syntax_table))
543 Lisp_Char_Table *mirrortab;
545 if (NILP(character)) {
546 character = make_char('\000');
548 CHECK_CHAR_COERCE_INT(character);
550 check_syntax_table(syntax_table, current_buffer->syntax_table);
551 mirrortab = XCHAR_TABLE(XCHAR_TABLE(syntax_table)->mirror_table);
553 make_char(syntax_code_spec
554 [(int)SYNTAX(mirrortab, XCHAR(character))]);
560 charset_syntax(struct buffer *buf, Lisp_Object charset, int *multi_p_out)
563 /* #### get this right */
569 Lisp_Object syntax_match(Lisp_Object syntax_table, Emchar ch)
571 Lisp_Object code = XCHAR_TABLE_VALUE_UNSAFE(syntax_table, ch);
572 Lisp_Object code2 = code;
576 if (SYNTAX_FROM_CODE(XINT(code2)) == Sinherit)
577 code = XCHAR_TABLE_VALUE_UNSAFE(Vstandard_syntax_table, ch);
579 return CONSP(code) ? XCDR(code) : Qnil;
582 DEFUN("matching-paren", Fmatching_paren, 1, 2, 0, /*
583 Return the matching parenthesis of CHARACTER, or nil if none.
584 Optional second argument SYNTAX-TABLE defaults to the current buffer's
587 (character, syntax_table))
589 Lisp_Char_Table *mirrortab;
592 CHECK_CHAR_COERCE_INT(character);
594 check_syntax_table(syntax_table, current_buffer->syntax_table);
595 mirrortab = XCHAR_TABLE(XCHAR_TABLE(syntax_table)->mirror_table);
596 code = SYNTAX(mirrortab, XCHAR(character));
597 if (code == Sopen || code == Sclose || code == Sstring)
598 return syntax_match(syntax_table, XCHAR(character));
603 /* Return 1 if there is a word boundary between two word-constituent
604 characters C1 and C2 if they appear in this order, else return 0.
605 There is no word boundary between two word-constituent ASCII
607 #define WORD_BOUNDARY_P(c1, c2) \
608 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
609 && word_boundary_p (c1, c2))
611 extern int word_boundary_p(Emchar c1, Emchar c2);
614 /* Return the position across COUNT words from FROM.
615 If that many words cannot be found before the end of the buffer, return 0.
616 COUNT negative means scan backward and stop at word beginning. */
618 Bufpos scan_words(struct buffer *buf, Bufpos from, int count)
620 Bufpos limit = count > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
622 enum syntaxcode code;
624 SCS_STATISTICS_SET_FUNCTION(scs_scan_words);
625 SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, from, count);
627 /* #### is it really worth it to hand expand both cases? JV */
635 UPDATE_SYNTAX_CACHE_FORWARD(from);
636 ch0 = BUF_FETCH_CHAR(buf, from);
637 code = SYNTAX_FROM_CACHE(mirrortab, ch0);
640 if (words_include_escapes
641 && (code == Sescape || code == Scharquote))
649 while (from != limit) {
650 UPDATE_SYNTAX_CACHE_FORWARD(from);
651 ch1 = BUF_FETCH_CHAR(buf, from);
652 code = SYNTAX_FROM_CACHE(mirrortab, ch1);
653 if (!(words_include_escapes
654 && (code == Sescape || code == Scharquote)))
657 || WORD_BOUNDARY_P(ch0, ch1)
676 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
677 ch1 = BUF_FETCH_CHAR(buf, from - 1);
678 code = SYNTAX_FROM_CACHE(mirrortab, ch1);
681 if (words_include_escapes
682 && (code == Sescape || code == Scharquote))
690 while (from != limit) {
691 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
692 ch0 = BUF_FETCH_CHAR(buf, from - 1);
693 code = SYNTAX_FROM_CACHE(mirrortab, ch0);
695 if (!(words_include_escapes
696 && (code == Sescape || code == Scharquote)))
699 || WORD_BOUNDARY_P(ch0, ch1)
714 DEFUN("forward-word", Fforward_word, 0, 2, "_p", /*
715 Move point forward COUNT words (backward if COUNT is negative).
716 Normally t is returned, but if an edge of the buffer is reached,
717 point is left there and nil is returned.
719 The characters that are moved over may be added to the current selection
720 \(i.e. active region) if the Shift key is held down, a motion key is used
721 to invoke this command, and `shifted-motion-keys-select-region' is t; see
722 the documentation for this variable for more details.
724 COUNT defaults to 1, and BUFFER defaults to the current buffer.
729 struct buffer *buf = decode_buffer(buffer, 0);
739 val = scan_words(buf, BUF_PT(buf), n);
741 BUF_SET_PT(buf, val);
744 BUF_SET_PT(buf, n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf));
749 static void scan_sexps_forward(struct buffer *buf,
750 struct lisp_parse_state *,
751 Bufpos from, Bufpos end,
752 int targetdepth, int stopbefore,
753 Lisp_Object oldstate, int commentstop);
756 find_start_of_comment(struct buffer *buf, Bufpos from, Bufpos stop,
760 enum syntaxcode code;
762 /* Look back, counting the parity of string-quotes,
763 and recording the comment-starters seen.
764 When we reach a safe place, assume that's not in a string;
765 then step the main scan to the earliest comment-starter seen
766 an even number of string quotes away from the safe place.
768 OFROM[I] is position of the earliest comment-starter seen
769 which is I+2X quotes from the comment-end.
770 PARITY is current parity of quotes from the comment end. */
772 Emchar my_stringend = 0;
773 int string_lossage = 0;
774 Bufpos comment_end = from;
775 Bufpos comstart_pos = 0;
776 int comstart_parity = 0;
777 int styles_match_p = 0;
778 /* mask to match comment styles against; for ST_COMMENT_STYLE, this
779 will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
780 int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
782 /* At beginning of range to scan, we're outside of strings;
783 that determines quote parity to the comment-end. */
784 while (from != stop) {
787 /* Move back and examine a character. */
789 UPDATE_SYNTAX_CACHE_BACKWARD(from);
791 c = BUF_FETCH_CHAR(buf, from);
792 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
793 code = SYNTAX_FROM_CODE(syncode);
795 /* is this a 1-char comment end sequence? if so, try
796 to see if style matches previously extracted mask */
797 if (code == Sendcomment) {
798 /* MT had SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode) & mask
799 but (as a Boolean) that's just a complicated way to write: */
801 SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask);
804 /* or are we looking at a 1-char comment start sequence
805 of the style matching mask? */
806 else if (code == Scomment) {
808 SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask);
811 /* otherwise, is this a 2-char comment end or start sequence? */
812 else if (from > stop)
814 /* 2-char comment end sequence? */
815 if (SYNTAX_CODE_END_SECOND_P(syncode)) {
817 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
819 SYNTAX_CODE_FROM_CACHE(mirrortab,
824 if (SYNTAX_CODES_END_P
825 (prev_syncode, syncode)) {
828 SYNTAX_CODES_MATCH_END_P
829 (prev_syncode, syncode,
832 UPDATE_SYNTAX_CACHE_BACKWARD
834 c = BUF_FETCH_CHAR(buf, from);
836 /* Found a comment-end sequence, so skip past the
837 check for a comment-start */
842 /* 2-char comment start sequence? */
843 if (SYNTAX_CODE_START_SECOND_P(syncode)) {
845 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
847 SYNTAX_CODE_FROM_CACHE(mirrortab,
852 if (SYNTAX_CODES_START_P
853 (prev_syncode, syncode)) {
856 SYNTAX_CODES_MATCH_START_P
857 (prev_syncode, syncode,
860 UPDATE_SYNTAX_CACHE_BACKWARD
862 c = BUF_FETCH_CHAR(buf, from);
867 /* Ignore escaped characters. */
868 if (char_quoted(buf, from))
871 /* Track parity of quotes. */
872 if (code == Sstring) {
874 if (my_stringend == 0)
876 /* If we have two kinds of string delimiters.
877 There's no way to grok this scanning backwards. */
878 else if (my_stringend != c)
882 if (code == Sstring_fence || code == Scomment_fence) {
884 if (my_stringend == 0)
887 Sstring_fence ? ST_STRING_STYLE :
889 /* If we have two kinds of string delimiters.
890 There's no way to grok this scanning backwards. */
891 else if (my_stringend != (code == Sstring_fence
897 /* Record comment-starters according to that
898 quote-parity to the comment-end. */
899 if (code == Scomment && styles_match_p) {
900 comstart_parity = parity;
904 /* If we find another earlier comment-ender,
905 any comment-starts earlier than that don't count
906 (because they go with the earlier comment-ender). */
907 if (code == Sendcomment && styles_match_p)
910 /* Assume a defun-start point is outside of strings. */
912 && (from == stop || BUF_FETCH_CHAR(buf, from - 1) == '\n'))
916 if (comstart_pos == 0)
918 /* If the earliest comment starter
919 is followed by uniform paired string quotes or none,
920 we know it can't be inside a string
921 since if it were then the comment ender would be inside one.
922 So it does start a comment. Skip back to it. */
923 else if (comstart_parity == 0 && !string_lossage)
926 /* We had two kinds of string delimiters mixed up
927 together. Decode this going forwards.
928 Scan fwd from the previous comment ender
929 to the one in question; this records where we
930 last passed a comment starter. */
932 struct lisp_parse_state state;
933 scan_sexps_forward(buf, &state,
934 find_defun_start(buf, comment_end),
935 comment_end - 1, -10000, 0, Qnil, 0);
937 from = state.comstr_start;
939 /* We can't grok this as a comment; scan it normally. */
941 UPDATE_SYNTAX_CACHE_FORWARD(from - 1);
947 find_end_of_comment(struct buffer *buf, Bufpos from, Bufpos stop, int comstyle)
951 enum syntaxcode code, next_code;
952 /* mask to match comment styles against; for ST_COMMENT_STYLE, this
953 will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
954 int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
956 /* This is only called by functions which have already set up the
957 syntax_cache and are keeping it up-to-date */
963 UPDATE_SYNTAX_CACHE_FORWARD(from);
964 c = BUF_FETCH_CHAR(buf, from);
965 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
966 code = SYNTAX_FROM_CODE(syncode);
969 UPDATE_SYNTAX_CACHE_FORWARD(from);
971 /* At end of current generic comment? */
972 if (comstyle == ST_COMMENT_STYLE) {
973 if (code == Scomment_fence)
976 continue; /* Ignore other styles in generic comments */
978 /* At end of current one-character comment of specified style? */
979 else if (code == Sendcomment &&
980 SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask)) {
981 /* pre-MT code effectively does from-- here, that seems wrong */
985 /* At end of current two-character comment of specified style? */
986 c = BUF_FETCH_CHAR(buf, from);
987 next_code = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
989 && SYNTAX_CODES_MATCH_END_P(syncode, next_code, mask)) {
991 UPDATE_SYNTAX_CACHE_FORWARD(from);
998 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
999 in this function (and minor changes to find_start_of_comment(),
1000 above, which is part of Fforward_comment() in FSF). Attempts to port
1001 that logic made this function break, so I'm leaving it out. If anyone
1002 ever complains about this function not working properly, take a look
1003 at those changes. --ben */
1005 DEFUN("forward-comment", Fforward_comment, 0, 2, 0, /*
1006 Move forward across up to COUNT comments, or backwards if COUNT is negative.
1007 Stop scanning if we find something other than a comment or whitespace.
1008 Set point to where scanning stops.
1009 If COUNT comments are found as expected, with nothing except whitespace
1010 between them, return t; otherwise return nil.
1011 Point is set in either case.
1012 COUNT defaults to 1, and BUFFER defaults to the current buffer.
1019 enum syntaxcode code;
1022 struct buffer *buf = decode_buffer(buffer, 0);
1033 SCS_STATISTICS_SET_FUNCTION(scs_Fforward_comment);
1034 SETUP_SYNTAX_CACHE(from, n);
1039 while (from < stop) {
1040 int comstyle = 0; /* Code for comment style: 0 for A, 1 for B,
1041 or ST_COMMENT_STYLE */
1043 if (char_quoted(buf, from)) {
1048 UPDATE_SYNTAX_CACHE_FORWARD(from);
1049 c = BUF_FETCH_CHAR(buf, from);
1050 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1051 code = SYNTAX_FROM_CODE(syncode);
1053 if (code == Scomment) {
1054 /* we have encountered a single character comment start
1055 sequence, and we are ignoring all text inside comments.
1056 we must record the comment style this character begins
1057 so that later, only a comment end of the same style actually
1058 ends the comment section */
1060 SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1061 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1064 else if (code == Scomment_fence) {
1067 comstyle = ST_COMMENT_STYLE;
1070 else if (from < stop
1071 && SYNTAX_CODE_START_FIRST_P(syncode)) {
1073 UPDATE_SYNTAX_CACHE_FORWARD(from + 1);
1075 SYNTAX_CODE_FROM_CACHE(mirrortab,
1080 if (SYNTAX_CODES_START_P(syncode, next_syncode)) {
1081 /* we have encountered a 2char comment start sequence and we
1082 are ignoring all text inside comments. we must record
1083 the comment style this sequence begins so that later,
1084 only a comment end of the same style actually ends
1085 the comment section */
1088 SYNTAX_CODES_COMMENT_MASK_START
1089 (syncode, next_syncode)
1090 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1095 if (code == Scomment) {
1097 find_end_of_comment(buf, from, stop,
1100 /* we stopped because from==stop */
1101 BUF_SET_PT(buf, stop);
1106 /* We have skipped one comment. */
1108 } else if (code != Swhitespace
1109 && code != Sendcomment && code != Scomment) {
1110 BUF_SET_PT(buf, from);
1116 /* End of comment reached */
1123 stop = BUF_BEGV(buf);
1124 while (from > stop) {
1125 int comstyle = 0; /* Code for comment style: 0 for A, 1 for B,
1126 or ST_COMMENT_STYLE */
1129 if (char_quoted(buf, from)) {
1134 c = BUF_FETCH_CHAR(buf, from);
1135 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1136 code = SYNTAX_FROM_CODE(syncode);
1138 if (code == Sendcomment) {
1139 /* we have found a single char end comment. we must record
1140 the comment style encountered so that later, we can match
1141 only the proper comment begin sequence of the same style */
1143 SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1144 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1147 else if (code == Scomment_fence) {
1149 comstyle = ST_COMMENT_STYLE;
1152 else if (from > stop
1153 /* #### This seems logical but it's not in 21.4.9 */
1154 /* && !char_quoted (buf, from - 1) */
1155 && SYNTAX_CODE_END_SECOND_P(syncode)) {
1157 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1159 SYNTAX_CODE_FROM_CACHE(mirrortab,
1163 if (SYNTAX_CODES_END_P(prev_syncode, syncode)) {
1164 /* We must record the comment style encountered so that
1165 later, we can match only the proper comment begin
1166 sequence of the same style. */
1169 SYNTAX_CODES_COMMENT_MASK_END
1170 (prev_syncode, syncode)
1171 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1176 if (code == Sendcomment) {
1178 find_start_of_comment(buf, from, stop,
1183 else if (code != Swhitespace
1184 && code != Scomment && code != Sendcomment) {
1185 BUF_SET_PT(buf, from + 1);
1193 BUF_SET_PT(buf, from);
1198 scan_lists(struct buffer * buf, Bufpos from, int count, int depth,
1199 int sexpflag, int noerror)
1205 enum syntaxcode code;
1207 int min_depth = depth; /* Err out if depth gets less than this. */
1212 SCS_STATISTICS_SET_FUNCTION(scs_scan_lists);
1213 SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, from, count);
1218 while (from < stop) {
1219 int comstyle = 0; /* mask for finding matching comment style */
1220 Emchar stringterm = '\0'; /* Used by Sstring case in switch */
1222 UPDATE_SYNTAX_CACHE_FORWARD(from);
1223 c = BUF_FETCH_CHAR(buf, from);
1224 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1225 code = SYNTAX_FROM_CODE(syncode);
1228 /* a 1-char comment start sequence */
1229 if (code == Scomment && parse_sexp_ignore_comments) {
1231 SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode) ==
1232 SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1235 /* else, a 2-char comment start sequence? */
1236 else if (from < stop
1237 && SYNTAX_CODE_START_FIRST_P(syncode)
1238 && parse_sexp_ignore_comments) {
1240 UPDATE_SYNTAX_CACHE_FORWARD(from);
1241 next_syncode = SYNTAX_CODE_FROM_CACHE(
1242 mirrortab, BUF_FETCH_CHAR(buf, from));
1244 if (SYNTAX_CODES_START_P(syncode,
1246 /* we have encountered a comment start
1247 sequence and we are ignoring all text
1248 inside comments. we must record the
1249 comment style this sequence begins so
1250 that later, only a comment end of the
1251 same style actually ends the comment
1255 SYNTAX_CODES_COMMENT_MASK_START
1256 (syncode, next_syncode)
1257 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1261 UPDATE_SYNTAX_CACHE_FORWARD(from);
1263 if (SYNTAX_CODE_PREFIX(syncode))
1272 /* treat following character as a word
1276 if (depth || !sexpflag)
1278 /* This word counts as a sexp; return at end of
1280 while (from < stop) {
1281 UPDATE_SYNTAX_CACHE_FORWARD(from);
1282 switch ((unsigned int)
1304 case Scomment_fence:
1305 comstyle = ST_COMMENT_STYLE;
1306 /* falls through! */
1308 if (!parse_sexp_ignore_comments)
1310 UPDATE_SYNTAX_CACHE_FORWARD(from);
1313 find_end_of_comment(buf, from, stop,
1316 /* we stopped because from ==
1317 stop in search forward */
1331 && c == BUF_FETCH_CHAR(buf, from))
1348 if (depth < min_depth) {
1351 error("Containing expression "
1352 "ends prematurely");
1358 /* XEmacs change: call syntax_match on
1361 BUF_FETCH_CHAR(buf, from - 1);
1362 Lisp_Object stermobj =
1363 syntax_match(syntax_cache.
1364 current_syntax_table,
1367 if (CHARP(stermobj))
1368 stringterm = XCHAR(stermobj);
1372 /* falls through! */
1377 UPDATE_SYNTAX_CACHE_FORWARD(from);
1378 c = BUF_FETCH_CHAR(buf, from);
1381 : SYNTAX_FROM_CACHE(mirrortab,
1386 switch ((unsigned int)
1399 if (!depth && sexpflag)
1403 /* list them all here */
1416 /* Reached end of buffer. Error if within object,
1417 return nil if between */
1423 /* End of object reached */
1431 stop = BUF_BEGV(buf);
1432 while (from > stop) {
1433 /* mask for finding matching comment style */
1435 /* used by case Sstring in switch below */
1436 Emchar stringterm = '\0';
1439 UPDATE_SYNTAX_CACHE_BACKWARD(from);
1440 quoted = char_quoted(buf, from);
1443 UPDATE_SYNTAX_CACHE_BACKWARD(from);
1446 c = BUF_FETCH_CHAR(buf, from);
1447 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1448 code = SYNTAX_FROM_CODE(syncode);
1450 if (code == Sendcomment && parse_sexp_ignore_comments) {
1451 /* we have found a single char end comment. we
1452 must record the comment style encountered so
1453 that later, we can match only the proper
1454 comment begin sequence of the same style */
1456 SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1457 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1460 else if (from > stop
1461 && SYNTAX_CODE_END_SECOND_P(syncode)
1462 && !char_quoted(buf, from - 1)
1463 && parse_sexp_ignore_comments) {
1465 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1466 prev_syncode = SYNTAX_CODE_FROM_CACHE
1467 (mirrortab, BUF_FETCH_CHAR(buf, from - 1));
1469 if (SYNTAX_CODES_END_P(prev_syncode, syncode)) {
1470 /* we must record the comment style
1471 encountered so that later, we can
1472 match only the proper comment begin
1473 sequence of the same style */
1476 SYNTAX_CODES_COMMENT_MASK_END
1477 (prev_syncode, syncode)
1478 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1483 if (SYNTAX_CODE_PREFIX(syncode)) {
1487 switch (quoted ? Sword : code) {
1490 if (depth || !sexpflag)
1492 /* This word counts as a sexp; count object
1493 finished after passing it. */
1494 while (from > stop) {
1495 /* enum syntaxcode syncode; */
1496 UPDATE_SYNTAX_CACHE_BACKWARD(from);
1497 quoted = char_quoted(buf, from - 1);
1503 SYNTAX_FROM_CACHE(mirrortab,
1507 == Sword || syncode == Ssymbol
1508 || syncode == Squote))
1518 && c == BUF_FETCH_CHAR(buf, from - 1))
1535 if (depth < min_depth) {
1539 error("Containing expression "
1540 "ends prematurely");
1544 case Scomment_fence:
1545 comstyle = ST_COMMENT_STYLE;
1546 /* falls through! */
1548 if (parse_sexp_ignore_comments)
1550 find_start_of_comment(buf, from,
1556 /* XEmacs change: call syntax_match() on
1558 Emchar ch = BUF_FETCH_CHAR(buf, from);
1559 Lisp_Object stermobj =
1560 syntax_match(syntax_cache.
1561 current_syntax_table, ch);
1562 if (CHARP(stermobj)) {
1563 stringterm = XCHAR(stermobj);
1569 /* falls through! */
1575 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1576 c = BUF_FETCH_CHAR(buf, from - 1);
1577 if ((code == Sstring
1579 : SYNTAX_FROM_CACHE(mirrortab,
1582 && !char_quoted(buf, from - 1)) {
1589 if (!depth && sexpflag)
1594 /* shouldnt happen */
1599 /* Reached start of buffer. Error if within object,
1600 return nil if between */
1610 return (make_int(from));
1614 error("Unbalanced parentheses");
1618 int char_quoted(struct buffer *buf, Bufpos pos)
1620 enum syntaxcode code;
1621 Bufpos beg = BUF_BEGV(buf);
1623 Bufpos startpos = pos;
1626 UPDATE_SYNTAX_CACHE_BACKWARD(pos - 1);
1628 SYNTAX_FROM_CACHE(mirrortab, BUF_FETCH_CHAR(buf, pos - 1));
1630 if (code != Scharquote && code != Sescape)
1636 UPDATE_SYNTAX_CACHE(startpos);
1640 DEFUN("scan-lists", Fscan_lists, 3, 5, 0, /*
1641 Scan from character number FROM by COUNT lists.
1642 Returns the character number of the position thus found.
1644 If DEPTH is nonzero, paren depth begins counting from that value,
1645 only places where the depth in parentheses becomes zero
1646 are candidates for stopping; COUNT such places are counted.
1647 Thus, a positive value for DEPTH means go out levels.
1649 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1651 If the beginning or end of (the accessible part of) the buffer is reached
1652 and the depth is wrong, an error is signaled.
1653 If the depth is right but the count is not used up, nil is returned.
1655 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1656 of in the current buffer.
1658 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1659 signalling an error.
1661 (from, count, depth, buffer, noerror))
1668 buf = decode_buffer(buffer, 0);
1670 return scan_lists(buf, XINT(from), XINT(count), XINT(depth), 0,
1674 DEFUN("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1675 Scan from character number FROM by COUNT balanced expressions.
1676 If COUNT is negative, scan backwards.
1677 Returns the character number of the position thus found.
1679 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1681 If the beginning or end of (the accessible part of) the buffer is reached
1682 in the middle of a parenthetical grouping, an error is signaled.
1683 If the beginning or end is reached between groupings
1684 but before count is used up, nil is returned.
1686 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1687 of in the current buffer.
1689 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1690 signalling an error.
1692 (from, count, buffer, noerror))
1694 struct buffer *buf = decode_buffer(buffer, 0);
1698 return scan_lists(buf, XINT(from), XINT(count), 0, 1, !NILP(noerror));
1701 DEFUN("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1702 Move point backward over any number of chars with prefix syntax.
1703 This includes chars with "quote" or "prefix" syntax (' or p).
1705 Optional arg BUFFER defaults to the current buffer.
1709 struct buffer *buf = decode_buffer(buffer, 0);
1710 Bufpos beg = BUF_BEGV(buf);
1711 Bufpos pos = BUF_PT(buf);
1713 Lisp_Char_Table *mirrortab = XCHAR_TABLE(buf->mirror_syntax_table);
1715 Emchar c = '\0'; /* initialize to avoid compiler warnings */
1717 SCS_STATISTICS_SET_FUNCTION(scs_Fbackward_prefix_characters);
1718 SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos, -1);
1720 while (pos > beg && !char_quoted(buf, pos - 1)
1721 /* Previous statement updates syntax table. */
1723 (SYNTAX_FROM_CACHE(mirrortab, c = BUF_FETCH_CHAR(buf, pos - 1))
1725 /* equivalent to SYNTAX_PREFIX (mirrortab, c) */
1726 || SYNTAX_CODE_PREFIX(SYNTAX_CODE_FROM_CACHE(mirrortab, c))))
1729 BUF_SET_PT(buf, pos);
1734 /* Parse forward from FROM to END,
1735 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1736 and return a description of the state of the parse at END.
1737 If STOPBEFORE is nonzero, stop at the start of an atom.
1738 If COMMENTSTOP is 1, stop at the start of a comment; if it is -1,
1739 stop at the start of a comment or a string */
1742 scan_sexps_forward(struct buffer *buf, struct lisp_parse_state *stateptr,
1743 Bufpos from, Bufpos end,
1744 int targetdepth, int stopbefore,
1745 Lisp_Object oldstate, int commentstop)
1747 struct lisp_parse_state state;
1749 enum syntaxcode code;
1753 struct level levelstart[100];
1754 struct level *curlevel = levelstart;
1755 struct level *endlevel = levelstart + 100;
1756 int depth; /* Paren depth of current scanning location.
1757 level - levelstart equals this except
1758 when the depth becomes negative. */
1759 int mindepth; /* Lowest DEPTH value seen. */
1760 int start_quoted = 0; /* Nonzero means starting after a char quote */
1761 int boundary_stop = commentstop == -1;
1764 SCS_STATISTICS_SET_FUNCTION(scs_scan_sexps_forward);
1765 SETUP_SYNTAX_CACHE(from, 1);
1766 if (NILP(oldstate)) {
1768 state.instring = -1;
1769 state.incomment = 0;
1770 state.comstyle = 0; /* comment style a by default */
1771 state.comstr_start = -1; /* no comment/string seen. */
1773 tem = Fcar(oldstate); /* elt 0, depth */
1779 oldstate = Fcdr(oldstate);
1780 oldstate = Fcdr(oldstate);
1781 oldstate = Fcdr(oldstate);
1782 tem = Fcar(oldstate); /* elt 3, instring */
1783 state.instring = (!NILP(tem)
1784 ? (INTP(tem) ? XINT(tem) : ST_STRING_STYLE)
1787 oldstate = Fcdr(oldstate);
1788 tem = Fcar(oldstate); /* elt 4, incomment */
1789 state.incomment = !NILP(tem);
1791 oldstate = Fcdr(oldstate);
1792 tem = Fcar(oldstate); /* elt 5, follows-quote */
1793 start_quoted = !NILP(tem);
1795 /* if the eighth element of the list is nil, we are in comment style
1796 a; if it is t, we are in comment style b; if it is 'syntax-table,
1797 we are in a generic comment */
1798 oldstate = Fcdr(oldstate);
1799 oldstate = Fcdr(oldstate);
1800 /* The code below was changed radically for syntax-table properties.
1801 A reasonable place to look if a bug manifests. */
1802 tem = Fcar(oldstate); /* elt 7, comment style a/b/fence */
1803 state.comstyle = NILP(tem) ? 0 : (EQ(tem, Qsyntax_table)
1804 ? ST_COMMENT_STYLE : 1);
1806 oldstate = Fcdr(oldstate); /* elt 8, start of last comment/string */
1807 tem = Fcar(oldstate);
1808 state.comstr_start = NILP(tem) ? -1 : XINT(tem);
1810 /* elt 9, char numbers of starts-of-expression of levels
1811 (starting from outermost). */
1812 oldstate = Fcdr(oldstate);
1813 tem = Fcar(oldstate); /* elt 9, intermediate data for
1814 continuation of parsing (subject
1816 while (!NILP(tem)) { /* >= second enclosing sexps. */
1817 curlevel->last = XINT(Fcar(tem));
1818 if (++curlevel == endlevel)
1819 error("Nesting too deep for parser");
1820 curlevel->prev = -1;
1821 curlevel->last = -1;
1824 /* end radical change section */
1829 curlevel->prev = -1;
1830 curlevel->last = -1;
1832 /* Enter the loop at a place appropriate for initial state. */
1834 if (state.incomment)
1835 goto startincomment;
1836 if (state.instring >= 0) {
1838 goto startquotedinstring;
1844 while (from < end) {
1850 UPDATE_SYNTAX_CACHE_FORWARD(from);
1851 c = BUF_FETCH_CHAR(buf, from);
1852 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1853 code = SYNTAX_FROM_CODE(syncode);
1856 /* record the comment style we have entered so that only the
1857 comment-ender sequence (or single char) of the same style
1858 actually terminates the comment section. */
1859 if (code == Scomment) {
1860 state.comstyle = SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1861 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1862 state.comstr_start = from - 1;
1865 /* a generic comment delimiter? */
1866 else if (code == Scomment_fence) {
1867 state.comstyle = ST_COMMENT_STYLE;
1868 state.comstr_start = from - 1;
1872 else if (from < end && SYNTAX_CODE_START_FIRST_P(syncode)) {
1874 UPDATE_SYNTAX_CACHE_FORWARD(from);
1876 SYNTAX_CODE_FROM_CACHE(mirrortab,
1877 BUF_FETCH_CHAR(buf, from));
1879 if (SYNTAX_CODES_START_P(syncode, next_syncode)) {
1882 SYNTAX_CODES_COMMENT_MASK_START(syncode,
1884 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1885 state.comstr_start = from - 1;
1887 UPDATE_SYNTAX_CACHE_FORWARD(from);
1891 if (SYNTAX_CODE_PREFIX(syncode))
1897 goto stop; /* this arg means stop at sexp start */
1898 curlevel->last = from - 1;
1904 /* treat following character as a word constituent */
1908 goto stop; /* this arg means stop at sexp start */
1909 curlevel->last = from - 1;
1911 while (from < end) {
1912 UPDATE_SYNTAX_CACHE_FORWARD(from);
1913 switch ((unsigned int)
1916 BUF_FETCH_CHAR(buf, from))) {
1933 curlevel->prev = curlevel->last;
1937 state.incomment = 1;
1938 if (commentstop || boundary_stop)
1941 if (commentstop == 1)
1943 UPDATE_SYNTAX_CACHE_FORWARD(from);
1946 find_end_of_comment(buf, from, end,
1949 /* we terminated search because from == end */
1955 state.incomment = 0;
1956 state.comstyle = 0; /* reset the comment style */
1963 goto stop; /* this arg means stop at sexp start */
1965 /* curlevel++->last ran into compiler bug on Apollo */
1966 curlevel->last = from - 1;
1967 if (++curlevel == endlevel)
1968 error("Nesting too deep for parser");
1969 curlevel->prev = -1;
1970 curlevel->last = -1;
1971 if (targetdepth == depth)
1977 if (depth < mindepth)
1979 if (curlevel != levelstart)
1981 curlevel->prev = curlevel->last;
1982 if (targetdepth == depth)
1988 state.comstr_start = from - 1;
1990 goto stop; /* this arg means stop at sexp start */
1991 curlevel->last = from - 1;
1992 if (code == Sstring_fence) {
1993 state.instring = ST_STRING_STYLE;
1995 /* XEmacs change: call syntax_match() on character */
1996 Emchar ch = BUF_FETCH_CHAR(buf, from - 1);
1997 Lisp_Object stermobj =
1998 syntax_match(syntax_cache.
1999 current_syntax_table, ch);
2001 if (CHARP(stermobj))
2002 state.instring = XCHAR(stermobj);
2004 state.instring = ch;
2010 enum syntaxcode temp_code;
2015 UPDATE_SYNTAX_CACHE_FORWARD(from);
2016 c = BUF_FETCH_CHAR(buf, from);
2017 temp_code = SYNTAX_FROM_CACHE(mirrortab, c);
2019 if (state.instring != ST_STRING_STYLE &&
2020 temp_code == Sstring && c == state.instring)
2023 switch ((unsigned int)temp_code) {
2025 if (state.instring == ST_STRING_STYLE)
2032 startquotedinstring:
2043 state.instring = -1;
2044 curlevel->prev = curlevel->last;
2057 case Scomment_fence:
2067 stop: /* Here if stopping before start of sexp. */
2068 from--; /* We have just fetched the char that starts it; */
2069 goto done; /* but return the position before it. */
2074 state.depth = depth;
2075 state.mindepth = mindepth;
2076 state.thislevelstart = curlevel->prev;
2077 state.prevlevelstart
2078 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2079 state.location = from;
2080 state.levelstarts = Qnil;
2081 while (--curlevel >= levelstart)
2082 state.levelstarts = Fcons(make_int(curlevel->last),
2088 DEFUN("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
2089 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
2090 Parsing stops at TO or when certain criteria are met;
2091 point is set to where parsing stops.
2092 If fifth arg OLDSTATE is omitted or nil,
2093 parsing assumes that FROM is the beginning of a function.
2094 Value is a list of nine elements describing final state of parsing:
2096 1. character address of start of innermost containing list; nil if none.
2097 2. character address of start of last complete sexp terminated.
2098 3. non-nil if inside a string.
2099 (It is the character that will terminate the string,
2100 or t if the string should be terminated by an explicit
2101 `syntax-table' property.)
2102 4. t if inside a comment.
2103 5. t if following a quote character.
2104 6. the minimum paren-depth encountered during this scan.
2105 7. nil if in comment style a, or not in a comment; t if in comment style b;
2106 `syntax-table' if given by an explicit `syntax-table' property.
2107 8. character address of start of last comment or string; nil if none.
2108 9. Intermediate data for continuation of parsing (subject to change).
2109 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
2110 in parentheses becomes equal to TARGETDEPTH.
2111 Fourth arg STOPBEFORE non-nil means stop when come to
2112 any character that starts a sexp.
2113 Fifth arg OLDSTATE is a nine-element list like what this function returns.
2114 It is used to initialize the state of the parse. Its second and third
2115 elements are ignored.
2116 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If it
2117 is `syntax-table', stop after the start of a comment or a string, or after
2118 the end of a comment or string.
2120 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
2122 struct lisp_parse_state state;
2125 struct buffer *buf = decode_buffer(buffer, 0);
2128 if (!NILP(targetdepth)) {
2129 CHECK_INT(targetdepth);
2130 target = XINT(targetdepth);
2132 target = -100000; /* We won't reach this depth */
2134 get_buffer_range_char(buf, from, to, &start, &end, 0);
2135 scan_sexps_forward(buf, &state, start, end,
2136 target, !NILP(stopbefore), oldstate,
2138 ? 0 : (EQ(commentstop, Qsyntax_table) ? -1 : 1)));
2139 BUF_SET_PT(buf, state.location);
2143 val = Fcons(state.levelstarts, val);
2144 val = Fcons((state.incomment || (state.instring >= 0))
2145 ? make_int(state.comstr_start) : Qnil, val);
2146 val = Fcons(state.comstyle ? (state.comstyle == ST_COMMENT_STYLE
2147 ? Qsyntax_table : Qt) : Qnil, val);
2148 val = Fcons(make_int(state.mindepth), val);
2149 val = Fcons(state.quoted ? Qt : Qnil, val);
2150 val = Fcons(state.incomment ? Qt : Qnil, val);
2151 val = Fcons(state.instring < 0
2153 : (state.instring == ST_STRING_STYLE
2154 ? Qt : make_int(state.instring)), val);
2156 Fcons(state.thislevelstart <
2157 0 ? Qnil : make_int(state.thislevelstart), val);
2159 Fcons(state.prevlevelstart <
2160 0 ? Qnil : make_int(state.prevlevelstart), val);
2161 val = Fcons(make_int(state.depth), val);
2166 /* Updating of the mirror syntax table.
2168 Each syntax table has a corresponding mirror table in it.
2169 Whenever we make a change to a syntax table, we call
2170 update_syntax_table() on it.
2172 #### We really only need to map over the changed range.
2174 If we change the standard syntax table, we need to map over
2175 all tables because any of them could be inheriting from the
2176 standard syntax table.
2178 When `set-syntax-table' is called, we set the buffer's mirror
2179 syntax table as well.
2183 Lisp_Object mirrortab;
2187 static int cmst_mapfun(struct chartab_range *range, Lisp_Object val, void *arg)
2189 struct cmst_arg *closure = (struct cmst_arg *)arg;
2193 if (SYNTAX_FROM_CODE(XINT(val)) == Sinherit && closure->check_inherit) {
2194 struct cmst_arg recursive;
2196 recursive.mirrortab = closure->mirrortab;
2197 recursive.check_inherit = 0;
2198 map_char_table(XCHAR_TABLE(Vstandard_syntax_table), range,
2199 cmst_mapfun, &recursive);
2201 put_char_table(XCHAR_TABLE(closure->mirrortab), range, val);
2205 static void update_just_this_syntax_table(Lisp_Char_Table * ct)
2207 struct chartab_range range;
2208 struct cmst_arg arg;
2210 arg.mirrortab = ct->mirror_table;
2211 arg.check_inherit = (CHAR_TABLEP(Vstandard_syntax_table)
2212 && ct != XCHAR_TABLE(Vstandard_syntax_table));
2213 range.type = CHARTAB_RANGE_ALL;
2214 map_char_table(ct, &range, cmst_mapfun, &arg);
2217 /* Called from chartab.c when a change is made to a syntax table.
2218 If this is the standard syntax table, we need to recompute
2219 *all* syntax tables (yuck). Otherwise we just recompute this
2222 void update_syntax_table(Lisp_Char_Table * ct)
2224 /* Don't be stymied at startup. */
2225 if (CHAR_TABLEP(Vstandard_syntax_table)
2226 && ct == XCHAR_TABLE(Vstandard_syntax_table)) {
2229 for (syntab = Vall_syntax_tables; !NILP(syntab);
2230 syntab = XCHAR_TABLE(syntab)->next_table)
2231 update_just_this_syntax_table(XCHAR_TABLE(syntab));
2233 update_just_this_syntax_table(ct);
2236 /************************************************************************/
2237 /* initialization */
2238 /************************************************************************/
2240 void syms_of_syntax(void)
2242 defsymbol(&Qsyntax_table_p, "syntax-table-p");
2243 defsymbol(&Qsyntax_table, "syntax-table");
2245 DEFSUBR(Fsyntax_table_p);
2246 DEFSUBR(Fsyntax_table);
2247 DEFSUBR(Fstandard_syntax_table);
2248 DEFSUBR(Fcopy_syntax_table);
2249 DEFSUBR(Fset_syntax_table);
2250 DEFSUBR(Fsyntax_designator_chars);
2251 DEFSUBR(Fchar_syntax);
2252 DEFSUBR(Fmatching_paren);
2253 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
2254 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
2256 DEFSUBR(Fforward_word);
2258 DEFSUBR(Fforward_comment);
2259 DEFSUBR(Fscan_lists);
2260 DEFSUBR(Fscan_sexps);
2261 DEFSUBR(Fbackward_prefix_chars);
2262 DEFSUBR(Fparse_partial_sexp);
2265 void vars_of_syntax(void)
2267 DEFVAR_BOOL("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
2268 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
2270 parse_sexp_ignore_comments = 0;
2272 DEFVAR_BOOL("lookup-syntax-properties", &lookup_syntax_properties /*
2273 Non-nil means `forward-sexp', etc., look up character syntax in the
2274 table that is the value of the `syntax-table' text property, if non-nil.
2275 The value of this property should be either a syntax table, or a cons
2276 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric
2277 syntax code, MATCHCHAR being nil or the character to match (which is
2278 relevant only for open/close type.
2280 lookup_syntax_properties = 0; /* #### default off until optimized */
2282 DEFVAR_BOOL("words-include-escapes", &words_include_escapes /*
2283 Non-nil means `forward-word', etc., should treat escape chars part of words.
2285 words_include_escapes = 0;
2287 no_quit_in_re_search = 0;
2290 static void define_standard_syntax(const char *p, enum syntaxcode syn)
2293 Fput_char_table(make_char(*p), make_int(syn),
2294 Vstandard_syntax_table);
2297 void complex_vars_of_syntax(void)
2301 /* Set this now, so first buffer creation can refer to it. */
2302 /* Make it nil before calling copy-syntax-table
2303 so that copy-syntax-table will know not to try to copy from garbage */
2304 Vstandard_syntax_table = Qnil;
2305 Vstandard_syntax_table = Fcopy_syntax_table(Qnil);
2306 staticpro(&Vstandard_syntax_table);
2308 Vsyntax_designator_chars_string = make_string_nocopy(syntax_code_spec,
2310 staticpro(&Vsyntax_designator_chars_string);
2312 fill_char_table(XCHAR_TABLE(Vstandard_syntax_table), make_int(Spunct));
2314 for (i = 0; i <= 32; i++) /* Control 0 plus SPACE */
2315 Fput_char_table(make_char(i), make_int(Swhitespace),
2316 Vstandard_syntax_table);
2317 for (i = 127; i <= 159; i++) /* DEL plus Control 1 */
2318 Fput_char_table(make_char(i), make_int(Swhitespace),
2319 Vstandard_syntax_table);
2321 define_standard_syntax("abcdefghijklmnopqrstuvwxyz"
2322 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2323 "0123456789" "$%", Sword);
2324 define_standard_syntax("\"", Sstring);
2325 define_standard_syntax("\\", Sescape);
2326 define_standard_syntax("_-+*/&|<>=", Ssymbol);
2327 define_standard_syntax(".,;:?!#@~^'`", Spunct);
2329 for (p = "()[]{}"; *p; p += 2) {
2330 Fput_char_table(make_char(p[0]),
2331 Fcons(make_int(Sopen), make_char(p[1])),
2332 Vstandard_syntax_table);
2333 Fput_char_table(make_char(p[1]),
2334 Fcons(make_int(Sclose), make_char(p[0])),
2335 Vstandard_syntax_table);