1 /* SXEmacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985-1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of SXEmacs
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* Synched up with: FSF 19.28. */
23 /* This file has been Mule-ized. */
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
33 explaining the purpose of the Sextword syntax category:
35 Japanese words are not separated by spaces, which makes finding word
36 boundaries very difficult. Theoretically it's impossible without
37 using natural language processing techniques. But, by defining
38 pseudo-words as below (much simplified for letting you understand it
39 easily) for Japanese, we can have a convenient forward-word function
42 A Japanese word is a sequence of characters that consists of
43 zero or more Kanji characters followed by zero or more
46 Then, the problem is that now we can't say that a sequence of
47 word-constituents makes up a WORD. For instance, both Hiragana "A"
48 and Kanji "KAN" are word-constituents but the sequence of these two
49 letters can't be a single word.
51 So, we introduced Sextword for Japanese letters. A character of
52 Sextword is a word-constituent but a word boundary may exist between
53 two such characters. */
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
56 /* Recovered by tomo */
58 #define ST_COMMENT_STYLE 0x101
59 #define ST_STRING_STYLE 0x102
61 Lisp_Object Qsyntax_table;
62 int lookup_syntax_properties;
64 Lisp_Object Qsyntax_table_p;
66 int words_include_escapes;
68 int parse_sexp_ignore_comments;
70 /* The following two variables are provided to tell additional information
71 to the regex routines. We do it this way rather than change the
72 arguments to re_search_2() in an attempt to maintain some call
73 compatibility with other versions of the regex code. */
75 /* Tell the regex routines not to QUIT. Normally there is a QUIT
76 each iteration in re_search_2(). */
77 int no_quit_in_re_search;
79 /* Tell the regex routines which buffer to access for SYNTAX() lookups
81 struct buffer *regex_emacs_buffer;
83 /* In Emacs, this is the string or buffer in which we
84 are matching. It is used for looking up syntax properties. */
85 Lisp_Object regex_match_object;
87 Lisp_Object Vstandard_syntax_table;
89 Lisp_Object Vsyntax_designator_chars_string;
91 Lisp_Object Qscan_error;
93 /* This is the internal form of the parse state used in parse-partial-sexp. */
95 struct lisp_parse_state {
96 int depth; /* Depth at end of parsing */
97 Emchar instring; /* -1 if not within string, else desired terminator */
98 int incomment; /* Nonzero if within a comment at end of parsing */
99 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE */
100 int quoted; /* Nonzero if just after an escape char at end of
102 Bufpos thislevelstart; /* Char number of most recent start-of-expression
104 Bufpos prevlevelstart; /* Char number of start of containing expression */
105 Bufpos location; /* Char number at which parsing stopped */
106 int mindepth; /* Minimum depth seen while scanning */
107 Bufpos comstr_start; /* Position just after last comment/string starter
108 (if the 'syntax-table text property is not
109 supported, used only for comment starts) */
110 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
111 of levels (starting from outermost). */
114 /* These variables are a cache for finding the start of a defun.
115 find_start_pos is the place for which the defun start was found.
116 find_start_value is the defun start position found for it.
117 find_start_buffer is the buffer it was found in.
118 find_start_begv is the BEGV value when it was found.
119 find_start_modiff is the value of MODIFF when it was found. */
121 static Bufpos find_start_pos;
122 static Bufpos find_start_value;
123 static struct buffer *find_start_buffer;
124 static Bufpos find_start_begv;
125 static int find_start_modiff;
127 /* Find a defun-start that is the last one before POS (or nearly the last).
128 We record what we find, so that another call in the same area
129 can return the same value right away. */
131 static Bufpos find_defun_start(struct buffer *buf, Bufpos pos)
135 /* Use previous finding, if it's valid and applies to this inquiry. */
136 if (buf == find_start_buffer
137 /* Reuse the defun-start even if POS is a little farther on.
138 POS might be in the next defun, but that's ok.
139 Our value may not be the best possible, but will still be usable. */
140 && pos <= find_start_pos + 1000
141 && pos >= find_start_value
142 && BUF_BEGV(buf) == find_start_begv
143 && BUF_MODIFF(buf) == find_start_modiff)
144 return find_start_value;
146 /* Back up to start of line. */
147 tem = find_next_newline(buf, pos, -1);
149 SCS_STATISTICS_SET_FUNCTION(scs_find_defun_start);
150 SETUP_SYNTAX_CACHE(tem, 1);
151 while (tem > BUF_BEGV(buf)) {
152 UPDATE_SYNTAX_CACHE_BACKWARD(tem);
154 /* Open-paren at start of line means we found our defun-start. */
155 if (SYNTAX_FROM_CACHE(mirrortab, BUF_FETCH_CHAR(buf, tem)) ==
158 /* Move to beg of previous line. */
159 tem = find_next_newline(buf, tem, -2);
162 /* Record what we found, for the next try. */
163 find_start_value = tem;
164 find_start_buffer = buf;
165 find_start_modiff = BUF_MODIFF(buf);
166 find_start_begv = BUF_BEGV(buf);
167 find_start_pos = pos;
169 return find_start_value;
172 DEFUN("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /*
173 Return t if OBJECT is a syntax table.
174 Any vector of 256 elements will do.
178 return (CHAR_TABLEP(object)
179 && XCHAR_TABLE_TYPE(object) == CHAR_TABLE_TYPE_SYNTAX)
183 static Lisp_Object check_syntax_table(Lisp_Object obj, Lisp_Object default_)
187 while (NILP(Fsyntax_table_p(obj)))
188 obj = wrong_type_argument(Qsyntax_table_p, obj);
192 DEFUN("syntax-table", Fsyntax_table, 0, 1, 0, /*
193 Return the current syntax table.
194 This is the one specified by the current buffer, or by BUFFER if it
199 return decode_buffer(buffer, 0)->syntax_table;
202 DEFUN("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /*
203 Return the standard syntax table.
204 This is the one used for new buffers.
208 return Vstandard_syntax_table;
211 DEFUN("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /*
212 Return a new syntax table which is a copy of SYNTAX-TABLE.
213 SYNTAX-TABLE defaults to the standard syntax table.
217 if (NILP(Vstandard_syntax_table))
218 return Fmake_char_table(Qsyntax);
220 syntax_table = check_syntax_table(syntax_table, Vstandard_syntax_table);
221 return Fcopy_char_table(syntax_table);
224 DEFUN("set-syntax-table", Fset_syntax_table, 1, 2, 0, /*
225 Select SYNTAX-TABLE as the new syntax table for BUFFER.
226 BUFFER defaults to the current buffer if omitted.
228 (syntax_table, buffer))
230 struct buffer *buf = decode_buffer(buffer, 0);
231 syntax_table = check_syntax_table(syntax_table, Qnil);
232 buf->syntax_table = syntax_table;
233 buf->mirror_syntax_table = XCHAR_TABLE(syntax_table)->mirror_table;
234 /* Indicate that this buffer now has a specified syntax table. */
235 buf->local_var_flags |= XINT(buffer_local_flags.syntax_table);
239 /* The current syntax state */
240 struct syntax_cache syntax_cache;
243 Update syntax_cache to an appropriate setting for position POS
245 The sign of COUNT gives the relative position of POS wrt the
246 previously valid interval. (not currently used)
248 `syntax_cache.*_change' are the next and previous positions at
249 which syntax_code and c_s_t will need to be recalculated.
251 #### Currently this code uses 'get-char-property', which will
252 return the "last smallest" extent at a given position. In cases
253 where overlapping extents are defined, this code will simply use
254 whatever is returned by get-char-property.
256 It might be worth it at some point to merge provided syntax tables
257 outward to the current buffer.
260 This implementation has to rather inefficient, since it looks at
261 next-extent-change, and a heavily font-locked buffer will be rife
262 with irrelevant extents. We could do a sledgehammer check on this
263 by looking at the distribution of extent lengths. Also count up
264 cache hits and misses.
266 If we assume that syntax-table is a _text_ property (which also
267 deals with the issue of overlapping syntax-table properties), then
268 the following strategy recommends itself
269 o give the syntax cache a `valid' flag, to be reset whenever a
270 syntax-table property is added, changed, or removed; this could
271 be done by setting syntax_cache's prev_change > next_change
272 (but not compatible with using extents/markers here); if it's a
273 Lisp variable, doing it in Lisp shouldn't be too inefficient
274 o lazily initialize the cache whenever the object being examined
275 differs from the object the cache currently refers to
276 o by using {previous,next-single-property-change} we should be
277 able to get much bigger cache intervals (in most cases, the
279 o cache markers instead of positions so the mere insertion or
280 deletion of text doesn't invalidate the cache, only if it
281 involves a syntax-table property (we could also cache the
282 extents carrying the syntax-table text-property; that gives us
283 another check for invalid cache).
285 If I understand this correctly, we need to invalidate the cache in the
287 o If the referenced object changes (it's a global cache)
288 o If there are insertions or deletions of text (the positions are
289 absolute; fix: use markers or an extent instead?)
290 o If the syntax-table property is altered == added and different or
291 removed and the same (fix: probably computable from range overlap,
292 but is it worth it? would interact with ins/del); this includes
293 detachment of extents with the same value (but only the boundary
294 extents, as otherwise the range coalesces across the deletion point)
295 and attachment of extents with a different value
296 Note: the above looks a lot like what Ben has implemented in 21.5, but
297 he goes one better by making the cache buffer-local.
299 Note: cperl mode uses the text property API, not extents/overlays.
302 #ifdef SYNTAX_CACHE_STATISTICS
303 struct syntax_cache_statistics scs_statistics =
304 { 0, 0, 0, 0, -1, -1, 0.0, 0.0, scs_no_function };
306 char *syntax_cache_statistics_function_names[scs_number_of_functions] = {
312 "Fbackward_prefix_characters",
315 #endif /* SYNTAX_CACHE_STATISTICS */
317 void update_syntax_cache(int pos, int count)
319 Lisp_Object tmp_table;
321 #ifdef SYNTAX_CACHE_STATISTICS
322 if (scs_statistics.total_updates == 0) {
324 for (i = 0; i < scs_number_of_functions; ++i)
325 scs_statistics.functions[i] = 0;
327 if (syntax_cache.prev_change > syntax_cache.next_change)
328 scs_statistics.inits++;
329 else if (pos < syntax_cache.prev_change)
330 scs_statistics.misses_lo++;
331 else if (pos >= syntax_cache.next_change)
332 scs_statistics.misses_hi++;
333 #endif /* SYNTAX_CACHE_STATISTICS */
335 /* #### Since font-lock undoes any narrowing, maybe the BUF_ZV and
336 BUF_BEGV below should be BUF_Z and BUF_BEG respectively? */
337 if (BUFFERP(syntax_cache.object)) {
338 int get_change_before = pos + 1;
340 tmp_table = Fget_char_property(make_int(pos), Qsyntax_table,
341 syntax_cache.object, Qnil);
342 #if NEXT_SINGLE_PROPERTY_CHANGE
343 /* #### shouldn't we be using BUF_BEGV here? */
344 syntax_cache.next_change =
345 XINT(Fnext_single_property_change
346 (make_int(pos > 0 ? pos : 1), Qsyntax_table,
348 make_int(BUF_ZV(syntax_cache.buffer))));
350 syntax_cache.next_change =
351 XINT(Fnext_extent_change(make_int(pos > 0 ? pos : 1),
352 syntax_cache.object));
355 /* #### shouldn't we be using BUF_BEGV here? */
356 if (get_change_before < 1)
357 get_change_before = 1;
358 else if (get_change_before > BUF_ZV(syntax_cache.buffer))
359 get_change_before = BUF_ZV(syntax_cache.buffer);
361 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
362 /* #### shouldn't we be using BUF_BEGV here? */
363 syntax_cache.prev_change =
364 XINT(Fprevious_single_property_change
365 (make_int(get_change_before), Qsyntax_table,
366 syntax_cache.object, make_int(1)));
368 syntax_cache.prev_change =
369 XINT(Fprevious_extent_change(make_int(get_change_before),
370 syntax_cache.object));
372 } else if (STRINGP(syntax_cache.object)) {
373 int get_change_before = pos + 1;
375 tmp_table = Fget_char_property(make_int(pos), Qsyntax_table,
376 syntax_cache.object, Qnil);
377 #if NEXT_SINGLE_PROPERTY_CHANGE
378 /* #### shouldn't we be using BUF_BEGV here? */
379 syntax_cache.next_change =
380 XINT(Fnext_single_property_change
381 (make_int(pos >= 0 ? pos : 0), Qsyntax_table,
383 make_int(XSTRING_LENGTH(syntax_cache.object))));
385 syntax_cache.next_change =
386 XINT(Fnext_extent_change(make_int(pos >= 0 ? pos : 0),
387 syntax_cache.object));
390 if (get_change_before < 0)
391 get_change_before = 0;
392 else if (get_change_before >
393 XSTRING_LENGTH(syntax_cache.object))
394 get_change_before = XSTRING_LENGTH(syntax_cache.object);
396 #if PREVIOUS_SINGLE_PROPERTY_CHANGE
397 syntax_cache.prev_change =
398 XINT(Fprevious_single_property_change
399 (make_int(get_change_before), Qsyntax_table,
400 syntax_cache.object, make_int(0)));
402 syntax_cache.prev_change =
403 XINT(Fprevious_extent_change(make_int(get_change_before),
404 syntax_cache.object));
407 /* silence compiler */
410 * #### Is there another sensible thing to do here? */
411 assert(BUFFERP(syntax_cache.object) ||
412 STRINGP(syntax_cache.object));
415 if (EQ(Fsyntax_table_p(tmp_table), Qt)) {
416 syntax_cache.use_code = 0;
417 syntax_cache.current_syntax_table =
418 XCHAR_TABLE(tmp_table)->mirror_table;
419 } else if (CONSP(tmp_table) && INTP(XCAR(tmp_table))) {
420 syntax_cache.use_code = 1;
421 /* we CANNOT cast the following, XINT goes to long int
422 * while enums are usually just unsigned ints
423 * this'll trigger a warning on 64b machines */
424 syntax_cache.syntax_code = XINT(XCAR(tmp_table));
426 syntax_cache.use_code = 0;
427 syntax_cache.current_syntax_table =
428 syntax_cache.buffer->mirror_syntax_table;
431 #ifdef SYNTAX_CACHE_STATISTICS
434 syntax_cache.next_change - syntax_cache.prev_change;
436 scs_statistics.misses_lo + scs_statistics.misses_hi +
437 scs_statistics.inits;
439 if (scs_statistics.min_length == -1
440 || scs_statistics.min_length > length)
441 scs_statistics.min_length = length;
442 if (scs_statistics.max_length == -1
443 || scs_statistics.max_length < length)
444 scs_statistics.max_length = length;
445 scs_statistics.mean_length_on_miss =
446 ((misses - 1) * scs_statistics.mean_length_on_miss +
450 scs_statistics.mean_length
451 = scs_statistics.total_updates * scs_statistics.mean_length
452 + syntax_cache.next_change - syntax_cache.prev_change;
453 scs_statistics.total_updates++;
454 scs_statistics.mean_length /= scs_statistics.total_updates;
456 if (scs_statistics.this_function != scs_no_function) {
457 scs_statistics.functions[scs_statistics.this_function]++;
458 scs_statistics.this_function = scs_no_function;
462 (scs_statistics.total_updates %
463 SYNTAX_CACHE_STATISTICS_REPORT_INTERVAL)) {
464 fprintf(stderr, "Syntax cache stats:\n ");
466 "updates %d, inits %d, misses low %d, misses high %d,",
467 scs_statistics.total_updates, scs_statistics.inits,
468 scs_statistics.misses_lo, scs_statistics.misses_hi);
469 fprintf(stderr, "\n ");
471 #define REPORT_FUNCTION(i) \
472 fprintf (stderr, " %s %d,", \
473 syntax_cache_statistics_function_names[i], \
474 scs_statistics.functions[i]);
476 REPORT_FUNCTION(scs_find_context);
477 REPORT_FUNCTION(scs_find_defun_start);
478 REPORT_FUNCTION(scs_scan_words);
479 REPORT_FUNCTION(scs_Fforward_comment);
480 fprintf(stderr, "\n ");
481 REPORT_FUNCTION(scs_scan_lists);
482 REPORT_FUNCTION(scs_Fbackward_prefix_characters);
483 REPORT_FUNCTION(scs_scan_sexps_forward);
484 #undef REPORT_FUNCTION
486 fprintf(stderr, "\n min length %d, max length %d,",
487 scs_statistics.min_length, scs_statistics.max_length);
489 "\n mean length %.1f, mean length on miss %.1f\n",
490 scs_statistics.mean_length,
491 scs_statistics.mean_length_on_miss);
493 #endif /* SYNTAX_CACHE_STATISTICS */
496 /* Convert a letter which signifies a syntax code
497 into the code it signifies.
498 This is used by modify-syntax-entry, and other things. */
500 const unsigned char syntax_spec_code[0400] =
501 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
502 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
503 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
504 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
505 (char)Swhitespace, 0377, (char)Sstring, 0377,
506 (char)Smath, 0377, 0377, (char)Squote,
507 (char)Sopen, (char)Sclose, 0377, 0377,
508 0377, (char)Swhitespace, (char)Spunct, (char)Scharquote,
509 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
510 0377, 0377, 0377, 0377,
511 (char)Scomment, 0377, (char)Sendcomment, 0377,
512 (char)Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
513 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
514 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char)Sword,
515 0377, 0377, 0377, 0377, (char)Sescape, 0377, 0377, (char)Ssymbol,
516 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
517 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
518 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char)Sword,
519 0377, 0377, 0377, 0377, (char)Sstring_fence, 0377, 0377, 0377
522 unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@!|";
524 DEFUN("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /*
525 Return a string of the recognized syntax designator chars.
526 The chars are ordered by their internal syntax codes, which are
527 numbered starting at 0.
531 return Vsyntax_designator_chars_string;
534 DEFUN("char-syntax", Fchar_syntax, 1, 2, 0, /*
535 Return the syntax code of CHARACTER, described by a character.
536 For example, if CHARACTER is a word constituent,
537 the character `?w' is returned.
538 The characters that correspond to various syntax codes
539 are listed in the documentation of `modify-syntax-entry'.
540 Optional second argument SYNTAX-TABLE defaults to the current buffer's
543 (character, syntax_table))
545 Lisp_Char_Table *mirrortab;
547 if (NILP(character)) {
548 character = make_char('\000');
550 CHECK_CHAR_COERCE_INT(character);
552 check_syntax_table(syntax_table, current_buffer->syntax_table);
553 mirrortab = XCHAR_TABLE(XCHAR_TABLE(syntax_table)->mirror_table);
555 make_char(syntax_code_spec
556 [(int)SYNTAX(mirrortab, XCHAR(character))]);
562 charset_syntax(struct buffer *buf, Lisp_Object charset, int *multi_p_out)
565 /* #### get this right */
571 Lisp_Object syntax_match(Lisp_Object syntax_table, Emchar ch)
573 Lisp_Object code = XCHAR_TABLE_VALUE_UNSAFE(syntax_table, ch);
574 Lisp_Object code2 = code;
578 if (SYNTAX_FROM_CODE(XINT(code2)) == Sinherit)
579 code = XCHAR_TABLE_VALUE_UNSAFE(Vstandard_syntax_table, ch);
581 return CONSP(code) ? XCDR(code) : Qnil;
584 DEFUN("matching-paren", Fmatching_paren, 1, 2, 0, /*
585 Return the matching parenthesis of CHARACTER, or nil if none.
586 Optional second argument SYNTAX-TABLE defaults to the current buffer's
589 (character, syntax_table))
591 Lisp_Char_Table *mirrortab;
594 CHECK_CHAR_COERCE_INT(character);
596 check_syntax_table(syntax_table, current_buffer->syntax_table);
597 mirrortab = XCHAR_TABLE(XCHAR_TABLE(syntax_table)->mirror_table);
598 code = SYNTAX(mirrortab, XCHAR(character));
599 if (code == Sopen || code == Sclose || code == Sstring)
600 return syntax_match(syntax_table, XCHAR(character));
605 /* Return 1 if there is a word boundary between two word-constituent
606 characters C1 and C2 if they appear in this order, else return 0.
607 There is no word boundary between two word-constituent ASCII
609 #define WORD_BOUNDARY_P(c1, c2) \
610 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \
611 && word_boundary_p (c1, c2))
613 extern int word_boundary_p(Emchar c1, Emchar c2);
616 /* Return the position across COUNT words from FROM.
617 If that many words cannot be found before the end of the buffer, return 0.
618 COUNT negative means scan backward and stop at word beginning. */
620 Bufpos scan_words(struct buffer *buf, Bufpos from, int count)
622 Bufpos limit = count > 0 ? BUF_ZV(buf) : BUF_BEGV(buf);
624 enum syntaxcode code;
626 SCS_STATISTICS_SET_FUNCTION(scs_scan_words);
627 SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, from, count);
629 /* #### is it really worth it to hand expand both cases? JV */
637 UPDATE_SYNTAX_CACHE_FORWARD(from);
638 ch0 = BUF_FETCH_CHAR(buf, from);
639 code = SYNTAX_FROM_CACHE(mirrortab, ch0);
642 if (words_include_escapes
643 && (code == Sescape || code == Scharquote))
651 while (from != limit) {
652 UPDATE_SYNTAX_CACHE_FORWARD(from);
653 ch1 = BUF_FETCH_CHAR(buf, from);
654 code = SYNTAX_FROM_CACHE(mirrortab, ch1);
655 if (!(words_include_escapes
656 && (code == Sescape || code == Scharquote)))
659 || WORD_BOUNDARY_P(ch0, ch1)
678 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
679 ch1 = BUF_FETCH_CHAR(buf, from - 1);
680 code = SYNTAX_FROM_CACHE(mirrortab, ch1);
683 if (words_include_escapes
684 && (code == Sescape || code == Scharquote))
692 while (from != limit) {
693 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
694 ch0 = BUF_FETCH_CHAR(buf, from - 1);
695 code = SYNTAX_FROM_CACHE(mirrortab, ch0);
697 if (!(words_include_escapes
698 && (code == Sescape || code == Scharquote)))
701 || WORD_BOUNDARY_P(ch0, ch1)
716 DEFUN("forward-word", Fforward_word, 0, 2, "_p", /*
717 Move point forward COUNT words (backward if COUNT is negative).
718 Normally t is returned, but if an edge of the buffer is reached,
719 point is left there and nil is returned.
721 The characters that are moved over may be added to the current selection
722 \(i.e. active region) if the Shift key is held down, a motion key is used
723 to invoke this command, and `shifted-motion-keys-select-region' is t; see
724 the documentation for this variable for more details.
726 COUNT defaults to 1, and BUFFER defaults to the current buffer.
731 struct buffer *buf = decode_buffer(buffer, 0);
741 val = scan_words(buf, BUF_PT(buf), n);
743 BUF_SET_PT(buf, val);
746 BUF_SET_PT(buf, n > 0 ? BUF_ZV(buf) : BUF_BEGV(buf));
751 static void scan_sexps_forward(struct buffer *buf,
752 struct lisp_parse_state *,
753 Bufpos from, Bufpos end,
754 int targetdepth, int stopbefore,
755 Lisp_Object oldstate, int commentstop);
758 find_start_of_comment(struct buffer *buf, Bufpos from, Bufpos stop,
762 enum syntaxcode code;
764 /* Look back, counting the parity of string-quotes,
765 and recording the comment-starters seen.
766 When we reach a safe place, assume that's not in a string;
767 then step the main scan to the earliest comment-starter seen
768 an even number of string quotes away from the safe place.
770 OFROM[I] is position of the earliest comment-starter seen
771 which is I+2X quotes from the comment-end.
772 PARITY is current parity of quotes from the comment end. */
774 Emchar my_stringend = 0;
775 int string_lossage = 0;
776 Bufpos comment_end = from;
777 Bufpos comstart_pos = 0;
778 int comstart_parity = 0;
779 int styles_match_p = 0;
780 /* mask to match comment styles against; for ST_COMMENT_STYLE, this
781 will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
782 int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
784 /* At beginning of range to scan, we're outside of strings;
785 that determines quote parity to the comment-end. */
786 while (from != stop) {
789 /* Move back and examine a character. */
791 UPDATE_SYNTAX_CACHE_BACKWARD(from);
793 c = BUF_FETCH_CHAR(buf, from);
794 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
795 code = SYNTAX_FROM_CODE(syncode);
797 /* is this a 1-char comment end sequence? if so, try
798 to see if style matches previously extracted mask */
799 if (code == Sendcomment) {
800 /* MT had SYNTAX_CODE_COMMENT_1CHAR_MASK (syncode) & mask
801 but (as a Boolean) that's just a complicated way to write: */
803 SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask);
806 /* or are we looking at a 1-char comment start sequence
807 of the style matching mask? */
808 else if (code == Scomment) {
810 SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask);
813 /* otherwise, is this a 2-char comment end or start sequence? */
814 else if (from > stop)
816 /* 2-char comment end sequence? */
817 if (SYNTAX_CODE_END_SECOND_P(syncode)) {
819 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
821 SYNTAX_CODE_FROM_CACHE(mirrortab,
826 if (SYNTAX_CODES_END_P
827 (prev_syncode, syncode)) {
830 SYNTAX_CODES_MATCH_END_P
831 (prev_syncode, syncode,
834 UPDATE_SYNTAX_CACHE_BACKWARD
836 c = BUF_FETCH_CHAR(buf, from);
838 /* Found a comment-end sequence, so skip past the
839 check for a comment-start */
844 /* 2-char comment start sequence? */
845 if (SYNTAX_CODE_START_SECOND_P(syncode)) {
847 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
849 SYNTAX_CODE_FROM_CACHE(mirrortab,
854 if (SYNTAX_CODES_START_P
855 (prev_syncode, syncode)) {
858 SYNTAX_CODES_MATCH_START_P
859 (prev_syncode, syncode,
862 UPDATE_SYNTAX_CACHE_BACKWARD
864 c = BUF_FETCH_CHAR(buf, from);
869 /* Ignore escaped characters. */
870 if (char_quoted(buf, from))
873 /* Track parity of quotes. */
874 if (code == Sstring) {
876 if (my_stringend == 0)
878 /* If we have two kinds of string delimiters.
879 There's no way to grok this scanning backwards. */
880 else if (my_stringend != c)
884 if (code == Sstring_fence || code == Scomment_fence) {
886 if (my_stringend == 0)
889 Sstring_fence ? ST_STRING_STYLE :
891 /* If we have two kinds of string delimiters.
892 There's no way to grok this scanning backwards. */
893 else if (my_stringend != (code == Sstring_fence
899 /* Record comment-starters according to that
900 quote-parity to the comment-end. */
901 if (code == Scomment && styles_match_p) {
902 comstart_parity = parity;
906 /* If we find another earlier comment-ender,
907 any comment-starts earlier than that don't count
908 (because they go with the earlier comment-ender). */
909 if (code == Sendcomment && styles_match_p)
912 /* Assume a defun-start point is outside of strings. */
914 && (from == stop || BUF_FETCH_CHAR(buf, from - 1) == '\n'))
918 if (comstart_pos == 0)
920 /* If the earliest comment starter
921 is followed by uniform paired string quotes or none,
922 we know it can't be inside a string
923 since if it were then the comment ender would be inside one.
924 So it does start a comment. Skip back to it. */
925 else if (comstart_parity == 0 && !string_lossage)
928 /* We had two kinds of string delimiters mixed up
929 together. Decode this going forwards.
930 Scan fwd from the previous comment ender
931 to the one in question; this records where we
932 last passed a comment starter. */
934 struct lisp_parse_state state;
935 scan_sexps_forward(buf, &state,
936 find_defun_start(buf, comment_end),
937 comment_end - 1, -10000, 0, Qnil, 0);
939 from = state.comstr_start;
941 /* We can't grok this as a comment; scan it normally. */
943 UPDATE_SYNTAX_CACHE_FORWARD(from - 1);
949 find_end_of_comment(struct buffer *buf, Bufpos from, Bufpos stop, int comstyle)
953 enum syntaxcode code, next_code;
954 /* mask to match comment styles against; for ST_COMMENT_STYLE, this
955 will get set to SYNTAX_COMMENT_STYLE_B, but never get checked */
956 int mask = comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A;
958 /* This is only called by functions which have already set up the
959 syntax_cache and are keeping it up-to-date */
965 UPDATE_SYNTAX_CACHE_FORWARD(from);
966 c = BUF_FETCH_CHAR(buf, from);
967 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
968 code = SYNTAX_FROM_CODE(syncode);
971 UPDATE_SYNTAX_CACHE_FORWARD(from);
973 /* At end of current generic comment? */
974 if (comstyle == ST_COMMENT_STYLE) {
975 if (code == Scomment_fence)
978 continue; /* Ignore other styles in generic comments */
980 /* At end of current one-character comment of specified style? */
981 else if (code == Sendcomment &&
982 SYNTAX_CODE_MATCHES_1CHAR_P(syncode, mask)) {
983 /* pre-MT code effectively does from-- here, that seems wrong */
987 /* At end of current two-character comment of specified style? */
988 c = BUF_FETCH_CHAR(buf, from);
989 next_code = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
991 && SYNTAX_CODES_MATCH_END_P(syncode, next_code, mask)) {
993 UPDATE_SYNTAX_CACHE_FORWARD(from);
1000 /* #### between FSF 19.23 and 19.28 there are some changes to the logic
1001 in this function (and minor changes to find_start_of_comment(),
1002 above, which is part of Fforward_comment() in FSF). Attempts to port
1003 that logic made this function break, so I'm leaving it out. If anyone
1004 ever complains about this function not working properly, take a look
1005 at those changes. --ben */
1007 DEFUN("forward-comment", Fforward_comment, 0, 2, 0, /*
1008 Move forward across up to COUNT comments, or backwards if COUNT is negative.
1009 Stop scanning if we find something other than a comment or whitespace.
1010 Set point to where scanning stops.
1011 If COUNT comments are found as expected, with nothing except whitespace
1012 between them, return t; otherwise return nil.
1013 Point is set in either case.
1014 COUNT defaults to 1, and BUFFER defaults to the current buffer.
1021 enum syntaxcode code;
1024 struct buffer *buf = decode_buffer(buffer, 0);
1035 SCS_STATISTICS_SET_FUNCTION(scs_Fforward_comment);
1036 SETUP_SYNTAX_CACHE(from, n);
1041 while (from < stop) {
1042 int comstyle = 0; /* Code for comment style: 0 for A, 1 for B,
1043 or ST_COMMENT_STYLE */
1045 if (char_quoted(buf, from)) {
1050 UPDATE_SYNTAX_CACHE_FORWARD(from);
1051 c = BUF_FETCH_CHAR(buf, from);
1052 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1053 code = SYNTAX_FROM_CODE(syncode);
1055 if (code == Scomment) {
1056 /* we have encountered a single character comment start
1057 sequence, and we are ignoring all text inside comments.
1058 we must record the comment style this character begins
1059 so that later, only a comment end of the same style actually
1060 ends the comment section */
1062 SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1063 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1066 else if (code == Scomment_fence) {
1069 comstyle = ST_COMMENT_STYLE;
1072 else if (from < stop
1073 && SYNTAX_CODE_START_FIRST_P(syncode)) {
1075 UPDATE_SYNTAX_CACHE_FORWARD(from + 1);
1077 SYNTAX_CODE_FROM_CACHE(mirrortab,
1082 if (SYNTAX_CODES_START_P(syncode, next_syncode)) {
1083 /* we have encountered a 2char comment start sequence and we
1084 are ignoring all text inside comments. we must record
1085 the comment style this sequence begins so that later,
1086 only a comment end of the same style actually ends
1087 the comment section */
1090 SYNTAX_CODES_COMMENT_MASK_START
1091 (syncode, next_syncode)
1092 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1097 if (code == Scomment) {
1099 find_end_of_comment(buf, from, stop,
1102 /* we stopped because from==stop */
1103 BUF_SET_PT(buf, stop);
1108 /* We have skipped one comment. */
1110 } else if (code != Swhitespace
1111 && code != Sendcomment && code != Scomment) {
1112 BUF_SET_PT(buf, from);
1118 /* End of comment reached */
1125 stop = BUF_BEGV(buf);
1126 while (from > stop) {
1127 int comstyle = 0; /* Code for comment style: 0 for A, 1 for B,
1128 or ST_COMMENT_STYLE */
1131 if (char_quoted(buf, from)) {
1136 c = BUF_FETCH_CHAR(buf, from);
1137 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1138 code = SYNTAX_FROM_CODE(syncode);
1140 if (code == Sendcomment) {
1141 /* we have found a single char end comment. we must record
1142 the comment style encountered so that later, we can match
1143 only the proper comment begin sequence of the same style */
1145 SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1146 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1149 else if (code == Scomment_fence) {
1151 comstyle = ST_COMMENT_STYLE;
1154 else if (from > stop
1155 /* #### This seems logical but it's not in 21.4.9 */
1156 /* && !char_quoted (buf, from - 1) */
1157 && SYNTAX_CODE_END_SECOND_P(syncode)) {
1159 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1161 SYNTAX_CODE_FROM_CACHE(mirrortab,
1165 if (SYNTAX_CODES_END_P(prev_syncode, syncode)) {
1166 /* We must record the comment style encountered so that
1167 later, we can match only the proper comment begin
1168 sequence of the same style. */
1171 SYNTAX_CODES_COMMENT_MASK_END
1172 (prev_syncode, syncode)
1173 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1178 if (code == Sendcomment) {
1180 find_start_of_comment(buf, from, stop,
1185 else if (code != Swhitespace
1186 && code != Scomment && code != Sendcomment) {
1187 BUF_SET_PT(buf, from + 1);
1195 BUF_SET_PT(buf, from);
1200 scan_lists(struct buffer * buf, Bufpos from, int count, int depth,
1201 int sexpflag, int noerror)
1207 enum syntaxcode code;
1209 int min_depth = depth; /* Err out if depth gets less than this. */
1210 Bufpos last_good = from;
1215 SCS_STATISTICS_SET_FUNCTION(scs_scan_lists);
1216 SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, from, count);
1221 while (from < stop) {
1222 int comstyle = 0; /* mask for finding matching comment style */
1223 Emchar stringterm = '\0'; /* Used by Sstring case in switch */
1225 UPDATE_SYNTAX_CACHE_FORWARD(from);
1226 c = BUF_FETCH_CHAR(buf, from);
1227 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1228 code = SYNTAX_FROM_CODE(syncode);
1229 if (depth == min_depth)
1233 /* a 1-char comment start sequence */
1234 if (code == Scomment && parse_sexp_ignore_comments) {
1236 SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode) ==
1237 SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1240 /* else, a 2-char comment start sequence? */
1241 else if (from < stop
1242 && SYNTAX_CODE_START_FIRST_P(syncode)
1243 && parse_sexp_ignore_comments) {
1245 UPDATE_SYNTAX_CACHE_FORWARD(from);
1246 next_syncode = SYNTAX_CODE_FROM_CACHE(
1247 mirrortab, BUF_FETCH_CHAR(buf, from));
1249 if (SYNTAX_CODES_START_P(syncode,
1251 /* we have encountered a comment start
1252 sequence and we are ignoring all text
1253 inside comments. we must record the
1254 comment style this sequence begins so
1255 that later, only a comment end of the
1256 same style actually ends the comment
1260 SYNTAX_CODES_COMMENT_MASK_START
1261 (syncode, next_syncode)
1262 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1266 UPDATE_SYNTAX_CACHE_FORWARD(from);
1268 if (SYNTAX_CODE_PREFIX(syncode))
1277 /* treat following character as a word
1281 if (depth || !sexpflag)
1283 /* This word counts as a sexp; return at end of
1285 while (from < stop) {
1286 UPDATE_SYNTAX_CACHE_FORWARD(from);
1287 switch ((unsigned int)
1309 case Scomment_fence:
1310 comstyle = ST_COMMENT_STYLE;
1311 /* falls through! */
1313 if (!parse_sexp_ignore_comments)
1315 UPDATE_SYNTAX_CACHE_FORWARD(from);
1318 find_end_of_comment(buf, from, stop,
1321 /* we stopped because from ==
1322 stop in search forward */
1336 && c == BUF_FETCH_CHAR(buf, from))
1353 if (depth < min_depth) {
1356 signal_type_error_2(Qscan_error,
1357 "Containing expression "
1359 make_int(last_good),
1366 /* XEmacs change: call syntax_match on
1369 BUF_FETCH_CHAR(buf, from - 1);
1370 Lisp_Object stermobj =
1371 syntax_match(syntax_cache.
1372 current_syntax_table,
1375 if (CHARP(stermobj))
1376 stringterm = XCHAR(stermobj);
1380 /* falls through! */
1385 UPDATE_SYNTAX_CACHE_FORWARD(from);
1386 c = BUF_FETCH_CHAR(buf, from);
1389 : SYNTAX_FROM_CACHE(mirrortab,
1394 switch ((unsigned int)
1407 if (!depth && sexpflag)
1411 /* list them all here */
1424 /* Reached end of buffer. Error if within object,
1425 return nil if between */
1431 /* End of object reached */
1439 stop = BUF_BEGV(buf);
1440 while (from > stop) {
1441 /* mask for finding matching comment style */
1443 /* used by case Sstring in switch below */
1444 Emchar stringterm = '\0';
1447 UPDATE_SYNTAX_CACHE_BACKWARD(from);
1448 quoted = char_quoted(buf, from);
1451 UPDATE_SYNTAX_CACHE_BACKWARD(from);
1454 c = BUF_FETCH_CHAR(buf, from);
1455 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1456 code = SYNTAX_FROM_CODE(syncode);
1458 if (code == Sendcomment && parse_sexp_ignore_comments) {
1459 /* we have found a single char end comment. we
1460 must record the comment style encountered so
1461 that later, we can match only the proper
1462 comment begin sequence of the same style */
1464 SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1465 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1468 else if (from > stop
1469 && SYNTAX_CODE_END_SECOND_P(syncode)
1470 && !char_quoted(buf, from - 1)
1471 && parse_sexp_ignore_comments) {
1473 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1474 prev_syncode = SYNTAX_CODE_FROM_CACHE
1475 (mirrortab, BUF_FETCH_CHAR(buf, from - 1));
1477 if (SYNTAX_CODES_END_P(prev_syncode, syncode)) {
1478 /* we must record the comment style
1479 encountered so that later, we can
1480 match only the proper comment begin
1481 sequence of the same style */
1484 SYNTAX_CODES_COMMENT_MASK_END
1485 (prev_syncode, syncode)
1486 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1491 if (SYNTAX_CODE_PREFIX(syncode)) {
1495 switch (quoted ? Sword : code) {
1498 if (depth || !sexpflag)
1500 /* This word counts as a sexp; count object
1501 finished after passing it. */
1502 while (from > stop) {
1503 /* enum syntaxcode syncode; */
1504 UPDATE_SYNTAX_CACHE_BACKWARD(from);
1505 quoted = char_quoted(buf, from - 1);
1511 SYNTAX_FROM_CACHE(mirrortab,
1515 == Sword || syncode == Ssymbol
1516 || syncode == Squote))
1526 && c == BUF_FETCH_CHAR(buf, from - 1))
1543 if (depth < min_depth) {
1547 signal_type_error_2(Qscan_error,
1548 "Containing expression "
1550 make_int(last_good),
1555 case Scomment_fence:
1556 comstyle = ST_COMMENT_STYLE;
1557 /* falls through! */
1559 if (parse_sexp_ignore_comments)
1561 find_start_of_comment(buf, from,
1567 /* XEmacs change: call syntax_match() on
1569 Emchar ch = BUF_FETCH_CHAR(buf, from);
1570 Lisp_Object stermobj =
1571 syntax_match(syntax_cache.
1572 current_syntax_table, ch);
1573 if (CHARP(stermobj)) {
1574 stringterm = XCHAR(stermobj);
1580 /* falls through! */
1586 UPDATE_SYNTAX_CACHE_BACKWARD(from - 1);
1587 c = BUF_FETCH_CHAR(buf, from - 1);
1588 if ((code == Sstring
1590 : SYNTAX_FROM_CACHE(mirrortab,
1593 && !char_quoted(buf, from - 1)) {
1600 if (!depth && sexpflag)
1605 /* shouldnt happen */
1610 /* Reached start of buffer. Error if within object,
1611 return nil if between */
1621 return (make_int(from));
1625 signal_type_error_2(Qscan_error, "Unbalanced parentheses",
1626 make_int(last_good), make_int(from));
1630 int char_quoted(struct buffer *buf, Bufpos pos)
1632 enum syntaxcode code;
1633 Bufpos beg = BUF_BEGV(buf);
1635 Bufpos startpos = pos;
1638 UPDATE_SYNTAX_CACHE_BACKWARD(pos - 1);
1640 SYNTAX_FROM_CACHE(mirrortab, BUF_FETCH_CHAR(buf, pos - 1));
1642 if (code != Scharquote && code != Sescape)
1648 UPDATE_SYNTAX_CACHE(startpos);
1652 DEFUN("scan-lists", Fscan_lists, 3, 5, 0, /*
1653 Scan from character number FROM by COUNT lists.
1654 Returns the character number of the position thus found.
1656 If DEPTH is nonzero, paren depth begins counting from that value,
1657 only places where the depth in parentheses becomes zero
1658 are candidates for stopping; COUNT such places are counted.
1659 Thus, a positive value for DEPTH means go out levels.
1661 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1663 If the beginning or end of (the accessible part of) the buffer is reached
1664 and the depth is wrong, an error is signaled.
1665 If the depth is right but the count is not used up, nil is returned.
1667 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1668 of in the current buffer.
1670 If optional arg NOERROR is non-nil, scan-lists will return nil instead of
1671 signalling an error.
1673 (from, count, depth, buffer, noerror))
1680 buf = decode_buffer(buffer, 0);
1682 return scan_lists(buf, XINT(from), XINT(count), XINT(depth), 0,
1686 DEFUN("scan-sexps", Fscan_sexps, 2, 4, 0, /*
1687 Scan from character number FROM by COUNT balanced expressions.
1688 If COUNT is negative, scan backwards.
1689 Returns the character number of the position thus found.
1691 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
1693 If the beginning or end of (the accessible part of) the buffer is reached
1694 in the middle of a parenthetical grouping, an error is signaled.
1695 If the beginning or end is reached between groupings
1696 but before count is used up, nil is returned.
1698 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead
1699 of in the current buffer.
1701 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of
1702 signalling an error.
1704 (from, count, buffer, noerror))
1706 struct buffer *buf = decode_buffer(buffer, 0);
1710 return scan_lists(buf, XINT(from), XINT(count), 0, 1, !NILP(noerror));
1713 DEFUN("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /*
1714 Move point backward over any number of chars with prefix syntax.
1715 This includes chars with "quote" or "prefix" syntax (' or p).
1717 Optional arg BUFFER defaults to the current buffer.
1721 struct buffer *buf = decode_buffer(buffer, 0);
1722 Bufpos beg = BUF_BEGV(buf);
1723 Bufpos pos = BUF_PT(buf);
1725 Lisp_Char_Table *mirrortab = XCHAR_TABLE(buf->mirror_syntax_table);
1727 Emchar c = '\0'; /* initialize to avoid compiler warnings */
1729 SCS_STATISTICS_SET_FUNCTION(scs_Fbackward_prefix_characters);
1730 SETUP_SYNTAX_CACHE_FOR_BUFFER(buf, pos, -1);
1732 while (pos > beg && !char_quoted(buf, pos - 1)
1733 /* Previous statement updates syntax table. */
1735 (SYNTAX_FROM_CACHE(mirrortab, c = BUF_FETCH_CHAR(buf, pos - 1))
1737 /* equivalent to SYNTAX_PREFIX (mirrortab, c) */
1738 || SYNTAX_CODE_PREFIX(SYNTAX_CODE_FROM_CACHE(mirrortab, c))))
1741 BUF_SET_PT(buf, pos);
1746 /* Parse forward from FROM to END,
1747 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1748 and return a description of the state of the parse at END.
1749 If STOPBEFORE is nonzero, stop at the start of an atom.
1750 If COMMENTSTOP is 1, stop at the start of a comment; if it is -1,
1751 stop at the start of a comment or a string */
1754 scan_sexps_forward(struct buffer *buf, struct lisp_parse_state *stateptr,
1755 Bufpos from, Bufpos end,
1756 int targetdepth, int stopbefore,
1757 Lisp_Object oldstate, int commentstop)
1759 struct lisp_parse_state state;
1761 enum syntaxcode code;
1765 struct level levelstart[100];
1766 struct level *curlevel = levelstart;
1767 struct level *endlevel = levelstart + 100;
1768 int depth; /* Paren depth of current scanning location.
1769 level - levelstart equals this except
1770 when the depth becomes negative. */
1771 int mindepth; /* Lowest DEPTH value seen. */
1772 int start_quoted = 0; /* Nonzero means starting after a char quote */
1773 int boundary_stop = commentstop == -1;
1776 SCS_STATISTICS_SET_FUNCTION(scs_scan_sexps_forward);
1777 SETUP_SYNTAX_CACHE(from, 1);
1778 if (NILP(oldstate)) {
1780 state.instring = -1;
1781 state.incomment = 0;
1782 state.comstyle = 0; /* comment style a by default */
1783 state.comstr_start = -1; /* no comment/string seen. */
1785 tem = Fcar(oldstate); /* elt 0, depth */
1791 oldstate = Fcdr(oldstate);
1792 oldstate = Fcdr(oldstate);
1793 oldstate = Fcdr(oldstate);
1794 tem = Fcar(oldstate); /* elt 3, instring */
1795 state.instring = (!NILP(tem)
1796 ? (INTP(tem) ? XINT(tem) : ST_STRING_STYLE)
1799 oldstate = Fcdr(oldstate);
1800 tem = Fcar(oldstate); /* elt 4, incomment */
1801 state.incomment = !NILP(tem);
1803 oldstate = Fcdr(oldstate);
1804 tem = Fcar(oldstate); /* elt 5, follows-quote */
1805 start_quoted = !NILP(tem);
1807 /* if the eighth element of the list is nil, we are in comment style
1808 a; if it is t, we are in comment style b; if it is 'syntax-table,
1809 we are in a generic comment */
1810 oldstate = Fcdr(oldstate);
1811 oldstate = Fcdr(oldstate);
1812 /* The code below was changed radically for syntax-table properties.
1813 A reasonable place to look if a bug manifests. */
1814 tem = Fcar(oldstate); /* elt 7, comment style a/b/fence */
1815 state.comstyle = NILP(tem) ? 0 : (EQ(tem, Qsyntax_table)
1816 ? ST_COMMENT_STYLE : 1);
1818 oldstate = Fcdr(oldstate); /* elt 8, start of last comment/string */
1819 tem = Fcar(oldstate);
1820 state.comstr_start = NILP(tem) ? -1 : XINT(tem);
1822 /* elt 9, char numbers of starts-of-expression of levels
1823 (starting from outermost). */
1824 oldstate = Fcdr(oldstate);
1825 tem = Fcar(oldstate); /* elt 9, intermediate data for
1826 continuation of parsing (subject
1828 while (!NILP(tem)) { /* >= second enclosing sexps. */
1829 curlevel->last = XINT(Fcar(tem));
1830 if (++curlevel == endlevel)
1831 error("Nesting too deep for parser");
1832 curlevel->prev = -1;
1833 curlevel->last = -1;
1836 /* end radical change section */
1841 curlevel->prev = -1;
1842 curlevel->last = -1;
1844 /* Enter the loop at a place appropriate for initial state. */
1846 if (state.incomment)
1847 goto startincomment;
1848 if (state.instring >= 0) {
1850 goto startquotedinstring;
1856 while (from < end) {
1862 UPDATE_SYNTAX_CACHE_FORWARD(from);
1863 c = BUF_FETCH_CHAR(buf, from);
1864 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
1865 code = SYNTAX_FROM_CODE(syncode);
1868 /* record the comment style we have entered so that only the
1869 comment-ender sequence (or single char) of the same style
1870 actually terminates the comment section. */
1871 if (code == Scomment) {
1872 state.comstyle = SYNTAX_CODE_COMMENT_1CHAR_MASK(syncode)
1873 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1874 state.comstr_start = from - 1;
1877 /* a generic comment delimiter? */
1878 else if (code == Scomment_fence) {
1879 state.comstyle = ST_COMMENT_STYLE;
1880 state.comstr_start = from - 1;
1884 else if (from < end && SYNTAX_CODE_START_FIRST_P(syncode)) {
1886 UPDATE_SYNTAX_CACHE_FORWARD(from);
1888 SYNTAX_CODE_FROM_CACHE(mirrortab,
1889 BUF_FETCH_CHAR(buf, from));
1891 if (SYNTAX_CODES_START_P(syncode, next_syncode)) {
1894 SYNTAX_CODES_COMMENT_MASK_START(syncode,
1896 == SYNTAX_COMMENT_STYLE_A ? 0 : 1;
1897 state.comstr_start = from - 1;
1899 UPDATE_SYNTAX_CACHE_FORWARD(from);
1903 if (SYNTAX_CODE_PREFIX(syncode))
1909 goto stop; /* this arg means stop at sexp start */
1910 curlevel->last = from - 1;
1916 /* treat following character as a word constituent */
1920 goto stop; /* this arg means stop at sexp start */
1921 curlevel->last = from - 1;
1923 while (from < end) {
1924 UPDATE_SYNTAX_CACHE_FORWARD(from);
1925 switch ((unsigned int)
1928 BUF_FETCH_CHAR(buf, from))) {
1945 curlevel->prev = curlevel->last;
1949 state.incomment = 1;
1950 if (commentstop || boundary_stop)
1953 if (commentstop == 1)
1955 UPDATE_SYNTAX_CACHE_FORWARD(from);
1958 find_end_of_comment(buf, from, end,
1961 /* we terminated search because from == end */
1967 state.incomment = 0;
1968 state.comstyle = 0; /* reset the comment style */
1975 goto stop; /* this arg means stop at sexp start */
1977 /* curlevel++->last ran into compiler bug on Apollo */
1978 curlevel->last = from - 1;
1979 if (++curlevel == endlevel)
1980 error("Nesting too deep for parser");
1981 curlevel->prev = -1;
1982 curlevel->last = -1;
1983 if (targetdepth == depth)
1989 if (depth < mindepth)
1991 if (curlevel != levelstart)
1993 curlevel->prev = curlevel->last;
1994 if (targetdepth == depth)
2000 state.comstr_start = from - 1;
2002 goto stop; /* this arg means stop at sexp start */
2003 curlevel->last = from - 1;
2004 if (code == Sstring_fence) {
2005 state.instring = ST_STRING_STYLE;
2007 /* XEmacs change: call syntax_match() on character */
2008 Emchar ch = BUF_FETCH_CHAR(buf, from - 1);
2009 Lisp_Object stermobj =
2010 syntax_match(syntax_cache.
2011 current_syntax_table, ch);
2013 if (CHARP(stermobj))
2014 state.instring = XCHAR(stermobj);
2016 state.instring = ch;
2022 enum syntaxcode temp_code;
2027 UPDATE_SYNTAX_CACHE_FORWARD(from);
2028 c = BUF_FETCH_CHAR(buf, from);
2029 temp_code = SYNTAX_FROM_CACHE(mirrortab, c);
2031 if (state.instring != ST_STRING_STYLE &&
2032 temp_code == Sstring && c == state.instring)
2035 switch ((unsigned int)temp_code) {
2037 if (state.instring == ST_STRING_STYLE)
2044 startquotedinstring:
2055 state.instring = -1;
2056 curlevel->prev = curlevel->last;
2069 case Scomment_fence:
2079 stop: /* Here if stopping before start of sexp. */
2080 from--; /* We have just fetched the char that starts it; */
2081 goto done; /* but return the position before it. */
2086 state.depth = depth;
2087 state.mindepth = mindepth;
2088 state.thislevelstart = curlevel->prev;
2089 state.prevlevelstart
2090 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2091 state.location = from;
2092 state.levelstarts = Qnil;
2093 while (--curlevel >= levelstart)
2094 state.levelstarts = Fcons(make_int(curlevel->last),
2100 DEFUN("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /*
2101 Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
2102 Parsing stops at TO or when certain criteria are met;
2103 point is set to where parsing stops.
2104 If fifth arg OLDSTATE is omitted or nil,
2105 parsing assumes that FROM is the beginning of a function.
2106 Value is a list of nine elements describing final state of parsing:
2108 1. character address of start of innermost containing list; nil if none.
2109 2. character address of start of last complete sexp terminated.
2110 3. non-nil if inside a string.
2111 (It is the character that will terminate the string,
2112 or t if the string should be terminated by an explicit
2113 `syntax-table' property.)
2114 4. t if inside a comment.
2115 5. t if following a quote character.
2116 6. the minimum paren-depth encountered during this scan.
2117 7. nil if in comment style a, or not in a comment; t if in comment style b;
2118 `syntax-table' if given by an explicit `syntax-table' property.
2119 8. character address of start of last comment or string; nil if none.
2120 9. Intermediate data for continuation of parsing (subject to change).
2121 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
2122 in parentheses becomes equal to TARGETDEPTH.
2123 Fourth arg STOPBEFORE non-nil means stop when come to
2124 any character that starts a sexp.
2125 Fifth arg OLDSTATE is a nine-element list like what this function returns.
2126 It is used to initialize the state of the parse. Its second and third
2127 elements are ignored.
2128 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If it
2129 is `syntax-table', stop after the start of a comment or a string, or after
2130 the end of a comment or string.
2132 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer))
2134 struct lisp_parse_state state;
2137 struct buffer *buf = decode_buffer(buffer, 0);
2140 if (!NILP(targetdepth)) {
2141 CHECK_INT(targetdepth);
2142 target = XINT(targetdepth);
2144 target = -100000; /* We won't reach this depth */
2146 get_buffer_range_char(buf, from, to, &start, &end, 0);
2147 scan_sexps_forward(buf, &state, start, end,
2148 target, !NILP(stopbefore), oldstate,
2150 ? 0 : (EQ(commentstop, Qsyntax_table) ? -1 : 1)));
2151 BUF_SET_PT(buf, state.location);
2155 val = Fcons(state.levelstarts, val);
2156 val = Fcons((state.incomment || (state.instring >= 0))
2157 ? make_int(state.comstr_start) : Qnil, val);
2158 val = Fcons(state.comstyle ? (state.comstyle == ST_COMMENT_STYLE
2159 ? Qsyntax_table : Qt) : Qnil, val);
2160 val = Fcons(make_int(state.mindepth), val);
2161 val = Fcons(state.quoted ? Qt : Qnil, val);
2162 val = Fcons(state.incomment ? Qt : Qnil, val);
2163 val = Fcons(state.instring < 0
2165 : (state.instring == ST_STRING_STYLE
2166 ? Qt : make_int(state.instring)), val);
2168 Fcons(state.thislevelstart <
2169 0 ? Qnil : make_int(state.thislevelstart), val);
2171 Fcons(state.prevlevelstart <
2172 0 ? Qnil : make_int(state.prevlevelstart), val);
2173 val = Fcons(make_int(state.depth), val);
2178 /* Updating of the mirror syntax table.
2180 Each syntax table has a corresponding mirror table in it.
2181 Whenever we make a change to a syntax table, we call
2182 update_syntax_table() on it.
2184 #### We really only need to map over the changed range.
2186 If we change the standard syntax table, we need to map over
2187 all tables because any of them could be inheriting from the
2188 standard syntax table.
2190 When `set-syntax-table' is called, we set the buffer's mirror
2191 syntax table as well.
2195 Lisp_Object mirrortab;
2199 static int cmst_mapfun(struct chartab_range *range, Lisp_Object val, void *arg)
2201 struct cmst_arg *closure = (struct cmst_arg *)arg;
2205 if (SYNTAX_FROM_CODE(XINT(val)) == Sinherit && closure->check_inherit) {
2206 struct cmst_arg recursive;
2208 recursive.mirrortab = closure->mirrortab;
2209 recursive.check_inherit = 0;
2210 map_char_table(XCHAR_TABLE(Vstandard_syntax_table), range,
2211 cmst_mapfun, &recursive);
2213 put_char_table(XCHAR_TABLE(closure->mirrortab), range, val);
2217 static void update_just_this_syntax_table(Lisp_Char_Table * ct)
2219 struct chartab_range range;
2220 struct cmst_arg arg;
2222 arg.mirrortab = ct->mirror_table;
2223 arg.check_inherit = (CHAR_TABLEP(Vstandard_syntax_table)
2224 && ct != XCHAR_TABLE(Vstandard_syntax_table));
2225 range.type = CHARTAB_RANGE_ALL;
2226 map_char_table(ct, &range, cmst_mapfun, &arg);
2229 /* Called from chartab.c when a change is made to a syntax table.
2230 If this is the standard syntax table, we need to recompute
2231 *all* syntax tables (yuck). Otherwise we just recompute this
2234 void update_syntax_table(Lisp_Char_Table * ct)
2236 /* Don't be stymied at startup. */
2237 if (CHAR_TABLEP(Vstandard_syntax_table)
2238 && ct == XCHAR_TABLE(Vstandard_syntax_table)) {
2241 for (syntab = Vall_syntax_tables; !NILP(syntab);
2242 syntab = XCHAR_TABLE(syntab)->next_table)
2243 update_just_this_syntax_table(XCHAR_TABLE(syntab));
2245 update_just_this_syntax_table(ct);
2248 /************************************************************************/
2249 /* initialization */
2250 /************************************************************************/
2252 void syms_of_syntax(void)
2254 defsymbol(&Qsyntax_table_p, "syntax-table-p");
2255 defsymbol(&Qsyntax_table, "syntax-table");
2257 DEFSUBR(Fsyntax_table_p);
2258 DEFSUBR(Fsyntax_table);
2259 DEFSUBR(Fstandard_syntax_table);
2260 DEFSUBR(Fcopy_syntax_table);
2261 DEFSUBR(Fset_syntax_table);
2262 DEFSUBR(Fsyntax_designator_chars);
2263 DEFSUBR(Fchar_syntax);
2264 DEFSUBR(Fmatching_paren);
2265 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */
2266 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */
2268 DEFSUBR(Fforward_word);
2270 DEFSUBR(Fforward_comment);
2271 DEFSUBR(Fscan_lists);
2272 DEFSUBR(Fscan_sexps);
2273 DEFSUBR(Fbackward_prefix_chars);
2274 DEFSUBR(Fparse_partial_sexp);
2276 DEFERROR_STANDARD(Qscan_error, Qsyntax_error);
2279 void vars_of_syntax(void)
2281 DEFVAR_BOOL("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /*
2282 Non-nil means `forward-sexp', etc., should treat comments as whitespace.
2284 parse_sexp_ignore_comments = 0;
2286 DEFVAR_BOOL("lookup-syntax-properties", &lookup_syntax_properties /*
2287 Non-nil means `forward-sexp', etc., look up character syntax in the
2288 table that is the value of the `syntax-table' text property, if non-nil.
2289 The value of this property should be either a syntax table, or a cons
2290 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric
2291 syntax code, MATCHCHAR being nil or the character to match (which is
2292 relevant only for open/close type.
2294 lookup_syntax_properties = 0; /* #### default off until optimized */
2296 DEFVAR_BOOL("words-include-escapes", &words_include_escapes /*
2297 Non-nil means `forward-word', etc., should treat escape chars part of words.
2299 words_include_escapes = 0;
2301 no_quit_in_re_search = 0;
2304 static void define_standard_syntax(const char *p, enum syntaxcode syn)
2307 Fput_char_table(make_char(*p), make_int(syn),
2308 Vstandard_syntax_table);
2311 void complex_vars_of_syntax(void)
2315 /* Set this now, so first buffer creation can refer to it. */
2316 /* Make it nil before calling copy-syntax-table
2317 so that copy-syntax-table will know not to try to copy from garbage */
2318 Vstandard_syntax_table = Qnil;
2319 Vstandard_syntax_table = Fcopy_syntax_table(Qnil);
2320 staticpro(&Vstandard_syntax_table);
2322 Vsyntax_designator_chars_string = make_string_nocopy(syntax_code_spec,
2324 staticpro(&Vsyntax_designator_chars_string);
2326 fill_char_table(XCHAR_TABLE(Vstandard_syntax_table), make_int(Spunct));
2328 for (i = 0; i <= 32; i++) /* Control 0 plus SPACE */
2329 Fput_char_table(make_char(i), make_int(Swhitespace),
2330 Vstandard_syntax_table);
2331 for (i = 127; i <= 159; i++) /* DEL plus Control 1 */
2332 Fput_char_table(make_char(i), make_int(Swhitespace),
2333 Vstandard_syntax_table);
2335 define_standard_syntax("abcdefghijklmnopqrstuvwxyz"
2336 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2337 "0123456789" "$%", Sword);
2338 define_standard_syntax("\"", Sstring);
2339 define_standard_syntax("\\", Sescape);
2340 define_standard_syntax("_-+*/&|<>=", Ssymbol);
2341 define_standard_syntax(".,;:?!#@~^'`", Spunct);
2343 for (p = "()[]{}"; *p; p += 2) {
2344 Fput_char_table(make_char(p[0]),
2345 Fcons(make_int(Sopen), make_char(p[1])),
2346 Vstandard_syntax_table);
2347 Fput_char_table(make_char(p[1]),
2348 Fcons(make_int(Sclose), make_char(p[0])),
2349 Vstandard_syntax_table);