1 /* Routines to compute the current syntactic context, for font-lock mode.
2 Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of SXEmacs
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* Synched up with: Not in FSF. */
23 /* This code computes the syntactic context of the current point, that is,
24 whether point is within a comment, a string, what have you. It does
25 this by picking a point "known" to be outside of any syntactic constructs
26 and moving forward, examining the syntax of each character.
28 Two caches are used: one caches the last point computed, and the other
29 caches the last point at the beginning of a line. This makes there
30 be little penalty for moving left-to-right on a line a character at a
31 time; makes starting over on a line be cheap; and makes random-accessing
32 within a line relatively cheap.
34 When we move to a different line farther down in the file (but within the
35 current top-level form) we simply continue computing forward. If we move
36 backward more than a line, or move beyond the end of the current tlf, or
37 switch buffers, then we call `beginning-of-defun' and start over from
40 #### We should really rewrite this to keep extents over the buffer
41 that hold the current syntactic information. This would be a big win.
42 This way there would be no guessing or incorrect results.
53 Lisp_Object Qblock_comment;
54 Lisp_Object Qbeginning_of_defun;
56 enum syntactic_context {
60 context_block_comment,
61 context_generic_comment,
62 context_generic_string
65 enum block_comment_context {
78 struct context_cache {
79 Bufpos start_point; /* beginning of defun */
80 Bufpos cur_point; /* cache location */
81 Bufpos end_point; /* end of defun */
82 struct buffer *buffer; /* does this need to be staticpro'd? */
83 enum syntactic_context context; /* single-char-syntax state */
84 enum block_comment_context ccontext; /* block-comment state */
85 enum comment_style style; /* which comment group */
86 Emchar scontext; /* active string delimiter */
87 int depth; /* depth in parens */
88 int backslash_p; /* just read a backslash */
89 int needs_its_head_reexamined; /* we're apparently outside of
90 a top level form, and far away
91 from it. This is a bad situation
92 because it will lead to constant
93 slowness as we keep going way
94 back to that form and moving
95 forward again. In this case,
96 we try to compute a "pseudo-
97 top-level-form" where the
98 depth is 0 and the context
99 is none at both ends. */
102 /* We have two caches; one for the current point and one for
103 the beginning of line. We used to rely on the caller to
104 tell us when to invalidate them, but now we do it ourselves;
105 it lets us be smarter. */
107 static struct context_cache context_cache;
109 static struct context_cache bol_context_cache;
113 #define reset_context_cache(cc) memset (cc, 0, sizeof (struct context_cache))
115 /* This function is called from signal_after_change() to tell us when
116 textual changes are made so we can flush our caches when necessary.
118 We make the following somewhat heuristic assumptions:
120 (remember that current_point is always >= start_point, but may be
121 less than or greater than end_point (we might not be inside any
124 1) Textual changes before the beginning of the current top-level form
125 don't affect anything; all we need to do is offset the caches
127 2) Textual changes right at the beginning of the current
128 top-level form messes things up and requires that we flush
130 3) Textual changes after the beginning of the current top-level form
131 and before one or both or the caches invalidates the corresponding
133 4) Textual changes after the caches and before the end of the
134 current top-level form don't affect anything; all we need to do is
135 offset the caches appropriately.
136 5) Textual changes right at the end of the current top-level form
137 necessitate recomputing that end value.
138 6) Textual changes after the end of the current top-level form
142 font_lock_maybe_update_syntactic_caches(struct buffer *buf, Bufpos start,
143 Bufpos orig_end, Bufpos new_end)
145 /* Note: either both context_cache and bol_context_cache are valid and
146 point to the same buffer, or both are invalid. If we have to
147 invalidate just context_cache, we recopy it from bol_context_cache.
149 if (context_cache.buffer != buf)
150 /* caches don't apply */
152 /* NOTE: The order of the if statements below is important. If you
153 change them around unthinkingly, you will probably break something. */
154 if (orig_end <= context_cache.start_point - 1) {
155 /* case 1: before the beginning of the current top-level form */
156 Charcount diff = new_end - orig_end;
158 stderr_out("font-lock; Case 1\n");
159 context_cache.start_point += diff;
160 context_cache.cur_point += diff;
161 context_cache.end_point += diff;
162 bol_context_cache.start_point += diff;
163 bol_context_cache.cur_point += diff;
164 bol_context_cache.end_point += diff;
165 } else if (start <= context_cache.start_point) {
167 stderr_out("font-lock; Case 2\n");
168 /* case 2: right at the current top-level form (paren that starts
169 top level form got deleted or moved away from the newline it
171 reset_context_cache(&context_cache);
172 reset_context_cache(&bol_context_cache);
174 /* OK, now we know that the start is after the beginning of the
175 current top-level form. */
176 else if (start < bol_context_cache.cur_point) {
178 stderr_out("font-lock; Case 3 (1)\n");
179 /* case 3: after the beginning of the current top-level form
180 and before both of the caches */
181 reset_context_cache(&context_cache);
182 reset_context_cache(&bol_context_cache);
183 } else if (start < context_cache.cur_point) {
185 stderr_out("font-lock; Case 3 (2)\n");
186 /* case 3: but only need to invalidate one cache */
187 context_cache = bol_context_cache;
189 /* OK, now we know that the start is after the caches. */
190 else if (start >= context_cache.end_point) {
192 stderr_out("font-lock; Case 6\n");
193 /* case 6: after the end of the current top-level form
194 and after the caches. */
195 } else if (orig_end <= context_cache.end_point - 2) {
196 /* case 4: after the caches and before the end of the
197 current top-level form */
198 Charcount diff = new_end - orig_end;
200 stderr_out("font-lock; Case 4\n");
201 context_cache.end_point += diff;
202 bol_context_cache.end_point += diff;
205 stderr_out("font-lock; Case 5\n");
206 /* case 5: right at the end of the current top-level form */
207 context_cache.end_point = context_cache.start_point - 1;
208 bol_context_cache.end_point = context_cache.start_point - 1;
212 /* This function is called from Fkill_buffer(). */
214 void font_lock_buffer_was_killed(struct buffer *buf)
216 if (context_cache.buffer == buf) {
217 reset_context_cache(&context_cache);
218 reset_context_cache(&bol_context_cache);
222 static Bufpos beginning_of_defun(struct buffer *buf, Bufpos pt)
224 /* This function can GC */
225 Bufpos opt = BUF_PT(buf);
226 if (pt == BUF_BEGV(buf))
229 /* There used to be some kludginess to call c++-beginning-of-defun
230 if we're in C++ mode. There's no point in this any more;
231 we're using cc-mode. If you really want to get the old c++
232 mode working, fix it rather than the C code. */
233 call0_in_buffer(buf, Qbeginning_of_defun);
235 BUF_SET_PT(buf, opt);
239 static Bufpos end_of_defun(struct buffer *buf, Bufpos pt)
241 Lisp_Object retval = scan_lists(buf, pt, 1, 0, 0, 1);
248 /* Set up context_cache for attempting to determine the syntactic context
249 in buffer BUF at point PT. */
251 static void setup_context_cache(struct buffer *buf, Bufpos pt)
253 int recomputed_start_point = 0;
254 /* This function can GC */
255 if (context_cache.buffer != buf || pt < context_cache.start_point) {
258 stderr_out("reset context cache\n");
259 /* OK, completely invalid. */
260 reset_context_cache(&context_cache);
261 reset_context_cache(&bol_context_cache);
263 if (!context_cache.buffer) {
264 /* Need to recompute the start point. */
266 stderr_out("recompute start\n");
267 context_cache.start_point = beginning_of_defun(buf, pt);
268 recomputed_start_point = 1;
269 bol_context_cache.start_point = context_cache.start_point;
270 bol_context_cache.buffer = context_cache.buffer = buf;
272 if (context_cache.end_point < context_cache.start_point) {
273 /* Need to recompute the end point. */
275 stderr_out("recompute end\n");
276 context_cache.end_point =
277 end_of_defun(buf, context_cache.start_point);
278 bol_context_cache.end_point = context_cache.end_point;
280 if (bol_context_cache.cur_point == 0 ||
281 pt < bol_context_cache.cur_point) {
283 stderr_out("reset to start\n");
284 if (pt > context_cache.end_point
285 /* 3000 is some arbitrary delta but seems reasonable;
286 about the size of a reasonable function */
287 && pt - context_cache.end_point > 3000)
288 /* If we're far past the end of the top level form,
289 don't trust it; recompute it. */
291 /* But don't get in an infinite loop doing this.
292 If we're really far past the end of the top level
293 form, try to compute a pseudo-top-level form. */
294 if (recomputed_start_point)
295 context_cache.needs_its_head_reexamined = 1;
297 /* force recomputation */
300 /* Go to the nearest end of the top-level form that's before
302 if (pt > context_cache.end_point)
303 pt = context_cache.end_point;
305 pt = context_cache.start_point;
306 /* Reset current point to start of buffer. */
307 context_cache.cur_point = pt;
308 context_cache.context = context_none;
309 context_cache.ccontext = ccontext_none;
310 context_cache.style = comment_style_none;
311 context_cache.scontext = '\000';
312 context_cache.depth = 0;
313 /* #### shouldn't this be checking the character's syntax instead of
314 explicitly testing for backslash characters? */
315 context_cache.backslash_p = ((pt > 1) &&
316 (BUF_FETCH_CHAR(buf, pt - 1) ==
318 /* Note that the BOL context cache may not be at the beginning
319 of the line, but that should be OK, nobody's checking. */
320 bol_context_cache = context_cache;
322 } else if (pt < context_cache.cur_point) {
324 stderr_out("reset to bol\n");
325 /* bol cache is OK but current_cache is not. */
326 context_cache = bol_context_cache;
328 } else if (pt <= context_cache.end_point) {
330 stderr_out("everything is OK\n");
331 /* in same top-level form. */
335 /* OK, we're past the end of the top-level form. */
337 max(context_cache.end_point, context_cache.cur_point);
343 stderr_out("past end\n");
348 /* This appears to cause huge slowdowns in files which have no
351 In any case, it's not really necessary that we know for
352 sure the top-level form we're in; if we're in a form
353 but the form we have recorded is the previous one,
356 scan_buffer(buf, '\n', maxpt, pt, 1, &shortage, 1);
358 /* If there was a newline in the region past the known universe,
359 we might be inside another top-level form, so start over.
360 Otherwise, we're outside of any top-level forms and we know
361 the one directly before us, so it's OK. */
367 /* GCC 2.95.4 seems to need the casts */
368 #define SYNTAX_START_STYLE(c1, c2) \
369 ((enum comment_style) \
370 (SYNTAX_CODES_MATCH_START_P (c1, c2, SYNTAX_COMMENT_STYLE_A) ? \
372 SYNTAX_CODES_MATCH_START_P (c1, c2, SYNTAX_COMMENT_STYLE_B) ? \
376 #define SYNTAX_END_STYLE(c1, c2) \
377 ((enum comment_style) \
378 (SYNTAX_CODES_MATCH_END_P (c1, c2, SYNTAX_COMMENT_STYLE_A) ? \
380 SYNTAX_CODES_MATCH_END_P (c1, c2, SYNTAX_COMMENT_STYLE_B) ? \
384 #define SINGLE_SYNTAX_STYLE(c) \
385 ((enum comment_style) \
386 (SYNTAX_CODE_MATCHES_1CHAR_P (c, SYNTAX_COMMENT_STYLE_A) ? \
388 SYNTAX_CODE_MATCHES_1CHAR_P (c, SYNTAX_COMMENT_STYLE_B) ? \
392 /* Set up context_cache for position PT in BUF. */
394 static void find_context(struct buffer *buf, Bufpos pt)
396 /* This function can GC */
398 Lisp_Char_Table *mirrortab = XCHAR_TABLE(buf->mirror_syntax_table);
399 Lisp_Object syntaxtab = buf->syntax_table;
402 int prev_syncode, syncode;
404 setup_context_cache(buf, pt);
405 pt = context_cache.cur_point;
407 SCS_STATISTICS_SET_FUNCTION(scs_find_context);
408 SETUP_SYNTAX_CACHE(pt - 1, 1);
409 if (pt > BUF_BEGV(buf)) {
410 c = BUF_FETCH_CHAR(buf, pt - 1);
411 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
413 c = '\n'; /* to get bol_context_cache at point-min */
414 syncode = Swhitespace;
417 for (; pt < target; pt++, context_cache.cur_point = pt) {
418 if (context_cache.needs_its_head_reexamined) {
419 if (context_cache.depth == 0
420 && context_cache.context == context_none) {
421 /* We've found an anchor spot.
422 Try to put the start of defun within 6000 chars of
423 the target, and the end of defun as close as possible.
424 6000 is also arbitrary but tries to strike a balance
425 between two conflicting pulls when dealing with a
426 file that has lots of stuff sitting outside of a top-
429 a) If you move past the start of defun, you will
430 have to recompute defun, which in this case
431 means that start of defun goes all the way back
432 to the beginning of the file; so you want
433 to set start of defun a ways back from the
435 b) If you move a line backwards but within start of
436 defun, you have to move back to start of defun;
437 so you don't want start of defun too far from
440 if (target - context_cache.start_point > 6000)
441 context_cache.start_point = pt;
442 context_cache.end_point = pt;
443 bol_context_cache = context_cache;
447 UPDATE_SYNTAX_CACHE_FORWARD(pt);
449 prev_syncode = syncode;
450 c = BUF_FETCH_CHAR(buf, pt);
451 syncode = SYNTAX_CODE_FROM_CACHE(mirrortab, c);
454 bol_context_cache = context_cache;
456 if (context_cache.backslash_p) {
457 context_cache.backslash_p = 0;
461 switch (SYNTAX_FROM_CACHE(mirrortab, c)) {
463 context_cache.backslash_p = 1;
467 if (context_cache.context == context_none)
468 context_cache.depth++;
472 if (context_cache.context == context_none)
473 context_cache.depth--;
477 if (context_cache.context == context_none) {
478 context_cache.context = context_comment;
479 context_cache.ccontext = ccontext_none;
480 context_cache.style =
481 SINGLE_SYNTAX_STYLE(syncode);
482 if (context_cache.style == comment_style_none)
488 if (context_cache.style !=
489 SINGLE_SYNTAX_STYLE(syncode)) ;
490 else if (context_cache.context == context_comment) {
491 context_cache.context = context_none;
492 context_cache.style = comment_style_none;
493 } else if (context_cache.context ==
494 context_block_comment
495 && (context_cache.ccontext == ccontext_start2
496 || context_cache.ccontext ==
498 context_cache.context = context_none;
499 context_cache.ccontext = ccontext_none;
500 context_cache.style = comment_style_none;
506 if (context_cache.context == context_string &&
507 context_cache.scontext == c) {
508 context_cache.context = context_none;
509 context_cache.scontext = '\000';
510 } else if (context_cache.context ==
512 Lisp_Object stringtermobj =
513 syntax_match(syntax_cache.
514 current_syntax_table,
518 if (CHARP(stringtermobj))
520 XCHAR(stringtermobj);
523 context_cache.context = context_string;
524 context_cache.scontext = stringterm;
525 context_cache.ccontext = ccontext_none;
532 if (context_cache.context ==
533 context_generic_comment) {
534 context_cache.context = context_none;
535 } else if (context_cache.context ==
537 context_cache.context =
538 context_generic_comment;
539 context_cache.ccontext = ccontext_none;
546 if (context_cache.context ==
547 context_generic_string) {
548 context_cache.context = context_none;
549 } else if (context_cache.context ==
551 context_cache.context =
552 context_generic_string;
553 context_cache.ccontext = ccontext_none;
571 /* That takes care of the characters with manifest syntax.
572 Now we've got to hack multi-char sequences that start
573 and end block comments.
575 if ((SYNTAX_CODE_COMMENT_BITS(syncode) & SYNTAX_SECOND_CHAR_START) && context_cache.context == context_none && context_cache.ccontext == ccontext_start1 && SYNTAX_CODES_START_P(prev_syncode, syncode) /* the two chars match */
577 context_cache.ccontext = ccontext_start2;
578 context_cache.style =
579 SYNTAX_START_STYLE(prev_syncode, syncode);
580 if (context_cache.style == comment_style_none)
582 } else if ((SYNTAX_CODE_COMMENT_BITS(syncode) &
583 SYNTAX_FIRST_CHAR_START) &&
584 context_cache.context == context_none &&
585 (context_cache.ccontext == ccontext_none ||
586 context_cache.ccontext == ccontext_start1)) {
587 context_cache.ccontext = ccontext_start1;
588 context_cache.style = comment_style_none; /* should be this already */
589 } else if ((SYNTAX_CODE_COMMENT_BITS(syncode) &
590 SYNTAX_SECOND_CHAR_END) &&
591 context_cache.context == context_block_comment &&
592 context_cache.ccontext == ccontext_end1 &&
593 SYNTAX_CODES_END_P(prev_syncode, syncode) &&
594 /* the two chars match */
595 context_cache.style ==
596 SYNTAX_END_STYLE(prev_syncode, syncode)
598 context_cache.context = context_none;
599 context_cache.ccontext = ccontext_none;
600 context_cache.style = comment_style_none;
601 } else if ((SYNTAX_CODE_COMMENT_BITS(syncode) &
602 SYNTAX_FIRST_CHAR_END) &&
603 context_cache.context == context_block_comment &&
605 /* #### pre-Matt code had: */
606 (context_cache.style ==
607 SYNTAX_END_STYLE(c, BUF_FETCH_CHAR(buf, pt + 1))) &&
608 /* why do these differ here?! */
610 context_cache.style == SINGLE_SYNTAX_STYLE(syncode)
611 && (context_cache.ccontext == ccontext_start2
612 || context_cache.ccontext == ccontext_end1))
613 /* check end1, to detect a repetition of the first char of a
614 comment-end sequence. ie, '/xxx foo xxx/' or '/xxx foo x/',
615 where 'x' = '*' -- mct */
617 if (context_cache.style == comment_style_none)
619 context_cache.ccontext = ccontext_end1;
622 else if (context_cache.ccontext == ccontext_start1) {
623 if (context_cache.context != context_none)
625 context_cache.ccontext = ccontext_none;
626 } else if (context_cache.ccontext == ccontext_end1) {
627 if (context_cache.context != context_block_comment)
629 context_cache.context = context_none;
630 context_cache.ccontext = ccontext_start2;
633 if (context_cache.ccontext == ccontext_start2 &&
634 context_cache.context == context_none) {
635 context_cache.context = context_block_comment;
636 if (context_cache.style == comment_style_none)
638 } else if (context_cache.ccontext == ccontext_none &&
639 context_cache.context == context_block_comment) {
640 context_cache.context = context_none;
644 context_cache.needs_its_head_reexamined = 0;
647 static Lisp_Object context_to_symbol(enum syntactic_context context)
654 case context_comment:
656 case context_block_comment:
657 return Qblock_comment;
658 case context_generic_comment:
659 return Qblock_comment;
660 case context_generic_string:
664 return Qnil; /* suppress compiler warning */
668 DEFUN("buffer-syntactic-context", Fbuffer_syntactic_context, 0, 1, 0, /*
669 Return the syntactic context of BUFFER at point.
670 If BUFFER is nil or omitted, the current buffer is assumed.
671 The returned value is one of the following symbols:
673 nil ; meaning no special interpretation
674 string ; meaning point is within a string
675 comment ; meaning point is within a line comment
676 block-comment ; meaning point is within a block comment
678 See also the function `buffer-syntactic-context-depth', which returns
679 the current nesting-depth within all parenthesis-syntax delimiters
680 and the function `syntactically-sectionize', which will map a function
681 over each syntactic context in a region.
683 WARNING: this may alter match-data.
687 /* This function can GC */
688 struct buffer *buf = decode_buffer(buffer, 0);
689 find_context(buf, BUF_PT(buf));
690 return context_to_symbol(context_cache.context);
693 DEFUN("buffer-syntactic-context-depth", Fbuffer_syntactic_context_depth, 0, 1, 0, /*
694 Return the depth within all parenthesis-syntax delimiters at point.
695 If BUFFER is nil or omitted, the current buffer is assumed.
696 WARNING: this may alter match-data.
700 /* This function can GC */
701 struct buffer *buf = decode_buffer(buffer, 0);
702 find_context(buf, BUF_PT(buf));
703 return make_int(context_cache.depth);
706 DEFUN("syntactically-sectionize", Fsyntactically_sectionize, 3, 4, 0, /*
707 Call FUNCTION for each contiguous syntactic context in the region.
708 Call the given function with four arguments: the start and end of the
709 region, a symbol representing the syntactic context, and the current
710 depth (as returned by the functions `buffer-syntactic-context' and
711 `buffer-syntactic-context-depth'). When this function is called, the
712 current buffer will be set to BUFFER.
714 WARNING: this may alter match-data.
716 (function, start, end, buffer))
718 /* This function can GC */
721 enum syntactic_context this_context;
722 struct buffer *buf = decode_buffer(buffer, 0);
724 get_buffer_range_char(buf, start, end, &s, &e, 0);
727 find_context(buf, pt);
731 /* skip over "blank" areas, and bug out at end-of-buffer. */
732 while (context_cache.context == context_none) {
736 find_context(buf, pt);
738 /* We've found a non-blank area; keep going until we reach its end */
739 this_context = context_cache.context;
742 /* Minor kludge: consider the comment-start character(s) a part of
745 if (this_context == context_block_comment &&
746 context_cache.ccontext == ccontext_start2)
748 else if (this_context == context_comment
749 || this_context == context_generic_comment)
752 edepth = context_cache.depth;
753 while (context_cache.context == this_context && pt < e) {
755 find_context(buf, pt);
760 /* Minor kludge: consider the character which terminated the comment
761 a part of the comment.
763 if ((this_context == context_block_comment ||
764 this_context == context_comment
765 || this_context == context_generic_comment)
771 /* Make sure not to pass in values that are outside the
772 actual bounds of this function. */
773 call4_in_buffer(buf, function, make_int(max(s, estart)),
774 make_int(eend == e ? e : eend - 1),
775 context_to_symbol(this_context),
782 void syms_of_font_lock(void)
784 defsymbol(&Qcomment, "comment");
785 defsymbol(&Qblock_comment, "block-comment");
786 defsymbol(&Qbeginning_of_defun, "beginning-of-defun");
788 DEFSUBR(Fbuffer_syntactic_context);
789 DEFSUBR(Fbuffer_syntactic_context_depth);
790 DEFSUBR(Fsyntactically_sectionize);
793 void reinit_vars_of_font_lock(void)
795 xzero(context_cache);
796 xzero(bol_context_cache);
799 void vars_of_font_lock(void)
801 reinit_vars_of_font_lock();