1 ;;; haskell-font-lock.el --- Font locking module for Haskell Mode
3 ;; Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
4 ;; Copyright 1997-1998 Graeme E Moss, and Tommy Thorn
6 ;; Authors: 1997-1998 Graeme E Moss <gem@cs.york.ac.uk> and
7 ;; Tommy Thorn <thorn@irisa.fr>
8 ;; 2003 Dave Love <fx@gnu.org>
9 ;; Keywords: faces files Haskell
11 ;; This file is not part of GNU Emacs.
13 ;; This file is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
18 ;; This file is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
33 ;; To support fontification of standard Haskell keywords, symbols,
34 ;; functions, etc. Supports full Haskell 1.4 as well as LaTeX- and
35 ;; Bird-style literate scripts.
39 ;; To turn font locking on for all Haskell buffers under the Haskell
40 ;; mode of Moss&Thorn, add this to .emacs:
42 ;; (add-hook 'haskell-mode-hook 'turn-on-haskell-font-lock)
44 ;; Otherwise, call `turn-on-haskell-font-lock'.
49 ;; The colours and level of font locking may be customised. See the
50 ;; documentation on `turn-on-haskell-font-lock' for more details.
55 ;; If you have any problems or suggestions, after consulting the list
56 ;; below, email gem@cs.york.ac.uk and thorn@irisa.fr quoting the
57 ;; version of the mode you are using, the version of Emacs you are
58 ;; using, and a small example of the problem or suggestion. Note that
59 ;; this module requires a reasonably recent version of Emacs. It
60 ;; requires Emacs 21 to cope with Unicode characters and to do proper
61 ;; syntactic fontification.
65 ;; Support for proper behaviour (including with Unicode identifiers)
66 ;; in Emacs 21 only hacked in messily to avoid disturbing the old
67 ;; stuff. Needs integrating more cleanly. Allow literate comment
68 ;; face to be customized. Some support for fontifying definitions.
69 ;; (I'm not convinced the faces should be customizable -- fontlock
70 ;; faces are normally expected to be consistent.)
73 ;; Added support for LaTeX-style literate scripts. Allow whitespace
74 ;; after backslash to end a line for string continuations.
77 ;; Use own syntax table. Use backquote (neater). Stop ''' being
78 ;; highlighted as quoted character. Fixed `\"' fontification bug
82 ;; Brought over from Haskell mode v1.1.
84 ;; Present Limitations/Future Work (contributions are most welcome!):
86 ;; . Debatable whether `()' `[]' `(->)' `(,)' `(,,)' etc. should be
87 ;; highlighted as constructors or not. Should the `->' in
88 ;; `id :: a -> a' be considered a constructor or a keyword? If so,
89 ;; how do we distinguish this from `\x -> x'? What about the `\'?
91 ;; . XEmacs can support both `--' comments and `{- -}' comments
92 ;; simultaneously. If XEmacs is detected, this should be used.
94 ;; . Support for GreenCard?
97 ;; All functions/variables start with
98 ;; `(turn-(on/off)-)haskell-font-lock' or `haskell-fl-'.
103 (require 'haskell-mode)
;; Version.
(defconst haskell-font-lock-version "1.17"
  "Version number of haskell-font-lock.")

(defun haskell-font-lock-version ()
  "Echo the current version of haskell-font-lock in the minibuffer.
Return the string that was displayed."
  ;; Interactive so that `M-x haskell-font-lock-version' works, which is
  ;; how the docstring of `turn-on-haskell-font-lock' tells users to
  ;; find out the version.
  (interactive)
  (message "Using haskell-font-lock version %s" haskell-font-lock-version))
(defcustom haskell-font-lock-symbols nil
  "Display \\ and -> and such using symbols in fonts.
This may sound like a neat trick, but be extra careful: it changes the
alignment and can thus lead to nasty surprises w.r.t layout.
If t, try to use whichever font is available.  Otherwise you can
set it to a particular font of your preference among `japanese-jisx0208'
and `unicode'."
  ;; NOTE(review): assumes the `haskell' customization group is defined
  ;; by haskell-mode, which this file requires -- confirm.
  :group 'haskell
  ;; Every value tested by `haskell-font-lock-symbols-alist' (t, unicode,
  ;; japanese-jisx0208) must be selectable here.
  :type '(choice (const nil)
                 (const t)
                 (const unicode)
                 (const japanese-jisx0208)))
(defconst haskell-font-lock-symbols-alist
  ;; Each `and' form below yields either nil or a list of
  ;; (TOKEN . CHAR) pairs; `append' splices them into a single alist.
  (append
   ;; Prefer single-width Unicode font for lambda.
   (and (fboundp 'decode-char)
        (memq haskell-font-lock-symbols '(t unicode))
        (list (cons "\\" (decode-char 'ucs 955))))
   ;; The symbols can come from a JIS0208 font.
   (and (fboundp 'make-char) (charsetp 'japanese-jisx0208)
        (memq haskell-font-lock-symbols '(t japanese-jisx0208))
        (list (cons "not" (make-char 'japanese-jisx0208 34 76))
              (cons "\\" (make-char 'japanese-jisx0208 38 75))
              (cons "->" (make-char 'japanese-jisx0208 34 42))
              (cons "<-" (make-char 'japanese-jisx0208 34 43))
              (cons "=>" (make-char 'japanese-jisx0208 34 77))))
   ;; Or a unicode font.
   (and (fboundp 'decode-char)
        (memq haskell-font-lock-symbols '(t unicode))
        (list (cons "not" (decode-char 'ucs 172))
              (cons "->" (decode-char 'ucs 8594))
              (cons "<-" (decode-char 'ucs 8592))
              (cons "=>" (decode-char 'ucs 8658))
              (cons "~>" (decode-char 'ucs 8669)) ;; Omega language
              ;; (cons "~>" (decode-char 'ucs 8605)) ;; less desirable
              (cons "-<" (decode-char 'ucs 8610)) ;; Paterson's arrow syntax
              ;; (cons "-<" (decode-char 'ucs 10521)) ;; nicer but uncommon
              (cons "::" (decode-char 'ucs 8759))
              (cons "." (decode-char 'ucs 9675)))))
  "Alist mapping a Haskell token (a string) to its display character.
The value is computed once, when this file is loaded, from the value
`haskell-font-lock-symbols' has at that time.  The same token may
appear more than once; `haskell-font-lock-symbols-keywords' keeps the
first displayable entry for each token.")
;; Use new vars for the font-lock faces.  The indirection allows people to
;; use different faces than in other modes, as before.
;; Each variable holds the *name* of a face; the font-lock keywords built
;; by `haskell-font-lock-keywords-create' look the value up when they
;; fontify, so changing a variable affects subsequent fontification.

;; Face for reserved identifiers (keywords).
(defvar haskell-keyword-face 'font-lock-keyword-face)
;; Face for constructor identifiers, qualified constructors, `[]',
;; `(,)'-style tuples and operators starting with a colon.
(defvar haskell-constructor-face 'font-lock-type-face)
;; This used to be `font-lock-variable-name-face' but it doesn't result in
;; a highlighting that's consistent with other modes (it's mostly used
;; for function definitions).
;; Face for the name being defined by a top-level declaration.
(defvar haskell-definition-face 'font-lock-function-name-face)
;; This is probably just wrong, but it used to use
;; `font-lock-function-name-face' with a result that was not consistent with
;; other major modes, so I just exchanged with `haskell-definition-face'.
;; Face for reserved symbols, backquoted identifiers and other operators.
(defvar haskell-operator-face 'font-lock-variable-name-face)
;; Face for qualified variable names; nil means no face is named.
(defvar haskell-default-face nil)
;; Returned by `haskell-syntactic-face-function' for literate comments.
(defvar haskell-literate-comment-face 'font-lock-doc-face
  "Face with which to fontify literate comments.
Set to `default' to avoid fontification of them.")
;; Feature probe, evaluated when this definition is evaluated.  Where the
;; "[[:alpha:]]" character class is understood, `string-match' returns the
;; match position 0, which counts as true in Lisp.  The rest of the file
;; uses this flag to choose between character-class regexps plus
;; syntactic keywords, and the older explicit keyword rules.
(defconst haskell-emacs21-features (string-match "[[:alpha:]]" "x")
  "Non-nil if we have regexp char classes.
Assume this means we have other useful features from Emacs 21.")
(defun haskell-font-lock-compose-symbol (alist)
  "Compose a sequence of ascii chars into a symbol.
Regexp match data 0 points to the chars.
ALIST maps each matched string to the character that `compose-region'
should display in its place.
Always return nil, so that font-lock adds no face for the match."
  ;; Check that the chars should really be composed into a symbol.
  (let* ((start (match-beginning 0))
         (end (match-end 0))
         ;; Syntax classes which, when found immediately before or after
         ;; the match, show that it is only part of a longer token.
         (syntaxes (cond
                    ((eq (char-syntax (char-after start)) ?w) '(?w))
                    ;; Special case for the . used for qualified names.
                    ((and (eq (char-after start) ?\.) (= end (1+ start)))
                     '(?_ ?\\ ?w))
                    (t '(?_ ?\\)))))
    (if (or (memq (char-syntax (or (char-before start) ?\ )) syntaxes)
            (memq (char-syntax (or (char-after end) ?\ )) syntaxes)
            (memq (get-text-property start 'face)
                  '(font-lock-doc-face font-lock-string-face
                    font-lock-comment-face)))
        ;; No composition for you.  Let's actually remove any composition
        ;; we may have added earlier and which is now incorrect.
        (remove-text-properties start end '(composition))
      ;; That's a symbol alright, so add the composition.
      (compose-region start end (cdr (assoc (match-string 0) alist)))))
  ;; Return nil because we're not adding any face property.
  nil)
(defun haskell-font-lock-symbols-keywords ()
  "Return font-lock keywords that display tokens as symbols.
The result is a list holding one keyword, which matches every token of
`haskell-font-lock-symbols-alist' that can be displayed and calls
`haskell-font-lock-compose-symbol' on each match.  Return nil when
`compose-region' is unavailable or no token qualifies."
  (when (fboundp 'compose-region)
    (let ((alist nil))
      (dolist (x haskell-font-lock-symbols-alist)
        (when (and (if (fboundp 'char-displayable-p)
                       (char-displayable-p (cdr x))
                     ;; No way to check: assume it can be displayed.
                     t)
                   (not (assoc (car x) alist))) ;Not yet in alist.
          (push x alist)))
      (when alist
        `((,(regexp-opt (mapcar 'car alist) t)
           (0 (haskell-font-lock-compose-symbol ',alist))))))))
215 ;; The font lock regular expressions.
(defun haskell-font-lock-keywords-create (literate)
  "Create fontification definitions for Haskell scripts.
LITERATE is `bird' for Bird-style literate scripts, `latex' for
LaTeX-style literate scripts, and anything else for plain scripts.
Returns keywords suitable for `font-lock-keywords'."
  (let* (;; Bird-style literate scripts start a line of code with
         ;; "^>", otherwise a line of code starts with "^".
         (line-prefix (if (eq literate 'bird) "^> ?" "^"))

         ;; Most names are borrowed from the lexical syntax of the Haskell
         ;; report.
         ;; Some of these definitions have been superseded by using the
         ;; syntax table instead.

         ;; (ASCsymbol "-!#$%&*+./<=>?@\\\\^|~")
         ;; Put the minus first to make it work in ranges.
         ;; (ISOsymbol "\241-\277\327\367")
         (ISOlarge "\300-\326\330-\337")
         (ISOsmall "\340-\366\370-\377")
         (small
          (if haskell-emacs21-features "[:lower:]" (concat "a-z" ISOsmall)))
         (large
          (if haskell-emacs21-features "[:upper:]" (concat "A-Z" ISOlarge)))
         (alnum
          (if haskell-emacs21-features "[:alnum:]" (concat small large "0-9")))
         ;; (symbol
         ;;  (concat ASCsymbol ISOsymbol))

         ;; We allow _ as the first char to fit GHC
         (varid (concat "\\b[" small "_][" alnum "'_]*\\b"))
         (conid (concat "\\b[" large "][" alnum "'_]*\\b"))
         (modid (concat "\\b" conid "\\(\\." conid "\\)*\\b"))
         (qvarid (concat modid "\\." varid))
         (qconid (concat modid "\\." conid))
         (sym
          ;; We used to use the below for non-Emacs21, but I think the
          ;; regexp based on syntax works for other emacsen as well.  -- Stef
          ;; (concat "[" symbol ":]+")
          ;; Add backslash to the symbol-syntax chars.  This seems to
          ;; be thrown for some reason by backslash's escape syntax.
          "\\(\\s_\\|\\\\\\)+")

         ;; Reserved operations
         (reservedsym
          ;; The reserved symbol itself is group 1; it must not be
          ;; adjacent to other symbol-syntax characters.
          (concat "\\S_"
                  ;; (regexp-opt '(".." "::" "=" "\\" "|" "<-" "->"
                  ;;               "@" "~" "=>") t)
                  "\\(->\\|\\.\\.\\|::\\|<-\\|=>\\|[=@\\|~]\\)"
                  "\\S_"))
         ;; Reserved identifiers
         (reservedid
          (concat "\\b"
                  ;; ?? `as' and `qualified' aren't in the Haskell98 list.
                  ;; `_' can go in here since it has temporary word syntax.
                  ;; (regexp-opt
                  ;;  '("as" "case" "class" "data" "default" "deriving" "do"
                  ;;    "else" "hiding" "if" "import" "in" "infix" "infixl"
                  ;;    "infixr" "instance" "let" "module" "newtype" "of"
                  ;;    "qualified" "then" "type" "where" "_") t)
                  "\\(_\\|as\\|c\\(ase\\|lass\\)\\|d\\(ata\\|e\\(fault\\|riving\\)\\|o\\)\\|else\\|hiding\\|i\\(mport\\|n\\(fix[lr]?\\|stance\\)\\|[fn]\\)\\|let\\|module\\|newtype\\|of\\|qualified\\|t\\(hen\\|ype\\)\\|where\\)"
                  "\\b"))

         ;; This unreadable regexp matches strings and character
         ;; constants.  We need to do this with one regexp to handle
         ;; stuff like '"':"'".  The regexp is the composition of
         ;; "([^"\\]|\\.)*" for strings and '([^\\]|\\.[^']*)' for
         ;; characters, allowing for string continuations.
         ;; Could probably be improved...
         (string-and-char
          (concat "\\(\\(\"\\|" line-prefix "[ \t]*\\\\\\)\\([^\"\\\\\n]\\|\\\\.\\)*\\(\"\\|\\\\[ \t]*$\\)\\|'\\([^'\\\\\n]\\|\\\\.[^'\n]*\\)'\\)"))

         ;; Top-level declarations
         (topdecl-var
          (concat line-prefix "\\(" varid "\\)\\s-*\\("
                  varid "\\|" conid "\\|::\\|=\\||\\|\\s(\\)"))
         (topdecl-var2
          (concat line-prefix "\\(" varid "\\|" conid "\\)\\s-*`\\(" varid "\\)`"))
         (topdecl-sym
          (concat line-prefix "\\(" varid "\\|" conid "\\)\\s-*\\(" sym "\\)"))
         (topdecl-sym2 (concat line-prefix "(\\(" sym "\\))"))

         keywords)

    (setq keywords
          `(;; NOTICE the ordering below is significant
            ;;
            ("^#.*$" 0 'font-lock-warning-face t)
            ,@(unless haskell-emacs21-features
                ;; Expensive.
                `((,string-and-char 1 font-lock-string-face)))

            ;; This was originally at the very end (and needs to be after
            ;; all the comment/string/doc highlighting) but it seemed to
            ;; trigger a bug in Emacs-21.3 which caused the compositions to
            ;; be "randomly" dropped.  Moving it earlier seemed to reduce
            ;; the occurrence of the bug.
            ,@(haskell-font-lock-symbols-keywords)

            (,reservedid 1 (symbol-value 'haskell-keyword-face))
            (,reservedsym 1 (symbol-value 'haskell-operator-face))

            ;; Toplevel Declarations.
            ;; Place them *before* generic id-and-op highlighting.
            (,topdecl-var (1 (symbol-value 'haskell-definition-face)))
            (,topdecl-var2 (2 (symbol-value 'haskell-definition-face)))
            (,topdecl-sym (2 (symbol-value 'haskell-definition-face)))
            (,topdecl-sym2 (1 (symbol-value 'haskell-definition-face)))

            ;; These four are debatable...
            ("(\\(,*\\|->\\))" 0 (symbol-value 'haskell-constructor-face))
            ("\\[\\]" 0 (symbol-value 'haskell-constructor-face))
            ;; Expensive.
            (,qvarid 0 (symbol-value 'haskell-default-face))
            (,qconid 0 (symbol-value 'haskell-constructor-face))
            (,(concat "\`" varid "\`") 0 (symbol-value 'haskell-operator-face))
            ;; Expensive.
            (,conid 0 (symbol-value 'haskell-constructor-face))

            ;; Very expensive.
            (,sym 0 (if (eq (char-after (match-beginning 0)) ?:)
                        haskell-constructor-face
                      haskell-operator-face))))
    ;; Without the Emacs 21 features the literate comments are fontified
    ;; with keywords: extra rules are wrapped around the list built above.
    (unless haskell-emacs21-features
      (cond
       ((eq literate 'bird)
        (setq keywords
              ;; `haskell-comment-face' is not defined in this file, so the
              ;; face variable that is defined for literate comments is
              ;; used here.
              `(("^[^>\n].*$" 0 haskell-literate-comment-face t)
                ,@keywords
                ("^>" 0 haskell-default-face t))))
       ((eq literate 'latex)
        (setq keywords
              `((haskell-fl-latex-comments 0 'font-lock-comment-face t)
                ,@keywords)))))
    keywords))
;; The next three aren't used in Emacs 21.

(defvar haskell-fl-latex-cache-pos nil
  "Position of cache point used by `haskell-fl-latex-cache-in-comment'.
Should be at the start of a line.")

(defvar haskell-fl-latex-cache-in-comment nil
  "If `haskell-fl-latex-cache-pos' is outside a
\\begin{code}..\\end{code} block (and therefore inside a comment),
this variable is set to t, otherwise nil.")

(defun haskell-fl-latex-comments (end)
  "Set the match data to the next LaTeX-style literate comment before END.
Search from point.  Return the position after the matched region, as
a regexp search would, or nil when there is nothing more to fontify
before END.  The region starting at point counts as a comment when
the nearest preceding \\begin{code} or \\end{code} line is not a
\\begin{code}."
  (let ((start (point)))
    (if (= start end)
        ;; We're at the end.  No more to fontify.
        nil
      (if (not (eq start haskell-fl-latex-cache-pos))
          ;; If the start position is not cached, calculate the state
          ;; of the start.
          (progn
            (setq haskell-fl-latex-cache-pos start)
            ;; If the previous \begin{code} or \end{code} is a
            ;; \begin{code}, then start is not in a comment, otherwise
            ;; it is in a comment.
            (setq haskell-fl-latex-cache-in-comment
                  (if (and
                       (re-search-backward
                        "^\\(\\(\\\\begin{code}\\)\\|\\(\\\\end{code}\\)\\)$"
                        (point-min) t)
                       ;; Group 2 matched: it was a \begin{code}.
                       (match-end 2))
                      nil t))
            ;; Restore position.
            (goto-char start)))
      (if haskell-fl-latex-cache-in-comment
          (progn
            ;; If start is inside a comment, search for next \begin{code}.
            (re-search-forward "^\\\\begin{code}$" end 'move)
            ;; Mark start to end of \begin{code} (if present, till end
            ;; otherwise), as a comment.
            (set-match-data (list start (point)))
            ;; Return point, as a normal regexp would.
            (point))
        ;; If start is inside a code block, search for next \end{code}.
        (if (re-search-forward "^\\\\end{code}$" end t)
            ;; If one found, mark it as a comment, otherwise finish.
            (point))))))
(defconst haskell-basic-syntactic-keywords
  '(;; Character constants (since apostrophe can't have string syntax).
    ;; Beware: do not match something like 's-}' or '\n"+' since the first '
    ;; might be inside a comment or a string.
    ;; This still gets fooled with "'"'"'"'"'"', but ... oh well.
    ("\\Sw\\('\\)\\([^\\'\n]\\|\\\\.[^\\'\n \"}]*\\)\\('\\)" (1 "|") (3 "|"))
    ;; The \ is not escaping in \(x,y) -> x + y.
    ("\\(\\\\\\)(" (1 "."))
    ;; The second \ in a gap does not quote the subsequent char.
    ;; It's probably not worth the trouble, tho.
    ;; ("^[ \t]*\\(\\\\\\)" (1 "."))
    ;; Deal with instances of `--' which don't form a comment.
    ("\\s_\\{3,\\}" (0 (if (string-match "\\`-*\\'" (match-string 0))
                           ;; Sequence of hyphens.  Do nothing in
                           ;; case of things like `{---'.
                           nil
                         "_")))) ; other symbol sequence
  "Syntactic keywords shared by every Haskell style (plain or literate).
Each entry has the form expected by `font-lock-syntactic-keywords'.")
(defconst haskell-bird-syntactic-keywords
  ;; One extra rule in front of the basic ones: the first character of
  ;; a line that is neither `>' nor a newline gets syntax "<".
  `(("^[^\n>]" (0 "<"))
    ,@haskell-basic-syntactic-keywords)
  "Syntactic keywords for Bird-style literate Haskell.")
(defconst haskell-latex-syntactic-keywords
  ;; The LaTeX-specific rules come first, followed by the basic rules
  ;; shared with the other styles.
  (append
   '(("^\\\\begin{code}\\(\n\\)" 1 "!")
     ;; Note: buffer is widened during font-locking.
     ("\\`\\(.\\|\n\\)" (1 "!"))        ; start comment at buffer start
     ("^\\(\\\\\\)end{code}$" 1 "!"))
   haskell-basic-syntactic-keywords)
  "Syntactic keywords for LaTeX-style literate Haskell.
Text outside \\begin{code}..\\end{code} blocks is delimited with the
generic comment syntax \"!\".")
(defun haskell-syntactic-face-function (state)
  "`font-lock-syntactic-face-function' for Haskell.
STATE is the parser state handed over by font-lock.  Return the face
for the string or comment that STATE describes."
  (cond
   ((nth 3 state) font-lock-string-face) ; as normal
   ;; Else comment.  If its delimiters come from `syntax-table' text
   ;; properties, or if it starts at the beginning of a line in a
   ;; Bird-style script, it is a literate comment.
   ((or (eq 'syntax-table (nth 7 state))
        ;; Guarded with `boundp', like the other users of
        ;; `haskell-literate' in this file.
        (and (boundp 'haskell-literate)
             (eq haskell-literate 'bird)
             (memq (char-before (nth 8 state)) '(nil ?\n))))
    haskell-literate-comment-face)
   (t font-lock-comment-face)))
;; The three keyword lists are built once, when these definitions are
;; evaluated; `haskell-font-lock-choose-keywords' picks one of them
;; according to the literate style.

;; Argument nil: plain (non-literate) scripts.
(defconst haskell-font-lock-keywords
  (haskell-font-lock-keywords-create nil)
  "Font lock definitions for non-literate Haskell.")

;; Argument `bird': lines of code are matched with the "^> ?" prefix.
(defconst haskell-font-lock-bird-literate-keywords
  (haskell-font-lock-keywords-create 'bird)
  "Font lock definitions for Bird-style literate Haskell.")

;; Argument `latex': LaTeX-style literate scripts.
(defconst haskell-font-lock-latex-literate-keywords
  (haskell-font-lock-keywords-create 'latex)
  "Font lock definitions for LaTeX-style literate Haskell.")
(defun haskell-font-lock-choose-keywords ()
  "Return the font-lock keywords for the current literate style.
The style is the value of `haskell-literate' when that variable is
bound: `bird' and `latex' select the corresponding literate keywords,
anything else the keywords for plain Haskell."
  (let ((literate (if (boundp 'haskell-literate) haskell-literate)))
    ;; `cond' with `eq' rather than `case', so no cl macros are needed.
    (cond
     ((eq literate 'bird) haskell-font-lock-bird-literate-keywords)
     ((eq literate 'latex) haskell-font-lock-latex-literate-keywords)
     (t haskell-font-lock-keywords))))
(defun haskell-font-lock-choose-syntactic-keywords ()
  "Return the syntactic keywords for the current literate style.
The style is the value of `haskell-literate' when that variable is
bound: `bird' and `latex' select the corresponding literate rules,
anything else the basic rules."
  (let ((literate (if (boundp 'haskell-literate) haskell-literate)))
    ;; `cond' with `eq' rather than `case', so no cl macros are needed.
    (cond
     ((eq literate 'bird) haskell-bird-syntactic-keywords)
     ((eq literate 'latex) haskell-latex-syntactic-keywords)
     (t haskell-basic-syntactic-keywords))))
(defun haskell-font-lock-defaults-create ()
  "Locally set `font-lock-defaults' for Haskell."
  (let ((spec
         '(;; Keywords: a function called to obtain the keyword list.
           haskell-font-lock-choose-keywords
           ;; KEYWORDS-ONLY and CASE-FOLD are both off.
           nil nil
           ;; Apostrophe and underscore are word constituents while
           ;; fontifying.
           ((?\' . "w") (?_ . "w"))
           nil
           ;; Variables set locally by font-lock.
           (font-lock-syntactic-keywords
            . haskell-font-lock-choose-syntactic-keywords)
           (font-lock-syntactic-face-function
            . haskell-syntactic-face-function)
           ;; Get help from font-lock-syntactic-keywords.
           (parse-sexp-lookup-properties . t))))
    (make-local-variable 'font-lock-defaults)
    (setq font-lock-defaults spec)))
477 ;; The main functions.
(defun turn-on-haskell-font-lock ()
  "Turn on font locking in the current buffer for Haskell 1.4 scripts.

Changes the current buffer's `font-lock-defaults', and uses the
following variables to choose the faces:

   `haskell-keyword-face'      for reserved keywords and syntax,
   `haskell-constructor-face'  for data- and type-constructors, class names,
                               and module names,
   `haskell-definition-face'   for the names of top-level definitions,
   `haskell-operator-face'     for symbolic and alphanumeric operators,
   `haskell-default-face'      for ordinary code.

The variables are initialised to the following font lock default faces:

   `haskell-keyword-face'      `font-lock-keyword-face'
   `haskell-constructor-face'  `font-lock-type-face'
   `haskell-definition-face'   `font-lock-function-name-face'
   `haskell-operator-face'     `font-lock-variable-name-face'
   `haskell-default-face'      <default face>

A single level of fontification is defined, so the value of
`font-lock-maximum-decoration' makes no difference in Haskell buffers.

To alter an attribute of a face, add a hook.  For example, to change
the foreground colour of comments to brown, add the following line to
your .emacs:

  (add-hook 'haskell-font-lock-hook
            (lambda ()
              (set-face-foreground 'font-lock-comment-face \"brown\")))

Note that the colours available vary from system to system.  To see
what colours are available on your system, call
`list-colors-display' from emacs.

To turn font locking on for all Haskell buffers, add this to .emacs:

  (add-hook 'haskell-mode-hook 'turn-on-haskell-font-lock)

To turn font locking on for the current buffer, call
`turn-on-haskell-font-lock'.  To turn font locking off in the current
buffer, call `turn-off-haskell-font-lock'.

Literate Haskell scripts are supported: the value of
`haskell-literate' (automatically set by the Haskell mode of
Moss&Thorn) selects the style, `bird' for Bird-style and `latex' for
LaTeX-style scripts.

Invokes `haskell-font-lock-hook' if not nil.

Use `haskell-font-lock-version' to find out what version this is."
  (interactive)
  (haskell-font-lock-defaults-create)
  ;; Run the hook before Font Lock mode is switched on, so that user
  ;; customisations are already in place for the first fontification.
  (run-hooks 'haskell-font-lock-hook)
  (turn-on-font-lock))
(defun turn-off-haskell-font-lock ()
  "Turn off font locking in the current buffer."
  (interactive)
  ;; A negative argument switches the minor mode off unconditionally.
  (font-lock-mode -1))
547 ;; Provide ourselves:
549 (provide 'haskell-font-lock)
551 ;;; haskell-font-lock.el ends here