1 ;;; spam.el --- Identifying spam
2 ;; Copyright (C) 2002 Free Software Foundation, Inc.
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ;; Boston, MA 02111-1307, USA.
26 ;;; This module addresses a few aspects of spam control under Gnus. Page
27 ;;; breaks are used for grouping declarations and documentation relating to
28 ;;; each particular aspect.
30 ;;; The integration with Gnus is not yet complete. See various `FIXME'
31 ;;; comments, below, for supplementary explanations or discussions.
33 ;;; Several TODO items are marked as such
39 (require 'gnus-uu) ; because of key prefix issues
40 (require 'gnus) ; for the definitions of group content classification and spam processors
41 (require 'message) ;for the message-fetch-field functions
43 ;; autoload executable-find
45 ;; executable-find is not autoloaded in Emacs 20
46 (autoload 'executable-find "executable"))
50 (autoload 'query-dig "dig"))
54 (autoload 'query-dns "dns"))
59 "Spam configuration.")
61 (defcustom spam-directory "~/News/spam/"
62 "Directory for spam whitelists and blacklists."
66 (defcustom spam-move-spam-nonspam-groups-only t
67 "Whether spam should be moved in non-spam groups only.
68 When nil, only ham and unclassified groups will have their spam moved
69 to the spam-process-destination. When t, spam will also be moved from
74 (defcustom spam-whitelist (expand-file-name "whitelist" spam-directory)
75 "The location of the whitelist.
76 The file format is one regular expression per line.
77 The regular expression is matched against the address."
81 (defcustom spam-blacklist (expand-file-name "blacklist" spam-directory)
82 "The location of the blacklist.
83 The file format is one regular expression per line.
84 The regular expression is matched against the address."
88 (defcustom spam-use-dig t
89 "Whether query-dig should be used instead of query-dns."
93 (defcustom spam-use-blacklist nil
94 "Whether the blacklist should be used by spam-split."
98 (defcustom spam-use-whitelist nil
99 "Whether the whitelist should be used by spam-split."
103 (defcustom spam-use-blackholes nil
104 "Whether blackholes should be used by spam-split."
108 (defcustom spam-use-bogofilter nil
109 "Whether bogofilter should be used by spam-split."
113 (defcustom spam-use-BBDB nil
114 "Whether BBDB should be used by spam-split."
118 (defcustom spam-use-ifile nil
119 "Whether ifile should be used by spam-split."
123 (defcustom spam-use-stat nil
124 "Whether spam-stat should be used by spam-split."
128 (defcustom spam-split-group "spam"
129 "Group name where incoming spam should be put by spam-split."
133 (defcustom spam-junk-mailgroups (cons spam-split-group '("mail.junk" "poste.pourriel"))
134 "Mailgroups with spam contents.
135 All unmarked article in such group receive the spam mark on group entry."
136 :type '(repeat (string :tag "Group"))
139 (defcustom spam-blackhole-servers '("bl.spamcop.net" "relays.ordb.org"
140 "dev.null.dk" "relays.visi.com")
141 "List of blackhole servers."
142 :type '(repeat (string :tag "Server"))
145 (defcustom spam-ham-marks (list 'gnus-del-mark 'gnus-read-mark
146 'gnus-killed-mark 'gnus-kill-file-mark
147 'gnus-low-score-mark)
148 "Marks considered as being ham (positively not spam).
149 Such articles will be processed as ham (non-spam) on group exit."
151 (variable-item gnus-del-mark)
152 (variable-item gnus-read-mark)
153 (variable-item gnus-killed-mark)
154 (variable-item gnus-kill-file-mark)
155 (variable-item gnus-low-score-mark))
158 (defcustom spam-spam-marks (list 'gnus-spam-mark)
159 "Marks considered as being spam (positively spam).
160 Such articles will be transmitted to `bogofilter -s' on group exit."
162 (variable-item gnus-spam-mark)
163 (variable-item gnus-killed-mark)
164 (variable-item gnus-kill-file-mark)
165 (variable-item gnus-low-score-mark))
168 (defcustom spam-face 'gnus-splash-face
169 "Face for spam-marked articles"
173 (defgroup spam-ifile nil
174 "Spam ifile configuration."
177 (defcustom spam-ifile-path (executable-find "ifile")
178 "File path of the ifile executable program."
179 :type '(choice (file :tag "Location of ifile")
180 (const :tag "ifile is not installed"))
183 (defcustom spam-ifile-database-path nil
184 "File path of the ifile database."
185 :type '(choice (file :tag "Location of the ifile database")
186 (const :tag "Use the default"))
189 (defcustom spam-ifile-spam-category "spam"
190 "Name of the spam ifile category."
194 (defcustom spam-ifile-all-categories nil
195 "Whether the ifile check will return all categories, or just spam.
196 Set this to t if you want to use the spam-split invocation of ifile as
197 your main source of newsgroup names."
201 (defgroup spam-bogofilter nil
202 "Spam bogofilter configuration."
205 (defcustom spam-bogofilter-output-buffer-name "*Bogofilter Output*"
206 "Name of buffer when displaying `bogofilter -v' output."
208 :group 'spam-bogofilter)
210 (defcustom spam-bogofilter-initial-timeout 40
211 "Timeout in seconds for the initial reply from the `bogofilter' program."
213 :group 'spam-bogofilter)
215 (defcustom spam-bogofilter-subsequent-timeout 15
216 "Timeout in seconds for any subsequent reply from the `bogofilter' program."
218 :group 'spam-bogofilter)
220 (defcustom spam-bogofilter-path (executable-find "bogofilter")
221 "File path of the Bogofilter executable program."
222 :type '(choice (file :tag "Location of bogofilter")
223 (const :tag "Bogofilter is not installed"))
224 :group 'spam-bogofilter)
226 ;; FIXME! In the following regexp, we should explain which tool produces
227 ;; which kind of header. I do not even remember them all by now. X-Junk
228 ;; (and previously X-NoSpam) are produced by the `NoSpam' tool, which has
229 ;; never been published, so it might not be reasonable leaving it in the
231 (defcustom spam-bogofilter-spaminfo-header-regexp
232 "^X-\\(jf\\|Junk\\|NoSpam\\|Spam\\|SB\\)[^:]*:"
233 "Regexp for spam markups in headers.
234 Markup from spam recognisers, as well as `Xref', are to be removed from
235 articles before they get registered by Bogofilter."
237 :group 'spam-bogofilter)
239 ;;; Key bindings for spam control.
241 (gnus-define-keys gnus-summary-mode-map
242 "St" spam-bogofilter-score
243 "Sx" gnus-summary-mark-as-spam
244 "Mst" spam-bogofilter-score
245 "Msx" gnus-summary-mark-as-spam
246 "\M-d" gnus-summary-mark-as-spam)
248 ;;; How to highlight a spam summary line.
250 ;; TODO: How do we redo this every time spam-face is customized?
252 (push '((eq mark gnus-spam-mark) . spam-face)
253 gnus-summary-highlight)
255 ;; convenience functions
256 (defun spam-group-spam-contents-p (group)
258 (or (member group spam-junk-mailgroups)
259 (memq 'gnus-group-spam-classification-spam
260 (gnus-parameter-spam-contents group)))
263 (defun spam-group-ham-contents-p (group)
265 (memq 'gnus-group-spam-classification-ham
266 (gnus-parameter-spam-contents group))
269 (defun spam-group-processor-p (group processor)
270 (if (and (stringp group)
272 (member processor (car (gnus-parameter-spam-process group)))
275 (defun spam-group-spam-processor-bogofilter-p (group)
276 (spam-group-processor-p group 'gnus-group-spam-exit-processor-bogofilter))
278 (defun spam-group-spam-processor-blacklist-p (group)
279 (spam-group-processor-p group 'gnus-group-spam-exit-processor-blacklist))
281 (defun spam-group-spam-processor-ifile-p (group)
282 (spam-group-processor-p group 'gnus-group-spam-exit-processor-ifile))
284 (defun spam-group-ham-processor-ifile-p (group)
285 (spam-group-processor-p group 'gnus-group-ham-exit-processor-ifile))
287 (defun spam-group-spam-processor-stat-p (group)
288 (spam-group-processor-p group 'gnus-group-spam-exit-processor-stat))
290 (defun spam-group-ham-processor-stat-p (group)
291 (spam-group-processor-p group 'gnus-group-ham-exit-processor-stat))
293 (defun spam-group-ham-processor-whitelist-p (group)
294 (spam-group-processor-p group 'gnus-group-ham-exit-processor-whitelist))
296 (defun spam-group-ham-processor-BBDB-p (group)
297 (spam-group-processor-p group 'gnus-group-ham-exit-processor-BBDB))
299 ;;; Summary entry and exit processing.
301 (defun spam-summary-prepare ()
302 (spam-mark-junk-as-spam-routine))
304 (add-hook 'gnus-summary-prepare-hook 'spam-summary-prepare)
306 (defun spam-summary-prepare-exit ()
307 ;; The spam processors are invoked for any group, spam or ham or neither
308 (when (and spam-bogofilter-path
309 (spam-group-spam-processor-bogofilter-p gnus-newsgroup-name))
310 (spam-bogofilter-register-routine))
312 (when (and spam-ifile-path
313 (spam-group-spam-processor-ifile-p gnus-newsgroup-name))
314 (spam-ifile-register-spam-routine))
316 (when (spam-group-spam-processor-stat-p gnus-newsgroup-name)
317 (spam-stat-register-spam-routine))
319 (when (spam-group-spam-processor-bogofilter-p gnus-newsgroup-name)
320 (spam-blacklist-register-routine))
322 (if spam-move-spam-nonspam-groups-only
323 (when (not (spam-group-spam-contents-p gnus-newsgroup-name))
324 (spam-mark-spam-as-expired-and-move-routine
325 (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
326 (spam-mark-spam-as-expired-and-move-routine
327 (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
329 ;; now we redo spam-mark-spam-as-expired-and-move-routine to only
330 ;; expire spam, in case the above did not expire them
331 (spam-mark-spam-as-expired-and-move-routine nil)
333 (when (spam-group-ham-contents-p gnus-newsgroup-name)
334 (when (spam-group-ham-processor-whitelist-p gnus-newsgroup-name)
335 (spam-whitelist-register-routine))
336 (when (spam-group-ham-processor-ifile-p gnus-newsgroup-name)
337 (spam-ifile-register-ham-routine))
338 (when (spam-group-ham-processor-stat-p gnus-newsgroup-name)
339 (spam-stat-register-ham-routine))
340 (when (spam-group-ham-processor-BBDB-p gnus-newsgroup-name)
341 (spam-BBDB-register-routine)))
343 ;; now move all ham articles out of spam groups
344 (when (spam-group-spam-contents-p gnus-newsgroup-name)
345 (spam-ham-move-routine
346 (gnus-parameter-ham-process-destination gnus-newsgroup-name))))
348 (add-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)
350 (defun spam-mark-junk-as-spam-routine ()
351 ;; check the global list of group names spam-junk-mailgroups and the
353 (when (spam-group-spam-contents-p gnus-newsgroup-name)
354 (let ((articles gnus-newsgroup-articles)
357 (setq article (pop articles))
358 (when (eq (gnus-summary-article-mark article) gnus-unread-mark)
359 (gnus-summary-mark-article article gnus-spam-mark))))))
361 (defun spam-mark-spam-as-expired-and-move-routine (&optional group)
362 (let ((articles gnus-newsgroup-articles)
365 (setq article (pop articles))
366 (when (eq (gnus-summary-article-mark article) gnus-spam-mark)
367 (gnus-summary-mark-article article gnus-expirable-mark)
368 (when (stringp group)
369 (let ((gnus-current-article article))
370 (gnus-summary-move-article nil group)))))))
372 (defun spam-ham-move-routine (&optional group)
373 (let ((articles gnus-newsgroup-articles)
374 article ham-mark-values mark)
375 (dolist (mark spam-ham-marks)
376 (push (symbol-value mark) ham-mark-values))
379 (setq article (pop articles))
380 (when (and (memq mark ham-mark-values)
382 (let ((gnus-current-article article))
383 (gnus-summary-move-article nil group))))))
385 (defun spam-generic-register-routine (spam-func ham-func)
386 (let ((articles gnus-newsgroup-articles)
387 article mark ham-articles spam-articles spam-mark-values
390 ;; marks are stored as symbolic values, so we have to dereference
391 ;; them for memq to work. we wouldn't have to do this if
392 ;; gnus-summary-article-mark returned a symbol.
393 (dolist (mark spam-ham-marks)
394 (push (symbol-value mark) ham-mark-values))
396 (dolist (mark spam-spam-marks)
397 (push (symbol-value mark) spam-mark-values))
400 (setq article (pop articles)
401 mark (gnus-summary-article-mark article))
402 (cond ((memq mark spam-mark-values) (push article spam-articles))
403 ((memq article gnus-newsgroup-saved))
404 ((memq mark ham-mark-values) (push article ham-articles))))
405 (when (and ham-articles ham-func)
406 (mapc ham-func ham-articles)) ; we use mapc because unlike
407 ; mapcar it discards the
409 (when (and spam-articles spam-func)
410 (mapc spam-func spam-articles)))) ; we use mapc because unlike
411 ; mapcar it discards the
415 (defalias 'spam-point-at-eol (if (fboundp 'point-at-eol)
417 'line-end-position)))
419 (defun spam-get-article-as-string (article)
420 (let ((article-string))
421 (when (numberp article)
422 (save-window-excursion
423 (gnus-summary-goto-subject article)
424 (gnus-summary-show-article t)
425 (set-buffer gnus-article-buffer)
426 (setq article-string (buffer-string))))
429 (defun spam-fetch-field-from-fast (article)
430 "Fetch the `from' field quickly, using the internal gnus-data-list function"
431 (if (and (numberp article)
432 (assoc article (gnus-data-list nil)))
433 (mail-header-from (gnus-data-header (assoc article (gnus-data-list nil))))
436 (defun spam-fetch-field-subject-fast (article)
437 "Fetch the `subject' field quickly, using the internal gnus-data-list function"
438 (if (and (numberp article)
439 (assoc article (gnus-data-list nil)))
440 (mail-header-subject (gnus-data-header (assoc article (gnus-data-list nil))))
444 ;;;; Spam determination.
446 (defvar spam-list-of-checks
447 '((spam-use-blacklist . spam-check-blacklist)
448 (spam-use-whitelist . spam-check-whitelist)
449 (spam-use-BBDB . spam-check-BBDB)
450 (spam-use-ifile . spam-check-ifile)
451 (spam-use-stat . spam-check-stat)
452 (spam-use-blackholes . spam-check-blackholes)
453 (spam-use-bogofilter . spam-check-bogofilter))
454 "The spam-list-of-checks list contains pairs associating a parameter
455 variable with a spam checking function. If the parameter variable is
456 true, then the checking function is called, and its value decides what
457 happens. Each individual check may return `nil', `t', or a mailgroup
458 name. The value `nil' means that the check does not yield a decision,
459 and so, that further checks are needed. The value `t' means that the
460 message is definitely not spam, and that further spam checks should be
461 inhibited. Otherwise, a mailgroup name is returned where the mail
462 should go, and further checks are also inhibited. The usual mailgroup
463 name is the value of `spam-split-group', meaning that the message is
467 "Split this message into the `spam' group if it is spam.
468 This function can be used as an entry in `nnmail-split-fancy', for
469 example like this: (: spam-split)
471 See the Info node `(gnus)Fancy Mail Splitting' for more details."
474 ;; load the spam-stat tables if needed
475 (when spam-use-stat (spam-stat-load))
477 (let ((list-of-checks spam-list-of-checks)
479 (while (and list-of-checks (not decision))
480 (let ((pair (pop list-of-checks)))
481 (when (symbol-value (car pair))
482 (setq decision (funcall (cdr pair))))))
489 (defun spam-check-blackholes ()
490 "Check the Received headers for blackholed relays."
491 (let ((headers (message-fetch-field "received"))
496 (goto-char (point-min))
497 (while (re-search-forward
498 "\\[\\([0-9]+.[0-9]+.[0-9]+.[0-9]+\\)\\]" nil t)
499 (message "Blackhole search found host IP %s." (match-string 1))
500 (push (mapconcat 'identity
501 (nreverse (split-string (match-string 1) "\\."))
504 (dolist (server spam-blackhole-servers)
506 (let ((query-string (concat ip "." server)))
508 (let ((query-result (query-dig query-string)))
510 (message "spam: positive blackhole check '%s'" query-result)
511 (push (list ip server query-result)
513 ;; else, if not using dig.el
514 (when (query-dns query-string)
515 (push (list ip server (query-dns query-string 'TXT))
522 ;;; original idea for spam-check-BBDB from Alexander Kotelnikov
523 ;;; <sacha@giotto.sj.ru>
525 ;; all this is done inside a condition-case to trap errors
532 (defun spam-enter-ham-BBDB (from)
533 "Enter an address into the BBDB; implies ham (non-spam) sender"
535 (let* ((parsed-address (gnus-extract-address-components from))
536 (name (or (car parsed-address) "Ham Sender"))
537 (net-address (car (cdr parsed-address))))
538 (message "Adding address %s to BBDB" from)
539 (when (and net-address
540 (not (bbdb-search-simple nil net-address)))
541 (bbdb-create-internal name nil net-address nil nil
542 "ham sender added by spam.el")))))
544 (defun spam-BBDB-register-routine ()
545 (spam-generic-register-routine
550 (spam-enter-ham-BBDB (spam-fetch-field-from-fast article)))))
552 (defun spam-check-BBDB ()
553 "Mail from people in the BBDB is never considered spam"
554 (let ((who (message-fetch-field "from")))
556 (setq who (regexp-quote (cadr
557 (gnus-extract-address-components who))))
558 (if (bbdb-search-simple nil who)
559 nil spam-split-group)))))
562 (defalias 'bbdb-search-simple 'ignore)
563 (defalias 'spam-check-BBDB 'ignore)
564 (defalias 'spam-BBDB-register-routine 'ignore)
565 (defalias 'spam-enter-ham-BBDB 'ignore)
566 (defalias 'bbdb-create-internal 'ignore)
567 (defalias 'bbdb-records 'ignore))))
572 ;;; check the ifile backend; return nil if the mail was NOT classified
575 (defun spam-get-ifile-database-parameter ()
576 "Get the command-line parameter for ifile's database from spam-ifile-database-path."
577 (if spam-ifile-database-path
578 (format "--db-file=%s" spam-ifile-database-path)
581 (defun spam-check-ifile ()
582 "Check the ifile backend for the classification of this message"
583 (let ((article-buffer-name (buffer-name))
586 (let ((temp-buffer-name (buffer-name))
587 (db-param (spam-get-ifile-database-parameter)))
589 (set-buffer article-buffer-name)
591 (call-process-region (point-min) (point-max) spam-ifile-path
592 nil temp-buffer-name nil "-q" "-c" db-param)
593 (call-process-region (point-min) (point-max) spam-ifile-path
594 nil temp-buffer-name nil "-q" "-c")))
595 (goto-char (point-min))
597 (setq category (buffer-substring (point) (spam-point-at-eol))))
598 (when (not (zerop (length category))) ; we need a category here
599 (if spam-ifile-all-categories
600 (setq return category)
601 ;; else, if spam-ifile-all-categories is not set...
602 (when (string-equal spam-ifile-spam-category category)
603 (setq return spam-split-group))))))
606 (defun spam-ifile-register-with-ifile (article-string category)
607 "Register an article, given as a string, with a category.
608 Uses `gnus-newsgroup-name' if category is nil (for ham registration)."
609 (when (stringp article-string)
610 (let ((category (or category gnus-newsgroup-name))
611 (db-param (spam-get-ifile-database-parameter)))
613 (insert-string article-string)
615 (call-process-region (point-min) (point-max) spam-ifile-path
617 "-h" "-i" category db-param)
618 (call-process-region (point-min) (point-max) spam-ifile-path
620 "-h" "-i" category))))))
622 (defun spam-ifile-register-spam-routine ()
623 (spam-generic-register-routine
625 (spam-ifile-register-with-ifile
626 (spam-get-article-as-string article) spam-ifile-spam-category))
629 (defun spam-ifile-register-ham-routine ()
630 (spam-generic-register-routine
633 (spam-ifile-register-with-ifile
634 (spam-get-article-as-string article) nil))))
641 (let ((spam-stat-install-hooks nil))
642 (require 'spam-stat))
644 (defun spam-check-stat ()
645 "Check the spam-stat backend for the classification of this message"
646 (let ((spam-stat-split-fancy-spam-group spam-split-group) ; override
647 (spam-stat-buffer (buffer-name)) ; stat the current buffer
649 (spam-stat-split-fancy)))
651 (defun spam-stat-register-spam-routine ()
652 (spam-generic-register-routine
654 (let ((article-string (spam-get-article-as-string article)))
656 (insert-string article-string)
657 (spam-stat-buffer-is-spam))))
661 (defun spam-stat-register-ham-routine ()
662 (spam-generic-register-routine
665 (let ((article-string (spam-get-article-as-string article)))
667 (insert-string article-string)
668 (spam-stat-buffer-is-non-spam)))))
672 (defalias 'spam-stat-register-ham-routine 'ignore)
673 (defalias 'spam-stat-register-spam-routine 'ignore)
674 (defalias 'spam-stat-buffer-is-spam 'ignore)
675 (defalias 'spam-stat-buffer-is-non-spam 'ignore)
676 (defalias 'spam-stat-split-fancy 'ignore)
677 (defalias 'spam-stat-load 'ignore)
678 (defalias 'spam-stat-save 'ignore)
679 (defalias 'spam-check-stat 'ignore))))
683 ;;;; Blacklists and whitelists.
685 (defvar spam-whitelist-cache nil)
686 (defvar spam-blacklist-cache nil)
688 (defun spam-enter-whitelist (address)
689 "Enter ADDRESS into the whitelist."
690 (interactive "sAddress: ")
691 (spam-enter-list address spam-whitelist)
692 (setq spam-whitelist-cache nil))
694 (defun spam-enter-blacklist (address)
695 "Enter ADDRESS into the blacklist."
696 (interactive "sAddress: ")
697 (spam-enter-list address spam-blacklist)
698 (setq spam-blacklist-cache nil))
700 (defun spam-enter-list (address file)
701 "Enter ADDRESS into the given FILE, either the whitelist or the blacklist."
702 (unless (file-exists-p (file-name-directory file))
703 (make-directory (file-name-directory file) t))
706 (find-file-noselect file))
707 (goto-char (point-max))
710 (insert address "\n")
713 ;;; returns nil if the sender is in the whitelist, spam-split-group otherwise
714 (defun spam-check-whitelist ()
715 ;; FIXME! Should it detect when file timestamps change?
716 (unless spam-whitelist-cache
717 (setq spam-whitelist-cache (spam-parse-list spam-whitelist)))
718 (if (spam-from-listed-p spam-whitelist-cache) nil spam-split-group))
720 (defun spam-check-blacklist ()
721 ;; FIXME! Should it detect when file timestamps change?
722 (unless spam-blacklist-cache
723 (setq spam-blacklist-cache (spam-parse-list spam-blacklist)))
724 (and (spam-from-listed-p spam-blacklist-cache) spam-split-group))
726 (defun spam-parse-list (file)
727 (when (file-readable-p file)
728 (let (contents address)
730 (insert-file-contents file)
732 (setq address (buffer-substring (point) (spam-point-at-eol)))
734 (unless (zerop (length address))
735 (setq address (regexp-quote address))
736 (while (string-match "\\\\\\*" address)
737 (setq address (replace-match ".*" t t address)))
738 (push address contents))))
739 (nreverse contents))))
741 (defun spam-from-listed-p (cache)
742 (let ((from (message-fetch-field "from"))
745 (when (string-match (pop cache) from)
750 (defun spam-blacklist-register-routine ()
751 (spam-generic-register-routine
754 (let ((from (spam-fetch-field-from-fast article)))
756 (spam-enter-blacklist from))))
760 (defun spam-whitelist-register-routine ()
761 (spam-generic-register-routine
766 (let ((from (spam-fetch-field-from-fast article)))
768 (spam-enter-whitelist from))))))
773 ;;; See Paul Graham article, at `http://www.paulgraham.com/spam.html'.
775 ;;; This page is for those wanting to control spam with the help of
776 ;;; Eric Raymond's speedy Bogofilter, see
777 ;;; http://www.tuxedo.org/~esr/bogofilter. This has been tested with
778 ;;; a locally patched copy of version 0.4.
780 ;;; Make sure Bogofilter is installed. Bogofilter internally uses
781 ;;; Judy fast associative arrays, so you need to install Judy first,
782 ;;; and Bogofilter next. Fetch both distributions by visiting the
783 ;;; following links and downloading the latest version of each:
785 ;;; http://sourceforge.net/projects/judy/
786 ;;; http://www.tuxedo.org/~esr/bogofilter/
788 ;;; Unpack the Judy distribution and enter its main directory. Then do:
794 ;;; You will likely need to become super-user for the last step.
795 ;;; Then, unpack the Bogofilter distribution and enter its main
801 ;;; Here as well, you need to become super-user for the last step.
802 ;;; Now, initialize your word lists by doing, under your own identity:
804 ;;; mkdir ~/.bogofilter
805 ;;; touch ~/.bogofilter/badlist
806 ;;; touch ~/.bogofilter/goodlist
808 ;;; These two files are text files you may edit, but you normally don't!
810 ;;; The `M-d' command gets added to Gnus summary mode, marking current
811 ;;; article as spam, showing it with the `H' mark. Whenever you see a
812 ;;; spam article, make sure to mark its summary line with `M-d' before
813 ;;; leaving the group. Some groups, as per variable
814 ;;; `spam-junk-mailgroups' below, receive articles from Gnus splitting
815 ;;; on clues added by spam recognisers, so for these groups, we tack
816 ;;; an `H' mark at group entry for all summary lines which would
817 ;;; otherwise have no other mark. Make sure to _remove_ `H' marks for
818 ;;; any article which is _not_ genuine spam, before leaving such
819 ;;; groups: you may use `M-u' to "unread" the article, or `d' for
820 ;;; declaring it read the non-spam way. When you leave a group, all
821 ;;; `H' marked articles, saved or unsaved, are sent to Bogofilter
822 ;;; which will study them as spam samples.
824 ;;; Messages may also be deleted in various other ways, and unless
825 ;;; `spam-ham-marks-form' gets overridden below, marks `R' and `r' for
826 ;;; default read or explicit delete, marks `X' and 'K' for automatic
827 ;;; or explicit kills, as well as mark `Y' for low scores, are all
828 ;;; considered to be associated with articles which are not spam.
829 ;;; This assumption might be false, in particular if you use kill
830 ;;; files or score files as means for detecting genuine spam, you
831 ;;; should then adjust `spam-ham-marks-form'. When you leave a group,
832 ;;; all _unsaved_ articles bearing any the above marks are sent to
833 ;;; Bogofilter which will study these as not-spam samples. If you
834 ;;; explicit kill a lot, you might sometimes end up with articles
835 ;;; marked `K' which you never saw, and which might accidentally
836 ;;; contain spam. Best is to make sure that real spam is marked with
837 ;;; `H', and nothing else.
839 ;;; All other marks do not contribute to Bogofilter pre-conditioning.
840 ;;; In particular, ticked, dormant or souped articles are likely to
841 ;;; contribute later, when they will get deleted for real, so there is
842 ;;; no need to use them prematurely. Explicitly expired articles do
843 ;;; not contribute, command `E' is a way to get rid of an article
844 ;;; without Bogofilter ever seeing it.
846 ;;; In a word, with a minimum of care for associating the `H' mark for
847 ;;; spam articles only, Bogofilter training all gets fairly automatic.
848 ;;; You should do this until you get a few hundreds of articles in
849 ;;; each category, spam or not. The shell command `head -1
850 ;;; ~/.bogofilter/*' shows both article counts. The command `S S' in
851 ;;; summary mode, either for debugging or for curiosity, triggers
852 ;;; Bogofilter into displaying in another buffer the "spamicity" score
853 ;;; of the current article (between 0.0 and 1.0), together with the
854 ;;; article words which most significantly contribute to the score.
856 ;;; The real way for using Bogofilter, however, is to have some use
857 ;;; tool like `procmail' for invoking it on message reception, then
858 ;;; adding some recognisable header in case of detected spam. Gnus
859 ;;; splitting rules might later trip on these added headers and react
860 ;;; by sorting such articles into specific junk folders as per
861 ;;; `spam-junk-mailgroups'. Here is a possible `.procmailrc' contents
862 ;;; (still untested -- please tell me how it goes):
866 ;;; | formail -bfI "X-Spam-Status: Yes"
868 (defun spam-check-bogofilter ()
869 ;; Dynamic spam check. I do not know how to check the exit status,
870 ;; so instead, read `bogofilter -v' output.
871 (when (and spam-use-bogofilter spam-bogofilter-path)
872 (spam-bogofilter-articles nil "-v" (list (gnus-summary-article-number)))
873 (when (save-excursion
874 (set-buffer spam-bogofilter-output-buffer-name)
875 (goto-char (point-min))
876 (re-search-forward "Spamicity: \\(0\\.9\\|1\\.0\\)" nil t))
879 (defun spam-bogofilter-score ()
880 "Use `bogofilter -v' on the current article.
881 This yields the 15 most discriminant words for this article and the
882 spamicity coefficient of each, and the overall article spamicity."
884 (when (and spam-use-bogofilter spam-bogofilter-path)
885 (spam-bogofilter-articles nil "-v" (list (gnus-summary-article-number)))
886 (with-current-buffer spam-bogofilter-output-buffer-name
887 (unless (zerop (buffer-size))
888 (if (<= (count-lines (point-min) (point-max)) 1)
890 (goto-char (point-max))
893 (message "%s" (buffer-substring (point-min) (point))))
894 (goto-char (point-min))
895 (display-buffer (current-buffer)))))))
897 (defun spam-bogofilter-register-routine ()
898 (let ((articles gnus-newsgroup-articles)
899 article mark ham-articles spam-articles spam-mark-values
902 ;; marks are stored as symbolic values, so we have to dereference
903 ;; them for memq to work we wouldn't have to do this if
904 ;; gnus-summary-article-mark returned a symbol.
905 (dolist (mark spam-ham-marks)
906 (push (symbol-value mark) ham-mark-values))
908 (dolist (mark spam-spam-marks)
909 (push (symbol-value mark) spam-mark-values))
912 (setq article (pop articles)
913 mark (gnus-summary-article-mark article))
914 (cond ((memq mark spam-mark-values) (push article spam-articles))
915 ((memq article gnus-newsgroup-saved))
916 ((memq mark ham-mark-values) (push article ham-articles))))
918 (spam-bogofilter-articles "ham" "-n" ham-articles))
920 (spam-bogofilter-articles "SPAM" "-s" spam-articles))))
922 (defun spam-bogofilter-articles (type option articles)
923 (let ((output-buffer (get-buffer-create spam-bogofilter-output-buffer-name))
924 (article-copy (get-buffer-create " *Bogofilter Article Copy*"))
925 (remove-regexp (concat spam-bogofilter-spaminfo-header-regexp
928 prefix process article)
930 (setq prefix (format "Studying %d articles as %s..." (length articles)
932 (message "%s" prefix))
933 (save-excursion (set-buffer output-buffer) (erase-buffer))
934 (setq process (start-process "bogofilter" output-buffer
935 spam-bogofilter-path "-F" option))
936 (process-kill-without-query process t)
938 (save-window-excursion
940 (setq counter (1+ counter))
942 (message "%s %d" prefix counter))
943 (setq article (pop articles))
944 (gnus-summary-goto-subject article)
945 (gnus-summary-show-article t)
946 (gnus-eval-in-buffer-window article-copy
947 (insert-buffer-substring gnus-original-article-buffer)
948 ;; Remove spam classification redundant headers: they may induce
949 ;; unwanted biases in later analysis.
950 (message-remove-header remove-regexp t)
951 ;; Bogofilter really wants From envelopes for counting articles.
952 ;; Fake one at the beginning, make sure there will be no other.
953 (goto-char (point-min))
954 (if (looking-at "From ")
956 (insert "From nobody " (current-time-string) "\n"))
957 (let (case-fold-search)
958 (while (re-search-forward "^From " nil t)
961 (process-send-region process (point-min) (point-max))
963 ;; Sending the EOF is unwind-protected. This is to prevent lost copies
964 ;; of `bogofilter', hung on reading their standard input, in case the
965 ;; whole registering process gets interrupted by the user.
966 (process-send-eof process))
967 (kill-buffer article-copy)
968 ;; Receive process output. It sadly seems that we still have to protect
969 ;; ourselves against hung `bogofilter' processes.
970 (let ((status (process-status process))
971 (timeout (* 1000 spam-bogofilter-initial-timeout))
972 (quanta 200)) ; also counted in milliseconds
973 (while (and (not (eq status 'exit)) (> timeout 0))
974 ;; `accept-process-output' timeout is counted in microseconds.
975 (setq timeout (if (accept-process-output process 0 (* 1000 quanta))
976 (* 1000 spam-bogofilter-subsequent-timeout)
978 status (process-status process)))
979 (if (eq status 'exit)
981 (message "%s done!" prefix))
982 ;; Sigh! The process did time out... Become brutal!
983 (interrupt-process process)
984 (message "%s %d INTERRUPTED! (Article %d, status %s)"
985 (or prefix "Bogofilter process...")
986 counter article status)
987 ;; Give some time for user to read. Sitting redisplays but gives up
988 ;; if input is pending. Sleeping does not give up, but it does not
989 ;; redisplay either. Mix both: let's redisplay and not give up.
995 ;;; spam.el ends here.