;;; The integration with Gnus is not yet complete. See various `FIXME'
;;; comments, below, for supplementary explanations or discussions.
+;;; Several TODO items are marked as such
+
;;; Code:
(require 'gnus-sum)
-;; FIXME! We should not require `dns' nor `message' until we actually
-;; need them. Best would be to declare needed functions as auto-loadable.
-(require 'dns)
-(require 'message)
+(require 'gnus-uu) ; because of key prefix issues
+(require 'gnus) ; for the definitions of group content classification and spam processors
-(autoload 'bbdb-records "bbdb-com")
+;; FIXME! We should not require `message' until we actually need
+;; them. Best would be to declare needed functions as auto-loadable.
+(require 'message)
;; Attempt to load BBDB macros
(eval-when-compile
(condition-case nil
(require 'bbdb-com)
- (file-error (defalias 'bbdb-search 'ignore))
- (error)))
+ (file-error (defalias 'bbdb-search 'ignore))))
;; autoload executable-find
-(autoload 'executable-find "executable")
-
-;;; Main parameters.
-
-(defvar spam-use-blacklist t
- "True if the blacklist should be used.")
-
-(defvar spam-use-whitelist t
- "True if the whitelist should be used.")
+(eval-and-compile
+ ;; executable-find is not autoloaded in Emacs 20
+ (autoload 'executable-find "executable"))
-(defvar spam-use-blackholes nil
- ;; FIXME! Turned off for now. The DNS routines are said to be flaky.
- "True if blackholes should be used.")
+;; autoload ifile-spam-filter
+(eval-and-compile
+ (autoload 'ifile-spam-filter "ifile-gnus"))
-(defvar spam-use-bogofilter t
- "True if bogofilter should be used.")
+;; autoload query-dig
+(eval-and-compile
+ (autoload 'query-dig "dig"))
-(defvar spam-split-group "spam"
- "Usual group name where spam should be split.")
+;; autoload query-dns
+(eval-and-compile
+ (autoload 'query-dns "dns"))
-(defvar spam-junk-mailgroups
- ;; FIXME! The mailgroup list evidently depends on other choices made by the
- ;; user, so the built-in default below is not likely to be appropriate.
- (cons spam-split-group '("mail.junk" "poste.pourriel"))
- "Mailgroups which are dedicated by splitting to receive various junk.
-All unmarked article in such group receive the spam mark on group entry.")
+;;; Main parameters.
-;; FIXME! For `spam-ham-marks' and `spam-spam-marks', I wonder if it would
-;; not be easier for the user to just accept a string of mark letters, instead
-;; of a list of Gnus variable names. In such case, the stunt of deferred
-;; evaluation would not be useful anymore. Lars?? :-)
+(defgroup spam nil
+ "Spam configuration.")
-;; FIXME! It is rather questionable to see `K', `X' and `Y' as indicating
-;; positive ham. It much depends on how and why people use kill files, score
-;; files, and the kill command. Maybe it would be better, by default, to not
-;; process a message neither as ham nor spam, that is, just ignore it for
-;; learning purposes, when we are not sure of how the user sees it.
-;; But `r' and `R' should undoubtedly be seen as ham.
+(defcustom spam-directory "~/News/spam/"
+ "Directory for spam whitelists and blacklists."
+ :type 'directory
+ :group 'spam)
-;; FIXME! Some might consider overkill to define a list of spam marks. On
-;; the other hand, who knows, some users might for example like that
-;; explicitly `E'xpired articles be processed as positive spam.
+(defcustom spam-whitelist (expand-file-name "whitelist" spam-directory)
+ "The location of the whitelist.
+The file format is one regular expression per line.
+The regular expression is matched against the address."
+ :type 'file
+ :group 'spam)
-(defvar spam-ham-marks
- (list gnus-del-mark gnus-read-mark gnus-killed-mark
- gnus-kill-file-mark gnus-low-score-mark)
+(defcustom spam-blacklist (expand-file-name "blacklist" spam-directory)
+ "The location of the blacklist.
+The file format is one regular expression per line.
+The regular expression is matched against the address."
+ :type 'file
+ :group 'spam)
+
+(defcustom spam-use-dig t
+ "Whether query-dig should be used instead of query-dns."
+ :type 'boolean
+ :group 'spam)
+
+(defcustom spam-use-blacklist nil
+ "Whether the blacklist should be used by spam-split."
+ :type 'boolean
+ :group 'spam)
+
+(defcustom spam-use-whitelist nil
+ "Whether the whitelist should be used by spam-split."
+ :type 'boolean
+ :group 'spam)
+
+(defcustom spam-use-blackholes nil
+ "Whether blackholes should be used by spam-split."
+ :type 'boolean
+ :group 'spam)
+
+(defcustom spam-use-bogofilter nil
+ "Whether bogofilter should be used by spam-split."
+ :type 'boolean
+ :group 'spam)
+
+(defcustom spam-use-BBDB nil
+ "Whether BBDB should be used by spam-split."
+ :type 'boolean
+ :group 'spam)
+
+(defcustom spam-use-ifile nil
+ "Whether ifile should be used by spam-split."
+ :type 'boolean
+ :group 'spam)
+
+(defcustom spam-split-group "spam"
+ "Group name where incoming spam should be put by spam-split."
+ :type 'string
+ :group 'spam)
+
+;; FIXME! The mailgroup list evidently depends on other choices made by the
+;; user, so the built-in default below is not likely to be appropriate.
+(defcustom spam-junk-mailgroups (cons spam-split-group '("mail.junk" "poste.pourriel"))
+ "Mailgroups with spam contents.
+All unmarked articles in such groups receive the spam mark on group entry."
+ :type '(repeat (string :tag "Group"))
+ :group 'spam)
+
+(defcustom spam-blackhole-servers '("bl.spamcop.net" "relays.ordb.org"
+ "dev.null.dk" "relays.visi.com")
+ "List of blackhole servers."
+ :type '(repeat (string :tag "Server"))
+ :group 'spam)
+
+(defcustom spam-ham-marks (list 'gnus-del-mark 'gnus-read-mark
+ 'gnus-killed-mark 'gnus-kill-file-mark
+ 'gnus-low-score-mark)
"Marks considered as being ham (positively not spam).
-Such articles will be transmitted to `bogofilter -n' on group exit.")
-
-(defvar spam-spam-marks
- (list gnus-spam-mark)
+Such articles will be processed as ham (non-spam) on group exit."
+ :type '(set
+ (variable-item gnus-del-mark)
+ (variable-item gnus-read-mark)
+ (variable-item gnus-killed-mark)
+ (variable-item gnus-kill-file-mark)
+ (variable-item gnus-low-score-mark))
+ :group 'spam)
+
+(defcustom spam-spam-marks (list 'gnus-spam-mark)
"Marks considered as being spam (positively spam).
-Such articles will be transmitted to `bogofilter -s' on group exit.")
-
-;; FIXME! Ideally, the remainder of this page should be fully integrated
-;; within `gnus-sum.el'.
+Such articles will be processed as spam on group exit."
+ :type '(set
+ (variable-item gnus-spam-mark)
+ (variable-item gnus-killed-mark)
+ (variable-item gnus-kill-file-mark)
+ (variable-item gnus-low-score-mark))
+ :group 'spam)
+
+(defcustom spam-face 'gnus-splash-face
+ "Face for spam-marked articles"
+ :type 'face
+ :group 'spam)
+
+(defgroup spam-bogofilter nil
+ "Spam bogofilter configuration."
+ :group 'spam)
+
+(defcustom spam-bogofilter-output-buffer-name "*Bogofilter Output*"
+ "Name of buffer when displaying `bogofilter -v' output."
+ :type 'string
+ :group 'spam-bogofilter)
+
+(defcustom spam-bogofilter-initial-timeout 40
+ "Timeout in seconds for the initial reply from the `bogofilter' program."
+ :type 'integer
+ :group 'spam-bogofilter)
+
+(defcustom spam-bogofilter-subsequent-timeout 15
+ "Timeout in seconds for any subsequent reply from the `bogofilter' program."
+ :type 'integer
+ :group 'spam-bogofilter)
+
+(defcustom spam-bogofilter-path (executable-find "bogofilter")
+ "File path of the Bogofilter executable program."
+ :type '(choice (file :tag "Location of bogofilter")
+ (const :tag "Bogofilter is not installed"))
+ :group 'spam-bogofilter)
+
+;; FIXME! In the following regexp, we should explain which tool produces
+;; which kind of header. I do not even remember them all by now. X-Junk
+;; (and previously X-NoSpam) are produced by the `NoSpam' tool, which has
+;; never been published, so it might not be reasonable leaving it in the
+;; list.
+(defcustom spam-bogofilter-spaminfo-header-regexp
+ "^X-\\(jf\\|Junk\\|NoSpam\\|Spam\\|SB\\)[^:]*:"
+ "Regexp for spam markups in headers.
+Markup from spam recognisers, as well as `Xref', are to be removed from
+articles before they get registered by Bogofilter."
+ :type 'regexp
+ :group 'spam-bogofilter)
;;; Key bindings for spam control.
-;; FIXME! The justification for `M-d' is that this is what Paul Graham
-;; suggests in his original article, and what Eric Raymond's patch for Mutt
-;; uses. But more importantly, that binding was still free in Summary mode!
-
-;; FIXME! Lars has not blessed the following key bindings yet. It looks
-;; convenient that the score analysis command uses a sequence ending with the
-;; letter `t', so it nicely parallels `B t' or `V t'. `M-d' is a kind of
-;; "alternate" `d', it is also the sequence suggested in Paul Graham article,
-;; and also in Eric Raymond's patch for Mutt. `S x' might be the more
-;; official key binding for `M-d'.
-
(gnus-define-keys gnus-summary-mode-map
"St" spam-bogofilter-score
"Sx" gnus-summary-mark-as-spam
+ "Mst" spam-bogofilter-score
+ "Msx" gnus-summary-mark-as-spam
"\M-d" gnus-summary-mark-as-spam)
;;; How to highlight a spam summary line.
-;; FIXME! Of course, `gnus-splash-face' has another purpose. Maybe a
-;; special face should be created, named and used instead, for spam lines.
+;; TODO: How do we redo this every time spam-face is customized?
-(push '((eq mark gnus-spam-mark) . gnus-splash-face)
+(push '((eq mark gnus-spam-mark) . spam-face)
gnus-summary-highlight)
+;; convenience functions
+(defun spam-group-spam-contents-p (group)
+  "Return non-nil if GROUP is classified as holding spam.
+GROUP qualifies when it is a member of `spam-junk-mailgroups', or when
+the value of `gnus-parameter-spam-contents' for GROUP contains the
+symbol `gnus-group-spam-classification-spam'.  Return nil when GROUP
+is not a string."
+  (and (stringp group)
+       (or (member group spam-junk-mailgroups)
+           (memq 'gnus-group-spam-classification-spam
+                 (gnus-parameter-spam-contents group)))))
+
+(defun spam-group-ham-contents-p (group)
+  "Return non-nil if GROUP is classified as holding ham.
+GROUP qualifies when the value of `gnus-parameter-spam-contents' for
+GROUP contains the symbol `gnus-group-spam-classification-ham'.
+Return nil when GROUP is not a string."
+  (and (stringp group)
+       (memq 'gnus-group-spam-classification-ham
+             (gnus-parameter-spam-contents group))))
+
+(defun spam-group-processor-p (group processor)
+  "Return non-nil if PROCESSOR is one of the spam processors of GROUP.
+PROCESSOR is looked up with `member' in the first element of the value
+returned by `gnus-parameter-spam-process' for GROUP.  Return nil unless
+GROUP is a string and PROCESSOR is a symbol."
+  (and (stringp group)
+       (symbolp processor)
+       (member processor (car (gnus-parameter-spam-process group)))))
+
+(defun spam-group-processor-bogofilter-p (group)
+ "Return non-nil if `gnus-group-spam-exit-processor-bogofilter' is among GROUP's spam processors."
+ (spam-group-processor-p group 'gnus-group-spam-exit-processor-bogofilter))
+
+(defun spam-group-processor-ifile-p (group)
+ "Return non-nil if `gnus-group-spam-exit-processor-ifile' is among GROUP's spam processors."
+ (spam-group-processor-p group 'gnus-group-spam-exit-processor-ifile))
+
+(defun spam-group-processor-blacklist-p (group)
+ "Return non-nil if `gnus-group-spam-exit-processor-blacklist' is among GROUP's spam processors."
+ (spam-group-processor-p group 'gnus-group-spam-exit-processor-blacklist))
+
+(defun spam-group-processor-whitelist-p (group)
+ "Return non-nil if `gnus-group-ham-exit-processor-whitelist' is among GROUP's spam processors."
+ (spam-group-processor-p group 'gnus-group-ham-exit-processor-whitelist))
+
+(defun spam-group-processor-BBDB-p (group)
+ "Return non-nil if `gnus-group-ham-exit-processor-BBDB' is among GROUP's spam processors."
+ (spam-group-processor-p group 'gnus-group-ham-exit-processor-BBDB))
+
;;; Hooks dispatching. A bit raw for now.
(defun spam-summary-prepare ()
+ "Function added to `gnus-summary-prepare-hook'; runs `spam-mark-junk-as-spam-routine'."
(spam-mark-junk-as-spam-routine))
(defun spam-summary-prepare-exit ()
- (spam-bogofilter-register-routine))
+ "Run the spam and ham processors configured for the group being exited.
+Function added to `gnus-summary-prepare-exit-hook'.  The Bogofilter,
+ifile and blacklist registration routines run for any group whose
+spam-process parameter names them.  In groups with spam contents,
+spam-marked articles are expired and maybe moved.  In groups with ham
+contents, the whitelist and BBDB registration routines run when the
+group names them."
+ ;; The spam processors are invoked for any group, spam or ham or neither
+ (when (and spam-bogofilter-path
+ (spam-group-processor-bogofilter-p gnus-newsgroup-name))
+ (spam-bogofilter-register-routine))
+
+ (when (spam-group-processor-ifile-p gnus-newsgroup-name)
+ (spam-ifile-register-routine))
+
+ ;; Blacklist registration depends on the blacklist processor, not on
+ ;; the Bogofilter one.
+ (when (spam-group-processor-blacklist-p gnus-newsgroup-name)
+ (spam-blacklist-register-routine))
+
+ ;; Only for spam groups, we expire and maybe move articles
+ (when (spam-group-spam-contents-p gnus-newsgroup-name)
+ (spam-mark-spam-as-expired-and-move-routine
+ (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
+
+ (when (spam-group-ham-contents-p gnus-newsgroup-name)
+ (when (spam-group-processor-whitelist-p gnus-newsgroup-name)
+ (spam-whitelist-register-routine))
+ (when (spam-group-processor-BBDB-p gnus-newsgroup-name)
+ (spam-BBDB-register-routine))))
(add-hook 'gnus-summary-prepare-hook 'spam-summary-prepare)
(add-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)
(defun spam-mark-junk-as-spam-routine ()
- (when (member gnus-newsgroup-name spam-junk-mailgroups)
+ "Give the spam mark to every unread article when the current group has spam contents.
+Articles whose mark is not `gnus-unread-mark' are left untouched."
+ ;; check the global list of group names spam-junk-mailgroups and the
+ ;; group parameters
+ (when (spam-group-spam-contents-p gnus-newsgroup-name)
(let ((articles gnus-newsgroup-articles)
article)
(while articles
(setq article (pop articles))
(when (eq (gnus-summary-article-mark article) gnus-unread-mark)
(gnus-summary-mark-article article gnus-spam-mark))))))
+
+(defun spam-mark-spam-as-expired-and-move-routine (&optional group)
+  "Mark spam-marked articles as expirable and optionally move them.
+Every article in `gnus-newsgroup-articles' whose mark is
+`gnus-spam-mark' receives `gnus-expirable-mark'.  When GROUP is a
+string, each such article is also moved to GROUP with
+`gnus-summary-move-article'."
+  (dolist (article gnus-newsgroup-articles)
+    (when (eq (gnus-summary-article-mark article) gnus-spam-mark)
+      (gnus-summary-mark-article article gnus-expirable-mark)
+      (when (stringp group)
+        ;; The move command is run with this article bound as the
+        ;; current one.
+        (let ((gnus-current-article article))
+          (gnus-summary-move-article nil group))))))
+
+(defun spam-generic-register-routine (spam-func ham-func)
+  "Sort the current group's articles into spam and ham, then process them.
+An article in `gnus-newsgroup-articles' is collected as spam when its
+mark is the value of one of the variables listed in `spam-spam-marks'.
+Otherwise it is skipped if it is in `gnus-newsgroup-saved', and
+collected as ham when its mark is the value of one of the variables
+listed in `spam-ham-marks'.  HAM-FUNC, when non-nil, is then called on
+each ham article, and afterwards SPAM-FUNC, when non-nil, on each spam
+article."
+  ;; Marks are stored as variable names, so they are dereferenced here
+  ;; for `memq' to work against what `gnus-summary-article-mark' returns.
+  (let ((ham-mark-values (mapcar 'symbol-value spam-ham-marks))
+        (spam-mark-values (mapcar 'symbol-value spam-spam-marks))
+        ham-articles spam-articles)
+    (dolist (article gnus-newsgroup-articles)
+      (let ((mark (gnus-summary-article-mark article)))
+        (cond ((memq mark spam-mark-values)
+               (push article spam-articles))
+              ;; saved articles are never registered as ham
+              ((memq article gnus-newsgroup-saved))
+              ((memq mark ham-mark-values)
+               (push article ham-articles)))))
+    ;; `mapc' is used because, unlike `mapcar', it discards the return
+    ;; values.
+    (when (and ham-articles ham-func)
+      (mapc ham-func ham-articles))
+    (when (and spam-articles spam-func)
+      (mapc spam-func spam-articles))))
+
+(defun spam-fetch-field-from-fast (article)
+  "Return the `from' field of ARTICLE taken from the Gnus data list.
+ARTICLE is looked up with `assoc' in the value of (gnus-data-list nil).
+Return nil when ARTICLE is not a number or has no entry there."
+  (when (numberp article)
+    (let ((entry (assoc article (gnus-data-list nil))))
+      (when entry
+        (mail-header-from (gnus-data-header entry))))))
+
\f
;;;; Spam determination.
-;; The following list contains pairs associating a parameter variable with a
-;; spam checking function. If the parameter variable is true, then the
-;; checking function is called, and its value decides what happens. Each
-;; individual check may return `nil', `t', or a mailgroup name. The value
-;; `nil' means that the check does not yield a decision, and so, that further
-;; checks are needed. The value `t' means that the message is definitely not
-;; spam, and that further spam checks should be inhibited. Otherwise, a
-;; mailgroup name is returned where the mail should go, and further checks are
-;; also inhibited. The usual mailgroup name is the value of
-;; `spam-split-group', meaning that the message is definitely a spam.
-
(defvar spam-list-of-checks
'((spam-use-blacklist . spam-check-blacklist)
(spam-use-whitelist . spam-check-whitelist)
- (spam-use-bbdb . spam-check-bbdb)
+ (spam-use-BBDB . spam-check-BBDB)
+ (spam-use-ifile . spam-check-ifile)
(spam-use-blackholes . spam-check-blackholes)
- (spam-use-bogofilter . spam-check-bogofilter)))
+ (spam-use-bogofilter . spam-check-bogofilter))
+"Alist of pairs associating a parameter variable with a spam check.
+Each pair has the form (VARIABLE . FUNCTION). If the parameter variable is
+true, then the checking function is called, and its value decides what
+happens. Each individual check may return `nil', `t', or a mailgroup
+name. The value `nil' means that the check does not yield a decision,
+and so, that further checks are needed. The value `t' means that the
+message is definitely not spam, and that further spam checks should be
+inhibited. Otherwise, a mailgroup name is returned where the mail
+should go, and further checks are also inhibited. The usual mailgroup
+name is the value of `spam-split-group', meaning that the message is
+definitely a spam.")
(defun spam-split ()
"Split this message into the `spam' group if it is spam.
decision)
(while (and list-of-checks (not decision))
(let ((pair (pop list-of-checks)))
- (when (eval (car pair))
- (setq decision (apply (cdr pair))))))
+ (when (symbol-value (car pair))
+ (setq decision (funcall (cdr pair))))))
(if (eq decision t)
nil
decision)))
\f
;;;; Blackholes.
-(defvar spam-blackhole-servers '("bl.spamcop.net"
- "relays.ordb.org"
- "dev.null.dk"
- "relays.visi.com"
- "rbl.maps.vix.com")
- "List of blackhole servers.")
-
(defun spam-check-blackholes ()
- "Check the Receieved headers for blackholed relays."
+ "Check the Received headers for blackholed relays."
(let ((headers (message-fetch-field "received"))
ips matches)
(when headers
ips)))
(dolist (server spam-blackhole-servers)
(dolist (ip ips)
- (when (query-dns (concat ip "." server))
- (push (list ip server (query-dns (concat ip "." server) 'TXT))
- matches)))))
+ (let ((query-string (concat ip "." server)))
+ (if spam-use-dig
+ (let ((query-result (query-dig query-string)))
+ (when query-result
+ (message "spam: positive blackhole check '%s'" query-result)
+ (push (list ip server query-result)
+ matches)))
+ ;; else, if not using dig.el
+ (when (query-dns query-string)
+ (push (list ip server (query-dns query-string 'TXT))
+ matches)))))))
(when matches
spam-split-group)))
\f
-;;;; Blacklists and whitelists.
+;;;; BBDB original idea for spam-check-BBDB from Alexander Kotelnikov
+;;; <sacha@giotto.sj.ru>
-(defvar spam-directory "~/News/spam/"
- "When spam files are kept.")
+;; all this is done inside a condition-case to trap errors
+(condition-case nil
+ (progn
-(defvar spam-whitelist (expand-file-name "whitelist" spam-directory)
- "The location of the whitelist.
-The file format is one regular expression per line.
-The regular expression is matched against the address.")
+ ;; If BBDB cannot be loaded, this `require' signals `file-error' and
+ ;; the fallback definitions in the handler below are used instead.
+ (require 'bbdb-com)
-(defvar spam-blacklist (expand-file-name "blacklist" spam-directory)
- "The location of the blacklist.
-The file format is one regular expression per line.
-The regular expression is matched against the address.")
+ (defun spam-enter-ham-BBDB (from)
+ "Enter an address into the BBDB; implies ham (non-spam) sender"
+ (when (stringp from)
+ (let* ((parsed-address (gnus-extract-address-components from))
+ (name (or (car parsed-address) "Ham Sender"))
+ (net-address (car (cdr parsed-address))))
+ (message "Adding address %s to BBDB" from)
+ (when (and net-address
+ (not (bbdb-search (bbdb-records) nil nil net-address)))
+ (bbdb-create-internal name nil net-address nil nil
+ "ham sender added by spam.el")))))
+
+ (defun spam-BBDB-register-routine ()
+ "Enter the sender of every ham article of the current group into the BBDB."
+ (spam-generic-register-routine
+ ;; spam function
+ nil
+ ;; ham function
+ (lambda (article)
+ (spam-enter-ham-BBDB (spam-fetch-field-from-fast article)))))
+
+ (defun spam-check-BBDB ()
+ "Mail from people in the BBDB is never considered spam"
+ (let ((who (message-fetch-field "from")))
+ (when who
+ (setq who (regexp-quote (cadr
+ (gnus-extract-address-components who))))
+ (if (bbdb-search (bbdb-records) nil nil who)
+ nil spam-split-group)))))
+
+ (file-error (progn
+ ;; Without BBDB, drop the BBDB entry from the list of checks.
+ (setq spam-list-of-checks
+ (delete (assoc 'spam-use-BBDB spam-list-of-checks)
+ spam-list-of-checks))
+ (defun spam-check-BBDB ()
+ "Fallback used when BBDB could not be loaded; never yields a decision."
+ ;; `message' has to be called as a function: a bare `message'
+ ;; symbol would be evaluated as a variable and signal an error.
+ (message "spam-check-BBDB was invoked, but it shouldn't have")
+ ;; Return nil so the message text is not taken for a split group.
+ nil)
+ (defun spam-BBDB-register-routine ()
+ (spam-generic-register-routine nil nil)))))
+
+\f
+;;;; ifile
+
+;;; uses ifile-gnus.el from
+;;; http://www.ai.mit.edu/people/jhbrown/ifile-gnus.html
+
+;;; check the ifile backend; return nil if the mail was NOT classified
+;;; as spam
+
+;;; TODO: we can't (require 'ifile-gnus), because it will insinuate
+;;; itself automatically
+(defun spam-check-ifile ()
+ "Call `ifile-spam-filter' with `ifile-primary-spam-group' bound to `spam-split-group'."
+ (let ((ifile-primary-spam-group spam-split-group))
+ (ifile-spam-filter nil)))
+
+;; TODO: add ifile registration
+;; We need ifile-gnus.el to support nnimap; we could feed the message
+;; directly to ifile like we do with bogofilter but that's ugly.
+(defun spam-ifile-register-routine ()
+ "Placeholder: passes no spam or ham function, so no article is registered."
+ (spam-generic-register-routine nil nil))
+
+\f
+;;;; Blacklists and whitelists.
(defvar spam-whitelist-cache nil)
(defvar spam-blacklist-cache nil)
(setq spam-whitelist-cache (spam-parse-list spam-whitelist)))
(if (spam-from-listed-p spam-whitelist-cache) nil spam-split-group))
-;;; copied from code by Alexander Kotelnikov <sacha@giotto.sj.ru>
-(defun spam-check-bbdb ()
- "We want messages from people who are in the BBDB not to be split to spam"
- (let ((who (message-fetch-field "from")))
- (when who
- (setq who (regexp-quote (cadr (gnus-extract-address-components who))))
- (if (bbdb-search (bbdb-records) nil nil who) nil spam-split-group))))
-
-;; let spam-check-bbdb be nil if the BBDB can't be loaded
-(condition-case nil
- (require 'bbdb)
- (file-error (defalias 'spam-check-bbdb 'ignore)))
-
(defun spam-check-blacklist ()
;; FIXME! Should it detect when file timestamps change?
(unless spam-blacklist-cache
(setq found t
cache nil)))
found))
+
+(defun spam-blacklist-register-routine ()
+  "Blacklist the sender of every spam article of the current group.
+The `from' field of each spam article is passed to
+`spam-enter-blacklist' when it is a string.  Ham articles are not
+processed."
+  (spam-generic-register-routine
+   ;; spam articles: blacklist the sender
+   (lambda (article)
+     (let ((sender (spam-fetch-field-from-fast article)))
+       (and (stringp sender)
+            (spam-enter-blacklist sender))))
+   ;; ham articles: nothing to do
+   nil))
+
+(defun spam-whitelist-register-routine ()
+  "Whitelist the sender of every ham article of the current group.
+The `from' field of each ham article is passed to
+`spam-enter-whitelist' when it is a string.  Spam articles are not
+processed."
+  (spam-generic-register-routine
+   ;; spam articles: nothing to do
+   nil
+   ;; ham articles: whitelist the sender
+   (lambda (article)
+     (let ((sender (spam-fetch-field-from-fast article)))
+       (and (stringp sender)
+            (spam-enter-whitelist sender))))))
+
\f
-;;;; Training via Bogofilter. Last updated 2002-09-02.
+;;;; Bogofilter
;;; See Paul Graham article, at `http://www.paulgraham.com/spam.html'.
-;;; This page is for those wanting to control spam with the help of Eric
-;;; Raymond's speedy Bogofilter, see http://www.tuxedo.org/~esr/bogofilter.
-;;; This has been tested with a locally patched copy of version 0.4.
+;;; This page is for those wanting to control spam with the help of
+;;; Eric Raymond's speedy Bogofilter, see
+;;; http://www.tuxedo.org/~esr/bogofilter. This has been tested with
+;;; a locally patched copy of version 0.4.
-;;; Make sure Bogofilter is installed. Bogofilter internally uses Judy fast
-;;; associative arrays, so you need to install Judy first, and Bogofilter
-;;; next. Fetch both distributions by visiting the following links and
-;;; downloading the latest version of each:
+;;; Make sure Bogofilter is installed. Bogofilter internally uses
+;;; Judy fast associative arrays, so you need to install Judy first,
+;;; and Bogofilter next. Fetch both distributions by visiting the
+;;; following links and downloading the latest version of each:
;;;
;;; http://sourceforge.net/projects/judy/
;;; http://www.tuxedo.org/~esr/bogofilter/
;;; make
;;; make install
;;;
-;;; You will likely need to become super-user for the last step. Then, unpack
-;;; the Bogofilter distribution and enter its main directory:
+;;; You will likely need to become super-user for the last step.
+;;; Then, unpack the Bogofilter distribution and enter its main
+;;; directory:
;;;
;;; make
;;; make install
;;;
-;;; Here as well, you need to become super-user for the last step. Now,
-;;; initialises your word lists by doing, under your own identity:
+;;; Here as well, you need to become super-user for the last step.
+;;; Now, initialize your word lists by doing, under your own identity:
;;;
;;; mkdir ~/.bogofilter
;;; touch ~/.bogofilter/badlist
;;;
;;; These two files are text files you may edit, but you normally don't!
-;;; The `M-d' command gets added to Gnus summary mode, marking current article
-;;; as spam, showing it with the `H' mark. Whenever you see a spam article,
-;;; make sure to mark its summary line with `M-d' before leaving the group.
-;;; Some groups, as per variable `spam-junk-mailgroups' below, receive articles
-;;; from Gnus splitting on clues added by spam recognisers, so for these
-;;; groups, we tack an `H' mark at group entry for all summary lines which
-;;; would otherwise have no other mark. Make sure to _remove_ `H' marks for
-;;; any article which is _not_ genuine spam, before leaving such groups: you
-;;; may use `M-u' to "unread" the article, or `d' for declaring it read the
-;;; non-spam way. When you leave a group, all `H' marked articles, saved or
-;;; unsaved, are sent to Bogofilter which will study them as spam samples.
+;;; The `M-d' command gets added to Gnus summary mode, marking current
+;;; article as spam, showing it with the `H' mark. Whenever you see a
+;;; spam article, make sure to mark its summary line with `M-d' before
+;;; leaving the group. Some groups, as per variable
+;;; `spam-junk-mailgroups' below, receive articles from Gnus splitting
+;;; on clues added by spam recognisers, so for these groups, we tack
+;;; an `H' mark at group entry for all summary lines which would
+;;; otherwise have no other mark. Make sure to _remove_ `H' marks for
+;;; any article which is _not_ genuine spam, before leaving such
+;;; groups: you may use `M-u' to "unread" the article, or `d' for
+;;; declaring it read the non-spam way. When you leave a group, all
+;;; `H' marked articles, saved or unsaved, are sent to Bogofilter
+;;; which will study them as spam samples.
;;; Messages may also be deleted in various other ways, and unless
-;;; `spam-ham-marks-form' gets overridden below, marks `R' and `r' for default
-;;; read or explicit delete, marks `X' and 'K' for automatic or explicit
-;;; kills, as well as mark `Y' for low scores, are all considered to be
-;;; associated with articles which are not spam. This assumption might be
-;;; false, in particular if you use kill files or score files as means for
-;;; detecting genuine spam, you should then adjust `spam-ham-marks-form'. When
-;;; you leave a group, all _unsaved_ articles bearing any the above marks are
-;;; sent to Bogofilter which will study these as not-spam samples. If you
-;;; explicit kill a lot, you might sometimes end up with articles marked `K'
-;;; which you never saw, and which might accidentally contain spam. Best is
-;;; to make sure that real spam is marked with `H', and nothing else.
-
-;;; All other marks do not contribute to Bogofilter pre-conditioning. In
-;;; particular, ticked, dormant or souped articles are likely to contribute
-;;; later, when they will get deleted for real, so there is no need to use
-;;; them prematurely. Explicitly expired articles do not contribute, command
-;;; `E' is a way to get rid of an article without Bogofilter ever seeing it.
-
-;;; In a word, with a minimum of care for associating the `H' mark for spam
-;;; articles only, Bogofilter training all gets fairly automatic. You should
-;;; do this until you get a few hundreds of articles in each category, spam
-;;; or not. The shell command `head -1 ~/.bogofilter/*' shows both article
-;;; counts. The command `S S' in summary mode, either for debugging or for
-;;; curiosity, triggers Bogofilter into displaying in another buffer the
-;;; "spamicity" score of the current article (between 0.0 and 1.0), together
-;;; with the article words which most significantly contribute to the score.
-
-;;; The real way for using Bogofilter, however, is to have some use tool like
-;;; `procmail' for invoking it on message reception, then adding some
-;;; recognisable header in case of detected spam. Gnus splitting rules might
-;;; later trip on these added headers and react by sorting such articles into
-;;; specific junk folders as per `spam-junk-mailgroups'. Here is a possible
-;;; `.procmailrc' contents (still untested -- please tell me how it goes):
+;;; `spam-ham-marks' gets overridden below, marks `R' and `r' for
+;;; default read or explicit delete, marks `X' and `K' for automatic
+;;; or explicit kills, as well as mark `Y' for low scores, are all
+;;; considered to be associated with articles which are not spam.
+;;; This assumption might be false: in particular, if you use kill
+;;; files or score files as means for detecting genuine spam, you
+;;; should then adjust `spam-ham-marks'. When you leave a group,
+;;; all _unsaved_ articles bearing any of the above marks are sent to
+;;; Bogofilter which will study these as not-spam samples. If you
+;;; explicitly kill a lot, you might sometimes end up with articles
+;;; marked `K' which you never saw, and which might accidentally
+;;; contain spam. Best is to make sure that real spam is marked with
+;;; `H', and nothing else.
+
+;;; All other marks do not contribute to Bogofilter pre-conditioning.
+;;; In particular, ticked, dormant or souped articles are likely to
+;;; contribute later, when they will get deleted for real, so there is
+;;; no need to use them prematurely. Explicitly expired articles do
+;;; not contribute, command `E' is a way to get rid of an article
+;;; without Bogofilter ever seeing it.
+
+;;; In a word, with a minimum of care for associating the `H' mark for
+;;; spam articles only, Bogofilter training all gets fairly automatic.
+;;; You should do this until you get a few hundreds of articles in
+;;; each category, spam or not. The shell command `head -1
+;;; ~/.bogofilter/*' shows both article counts. The command `S t' in
+;;; summary mode, either for debugging or for curiosity, triggers
+;;; Bogofilter into displaying in another buffer the "spamicity" score
+;;; of the current article (between 0.0 and 1.0), together with the
+;;; article words which most significantly contribute to the score.
+
+;;; The real way for using Bogofilter, however, is to have some
+;;; tool like `procmail' invoke it on message reception, then
+;;; adding some recognisable header in case of detected spam. Gnus
+;;; splitting rules might later trip on these added headers and react
+;;; by sorting such articles into specific junk folders as per
+;;; `spam-junk-mailgroups'. Here is a possible `.procmailrc' contents
+;;; (still untested -- please tell me how it goes):
;;;
;;; :0HBf:
;;; * ? bogofilter
;;; | formail -bfI "X-Spam-Status: Yes"
-(defvar spam-output-buffer-name "*Bogofilter Output*"
- "Name of buffer when displaying `bogofilter -v' output.")
-
-(defvar spam-spaminfo-header-regexp
- ;; FIXME! In the following regexp, we should explain which tool produces
- ;; which kind of header. I do not even remember them all by now. X-Junk
- ;; (and previously X-NoSpam) are produced by the `NoSpam' tool, which has
- ;; never been published, so it might not be reasonable leaving it in the
- ;; list.
- "^X-\\(jf\\|Junk\\|NoSpam\\|Spam\\|SB\\)[^:]*:"
- "Regexp for spam markups in headers.
-Markup from spam recognisers, as well as `Xref', are to be removed from
-articles before they get registered by Bogofilter.")
-
-(defvar spam-bogofilter-path (executable-find "bogofilter")
- "File path of the Bogofilter executable program.
-Force this variable to nil if you want to inhibit the functionality.")
-
(defun spam-check-bogofilter ()
+ "Return `spam-split-group' if Bogofilter reports a high spamicity, else nil.
+Does nothing unless both `spam-use-bogofilter' and `spam-bogofilter-path'
+are non-nil. The current summary article is passed to
+`spam-bogofilter-articles' with option -v, and the output buffer is
+searched for a spamicity starting with 0.9 or 1.0."
;; Dynamic spam check. I do not know how to check the exit status,
;; so instead, read `bogofilter -v' output.
(when (and spam-use-bogofilter spam-bogofilter-path)
(spam-bogofilter-articles nil "-v" (list (gnus-summary-article-number)))
(when (save-excursion
- (set-buffer spam-output-buffer-name)
+ (set-buffer spam-bogofilter-output-buffer-name)
(goto-char (point-min))
(re-search-forward "Spamicity: \\(0\\.9\\|1\\.0\\)" nil t))
spam-split-group)))
(interactive)
(when (and spam-use-bogofilter spam-bogofilter-path)
(spam-bogofilter-articles nil "-v" (list (gnus-summary-article-number)))
- (save-excursion
- (set-buffer spam-output-buffer-name)
- (unless (= (point-min) (point-max))
- (display-message-or-buffer (current-buffer)
- spam-output-buffer-name)))))
+ (with-current-buffer spam-bogofilter-output-buffer-name
+ (unless (zerop (buffer-size))
+ (if (<= (count-lines (point-min) (point-max)) 1)
+ (progn
+ (goto-char (point-max))
+ (when (bolp)
+ (backward-char 1))
+ (message "%s" (buffer-substring (point-min) (point))))
+ (goto-char (point-min))
+ (display-buffer (current-buffer)))))))
(defun spam-bogofilter-register-routine ()
- (when (and spam-use-bogofilter spam-bogofilter-path)
- (let ((articles gnus-newsgroup-articles)
- article mark ham-articles spam-articles)
- (while articles
- (setq article (pop articles)
- mark (gnus-summary-article-mark article))
- (cond ((memq mark spam-spam-marks) (push article spam-articles))
- ((memq article gnus-newsgroup-saved))
- ((memq mark spam-ham-marks) (push article ham-articles))))
- (when ham-articles
- (spam-bogofilter-articles "ham" "-n" ham-articles))
- (when spam-articles
- (spam-bogofilter-articles "SPAM" "-s" spam-articles)))))
-
-(defvar spam-bogofilter-initial-timeout 40
- "Timeout in seconds for the initial reply from the `bogofilter' program.")
-
-(defvar spam-bogofilter-subsequent-timeout 15
- "Timeout in seconds for any subsequent reply from the `bogofilter' program.")
+ "Pass the current group's ham and spam articles to Bogofilter.
+Articles whose mark is the value of a variable in `spam-spam-marks' are
+given to `spam-bogofilter-articles' with option -s. Of the remaining
+articles, those not in `gnus-newsgroup-saved' whose mark is the value of
+a variable in `spam-ham-marks' are given to it with option -n."
+ (let ((articles gnus-newsgroup-articles)
+ article mark ham-articles spam-articles spam-mark-values
+ ham-mark-values)
+
+ ;; Marks are stored as symbolic values, so we have to dereference
+ ;; them for memq to work. We wouldn't have to do this if
+ ;; gnus-summary-article-mark returned a symbol.
+ (dolist (mark spam-ham-marks)
+ (push (symbol-value mark) ham-mark-values))
+
+ (dolist (mark spam-spam-marks)
+ (push (symbol-value mark) spam-mark-values))
+
+ (while articles
+ (setq article (pop articles)
+ mark (gnus-summary-article-mark article))
+ (cond ((memq mark spam-mark-values) (push article spam-articles))
+ ((memq article gnus-newsgroup-saved))
+ ((memq mark ham-mark-values) (push article ham-articles))))
+ (when ham-articles
+ (spam-bogofilter-articles "ham" "-n" ham-articles))
+ (when spam-articles
+ (spam-bogofilter-articles "SPAM" "-s" spam-articles))))
(defun spam-bogofilter-articles (type option articles)
- (let ((output-buffer (get-buffer-create spam-output-buffer-name))
+ (let ((output-buffer (get-buffer-create spam-bogofilter-output-buffer-name))
(article-copy (get-buffer-create " *Bogofilter Article Copy*"))
- (remove-regexp (concat spam-spaminfo-header-regexp "\\|Xref:"))
+ (remove-regexp (concat spam-bogofilter-spaminfo-header-regexp
+ "\\|Xref:"))
(counter 0)
prefix process article)
(when type
(message "%s %d" prefix counter))
(setq article (pop articles))
(gnus-summary-goto-subject article)
- (gnus-summary-select-article)
+ (gnus-summary-show-article t)
(gnus-eval-in-buffer-window article-copy
(insert-buffer-substring gnus-original-article-buffer)
;; Remove spam classification redundant headers: they may induce
;; unwanted biases in later analysis.
- (goto-char (point-min))
- (while (not (or (eobp) (= (following-char) ?\n)))
- (if (looking-at remove-regexp)
- (delete-region (point)
- (save-excursion (forward-line 1) (point)))
- (forward-line 1)))
- (goto-char (point-min))
+ (message-remove-header remove-regexp t)
;; Bogofilter really wants From envelopes for counting articles.
;; Fake one at the beginning, make sure there will be no other.
+ (goto-char (point-min))
(if (looking-at "From ")
(forward-line 1)
(insert "From nobody " (current-time-string) "\n"))