:type 'directory
:group 'spam)
+(defcustom spam-move-spam-nonspam-groups-only t
+  "Whether spam should be moved in non-spam groups only.
+When t, only ham and unclassified groups will have their spam moved
+to the spam-process-destination.  When nil, spam will also be moved from
+spam groups."
+  :type 'boolean
+  :group 'spam)
+
(defcustom spam-whitelist (expand-file-name "whitelist" spam-directory)
"The location of the whitelist.
The file format is one regular expression per line.
:type 'boolean
:group 'spam)
+(defcustom spam-use-regex-headers nil
+ "Whether a header regular expression match should be used by spam-split.
+Also see the variables `spam-regex-headers-spam' and `spam-regex-headers-ham'."
+ :type 'boolean
+ :group 'spam)
+
+(defcustom spam-use-bogofilter-headers nil
+ "Whether bogofilter headers should be used by spam-split.
+Enable this if you pre-process messages with Bogofilter BEFORE Gnus sees them.
+The header that is examined is named by `spam-bogofilter-header'."
+ :type 'boolean
+ :group 'spam)
+
(defcustom spam-use-bogofilter nil
- "Whether bogofilter should be used by spam-split."
+ "Whether bogofilter should be invoked by spam-split.
+Enable this if you want Gnus to invoke Bogofilter on new messages."
:type 'boolean
:group 'spam)
:type 'boolean
:group 'spam)
+(defcustom spam-use-stat nil
+ "Whether spam-stat should be used by spam-split.
+When non-nil, `spam-stat-load' is called before the spam checks are run."
+ :type 'boolean
+ :group 'spam)
+
(defcustom spam-split-group "spam"
"Group name where incoming spam should be put by spam-split."
:type 'string
:type 'face
:group 'spam)
+(defcustom spam-regex-headers-spam '("^X-Spam-Flag: YES")
+ "List of regular expressions for positive header spam matches.
+`spam-check-regex-headers' searches the current buffer for each of them,
+unless an entry of `spam-regex-headers-ham' matched first, and returns
+`spam-split-group' on the first match."
+ :type '(repeat (regexp :tag "Regular expression to match spam header"))
+ :group 'spam)
+
+(defcustom spam-regex-headers-ham '("^X-Spam-Flag: NO")
+ "List of regular expressions for positive header ham matches.
+`spam-check-regex-headers' searches the current buffer for each of them
+before it looks at `spam-regex-headers-spam'; once one matches, the spam
+regular expressions are skipped and the check returns nil."
+ :type '(repeat (regexp :tag "Regular expression to match ham header"))
+ :group 'spam)
+
(defgroup spam-ifile nil
"Spam ifile configuration."
:group 'spam)
(const :tag "ifile is not installed"))
:group 'spam-ifile)
+(defcustom spam-ifile-database-path nil
+ "File path of the ifile database.
+When non-nil, it is handed to ifile as a \"--db-file=PATH\" argument by
+`spam-get-ifile-database-parameter'.  When nil, no database argument is
+passed."
+ :type '(choice (file :tag "Location of the ifile database")
+ (const :tag "Use the default"))
+ :group 'spam-ifile)
+
(defcustom spam-ifile-spam-category "spam"
"Name of the spam ifile category."
:type 'string
:group 'spam-ifile)
+(defcustom spam-ifile-ham-category nil
+ "Name of the ham ifile category.
+If nil, the current group name will be used."
+ :type '(choice (string :tag "Use a fixed category")
+ (const :tag "Use the current group name"))
+ :group 'spam-ifile)
+
(defcustom spam-ifile-all-categories nil
"Whether the ifile check will return all categories, or just spam.
Set this to t if you want to use the spam-split invocation of ifile as
"Spam bogofilter configuration."
:group 'spam)
-(defcustom spam-bogofilter-output-buffer-name "*Bogofilter Output*"
- "Name of buffer when displaying `bogofilter -v' output."
- :type 'string
- :group 'spam-bogofilter)
-
-(defcustom spam-bogofilter-initial-timeout 40
- "Timeout in seconds for the initial reply from the `bogofilter' program."
- :type 'integer
- :group 'spam-bogofilter)
-
-(defcustom spam-bogofilter-subsequent-timeout 15
- "Timeout in seconds for any subsequent reply from the `bogofilter' program."
- :type 'integer
- :group 'spam-bogofilter)
-
(defcustom spam-bogofilter-path (executable-find "bogofilter")
"File path of the Bogofilter executable program."
:type '(choice (file :tag "Location of bogofilter")
(const :tag "Bogofilter is not installed"))
:group 'spam-bogofilter)
-;; FIXME! In the following regexp, we should explain which tool produces
-;; which kind of header. I do not even remember them all by now. X-Junk
-;; (and previously X-NoSpam) are produced by the `NoSpam' tool, which has
-;; never been published, so it might not be reasonable leaving it in the
-;; list.
-(defcustom spam-bogofilter-spaminfo-header-regexp
- "^X-\\(jf\\|Junk\\|NoSpam\\|Spam\\|SB\\)[^:]*:"
- "Regexp for spam markups in headers.
-Markup from spam recognisers, as well as `Xref', are to be removed from
-articles before they get registered by Bogofilter."
- :type 'regexp
+(defcustom spam-bogofilter-header "X-Bogosity"
+ "The header that Bogofilter inserts in messages."
+ :type 'string
:group 'spam-bogofilter)
+(defcustom spam-bogofilter-database-directory nil
+  "Directory path of the Bogofilter databases.
+When non-nil, it is passed to Bogofilter with the \"-d\" switch.  When nil,
+Bogofilter is invoked without a \"-d\" switch."
+  :type '(choice (directory :tag "Location of the Bogofilter database directory")
+                 (const :tag "Use the default"))
+  :group 'spam-bogofilter)
+
;;; Key bindings for spam control.
(gnus-define-keys gnus-summary-mode-map
(defun spam-group-ham-processor-ifile-p (group)
(spam-group-processor-p group 'gnus-group-ham-exit-processor-ifile))
+(defun spam-group-ham-processor-bogofilter-p (group)
+ "Check GROUP for `gnus-group-ham-exit-processor-bogofilter'.
+The result is whatever `spam-group-processor-p' returns for that symbol."
+ (spam-group-processor-p group 'gnus-group-ham-exit-processor-bogofilter))
+
+(defun spam-group-spam-processor-stat-p (group)
+ "Check GROUP for `gnus-group-spam-exit-processor-stat'.
+The result is whatever `spam-group-processor-p' returns for that symbol."
+ (spam-group-processor-p group 'gnus-group-spam-exit-processor-stat))
+
+(defun spam-group-ham-processor-stat-p (group)
+ "Check GROUP for `gnus-group-ham-exit-processor-stat'.
+The result is whatever `spam-group-processor-p' returns for that symbol."
+ (spam-group-processor-p group 'gnus-group-ham-exit-processor-stat))
+
(defun spam-group-ham-processor-whitelist-p (group)
(spam-group-processor-p group 'gnus-group-ham-exit-processor-whitelist))
(defun spam-summary-prepare-exit ()
;; The spam processors are invoked for any group, spam or ham or neither
+ (gnus-message 6 "Exiting summary buffer and applying spam rules")
(when (and spam-bogofilter-path
(spam-group-spam-processor-bogofilter-p gnus-newsgroup-name))
- (spam-bogofilter-register-routine))
+ (gnus-message 5 "Registering spam with bogofilter")
+ (spam-bogofilter-register-spam-routine))
(when (and spam-ifile-path
(spam-group-spam-processor-ifile-p gnus-newsgroup-name))
+ (gnus-message 5 "Registering spam with ifile")
(spam-ifile-register-spam-routine))
- (when (spam-group-spam-processor-bogofilter-p gnus-newsgroup-name)
+ (when (spam-group-spam-processor-stat-p gnus-newsgroup-name)
+ (gnus-message 5 "Registering spam with spam-stat")
+ (spam-stat-register-spam-routine))
+
+ (when (spam-group-spam-processor-blacklist-p gnus-newsgroup-name)
+ (gnus-message 5 "Registering spam with the blacklist")
(spam-blacklist-register-routine))
- ;; Only for spam groups, we expire and maybe move articles
- (when (spam-group-spam-contents-p gnus-newsgroup-name)
+ (if spam-move-spam-nonspam-groups-only
+ (when (not (spam-group-spam-contents-p gnus-newsgroup-name))
+ (spam-mark-spam-as-expired-and-move-routine
+ (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
+ (gnus-message 5 "Marking spam as expired and moving it")
(spam-mark-spam-as-expired-and-move-routine
(gnus-parameter-spam-process-destination gnus-newsgroup-name)))
+ ;; now we redo spam-mark-spam-as-expired-and-move-routine to only
+ ;; expire spam, in case the above did not expire them
+ (spam-mark-spam-as-expired-and-move-routine nil)
+
(when (spam-group-ham-contents-p gnus-newsgroup-name)
(when (spam-group-ham-processor-whitelist-p gnus-newsgroup-name)
+ (gnus-message 5 "Registering ham with the whitelist")
(spam-whitelist-register-routine))
(when (spam-group-ham-processor-ifile-p gnus-newsgroup-name)
+ (gnus-message 5 "Registering ham with ifile")
(spam-ifile-register-ham-routine))
+ (when (spam-group-ham-processor-bogofilter-p gnus-newsgroup-name)
+ (gnus-message 5 "Registering ham with Bogofilter")
+ (spam-bogofilter-register-ham-routine))
+ (when (spam-group-ham-processor-stat-p gnus-newsgroup-name)
+ (gnus-message 5 "Registering ham with spam-stat")
+ (spam-stat-register-ham-routine))
(when (spam-group-ham-processor-BBDB-p gnus-newsgroup-name)
- (spam-BBDB-register-routine))))
+ (gnus-message 5 "Registering ham with the BBDB")
+ (spam-BBDB-register-routine)))
+
+ ;; now move all ham articles out of spam groups
+ (when (spam-group-spam-contents-p gnus-newsgroup-name)
+ (gnus-message 5 "Moving ham messages from spam group")
+ (spam-ham-move-routine
+ (gnus-parameter-ham-process-destination gnus-newsgroup-name))))
(add-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)
;; check the global list of group names spam-junk-mailgroups and the
;; group parameters
(when (spam-group-spam-contents-p gnus-newsgroup-name)
+ (gnus-message 5 "Marking unread articles as spam")
(let ((articles gnus-newsgroup-articles)
article)
(while articles
(let ((gnus-current-article article))
(gnus-summary-move-article nil group)))))))
+(defun spam-ham-move-routine (&optional group)
+  "Move the ham articles of the current summary buffer to GROUP.
+An article counts as ham when its summary mark is the value of one of
+the symbols listed in `spam-ham-marks'.  Nothing is moved unless GROUP
+is a string."
+  (let (ham-mark-values)
+    ;; marks are stored as symbolic values, so we have to dereference
+    ;; them for memq to work.
+    (dolist (mark spam-ham-marks)
+      (push (symbol-value mark) ham-mark-values))
+
+    (when (stringp group)
+      (dolist (article gnus-newsgroup-articles)
+        ;; look up the mark of THIS article; the mark used to be left
+        ;; unset, so the ham test could never succeed
+        (when (memq (gnus-summary-article-mark article) ham-mark-values)
+          (let ((gnus-current-article article))
+            (gnus-summary-move-article nil group)))))))
+
(defun spam-generic-register-routine (spam-func ham-func)
(let ((articles gnus-newsgroup-articles)
article mark ham-articles spam-articles spam-mark-values
ham-mark-values)
;; marks are stored as symbolic values, so we have to dereference
- ;; them for memq to work we wouldn't have to do this if
+ ;; them for memq to work. we wouldn't have to do this if
;; gnus-summary-article-mark returned a symbol.
(dolist (mark spam-ham-marks)
(push (symbol-value mark) ham-mark-values))
'line-end-position)))
(defun spam-get-article-as-string (article)
- (let ((article-string))
+ (let ((article-buffer (spam-get-article-as-buffer article))
+ article-string)
+ (when article-buffer
+ (save-window-excursion
+ (set-buffer article-buffer)
+ (setq article-string (buffer-string))))
+ article-string))
+
+(defun spam-get-article-as-buffer (article)
+ (let ((article-buffer))
(when (numberp article)
(save-window-excursion
(gnus-summary-goto-subject article)
(gnus-summary-show-article t)
- (set-buffer gnus-article-buffer)
- (setq article-string (buffer-string))))
- article-string))
+ (setq article-buffer (get-buffer gnus-article-buffer))))
+ article-buffer))
+
+(defun spam-get-article-as-filename (article)
+  "Return the name of the file that holds ARTICLE, or nil.
+The name is ARTICLE's number expanded in `nnml-current-directory', after
+`nnml-possibly-change-directory' has been called for the real name of
+`gnus-newsgroup-name'.  Return nil when ARTICLE is not a number or when
+no such file exists."
+  ;; Without this guard `file-exists-p' would be handed nil for a
+  ;; non-numeric ARTICLE and signal an error.
+  (when (numberp article)
+    (nnml-possibly-change-directory
+     (gnus-group-real-name gnus-newsgroup-name))
+    (let ((article-filename
+           (expand-file-name (int-to-string article)
+                             nnml-current-directory)))
+      (when (file-exists-p article-filename)
+        article-filename))))
(defun spam-fetch-field-from-fast (article)
"Fetch the `from' field quickly, using the internal gnus-data-list function"
;;;; Spam determination.
(defvar spam-list-of-checks
- '((spam-use-blacklist . spam-check-blacklist)
- (spam-use-whitelist . spam-check-whitelist)
- (spam-use-BBDB . spam-check-BBDB)
- (spam-use-ifile . spam-check-ifile)
- (spam-use-blackholes . spam-check-blackholes)
- (spam-use-bogofilter . spam-check-bogofilter))
+ '((spam-use-blacklist . spam-check-blacklist)
+ (spam-use-regex-headers . spam-check-regex-headers)
+ (spam-use-whitelist . spam-check-whitelist)
+ (spam-use-BBDB . spam-check-BBDB)
+ (spam-use-ifile . spam-check-ifile)
+ (spam-use-stat . spam-check-stat)
+ (spam-use-blackholes . spam-check-blackholes)
+ (spam-use-bogofilter-headers . spam-check-bogofilter-headers)
+ (spam-use-bogofilter . spam-check-bogofilter))
"The spam-list-of-checks list contains pairs associating a parameter
variable with a spam checking function. If the parameter variable is
true, then the checking function is called, and its value decides what
See the Info node `(gnus)Fancy Mail Splitting' for more details."
(interactive)
+
+ ;; load the spam-stat tables if needed
+ (when spam-use-stat (spam-stat-load))
(let ((list-of-checks spam-list-of-checks)
decision)
(while (and list-of-checks (not decision))
(let ((pair (pop list-of-checks)))
(when (symbol-value (car pair))
+ (gnus-message 5 "spam-split: calling the %s function" (symbol-name (cdr pair)))
(setq decision (funcall (cdr pair))))))
(if (eq decision t)
nil
decision)))
\f
+;;;; Regex headers
+
+(defun spam-check-regex-headers ()
+  "Check the current buffer against the ham and spam header regexps.
+Return nil as soon as an entry of `spam-regex-headers-ham' matches.
+Otherwise return `spam-split-group' if an entry of
+`spam-regex-headers-spam' matches, and nil if none does.  Every search
+starts at the beginning of the buffer."
+  (let (ret found)
+    ;; ham regexps come first: a ham match suppresses the spam search
+    (dolist (h-regex spam-regex-headers-ham)
+      (unless found
+        (goto-char (point-min))
+        (when (re-search-forward h-regex nil t)
+          (message "Ham regex header search positive.")
+          (setq found t))))
+    (dolist (s-regex spam-regex-headers-spam)
+      (unless found
+        (goto-char (point-min))
+        (when (re-search-forward s-regex nil t)
+          ;; the format string has no directive, so it takes no argument
+          (message "Spam regex header search positive.")
+          (setq found t
+                ret spam-split-group))))
+    ret))
+
+\f
;;;; Blackholes.
(defun spam-check-blackholes ()
(with-temp-buffer
(insert headers)
(goto-char (point-min))
+ (gnus-message 5 "Checking headers for relay addresses")
(while (re-search-forward
"\\[\\([0-9]+.[0-9]+.[0-9]+.[0-9]+\\)\\]" nil t)
- (message "Blackhole search found host IP %s." (match-string 1))
+ (gnus-message 9 "Blackhole search found host IP %s." (match-string 1))
(push (mapconcat 'identity
(nreverse (split-string (match-string 1) "\\."))
".")
(if spam-use-dig
(let ((query-result (query-dig query-string)))
(when query-result
- (message "spam: positive blackhole check '%s'" query-result)
+ (gnus-message 5 "(DIG): positive blackhole check '%s'" query-result)
(push (list ip server query-result)
matches)))
;; else, if not using dig.el
(when (query-dns query-string)
+ (gnus-message 5 "positive blackhole check")
(push (list ip server (query-dns query-string 'TXT))
matches)))))))
(when matches
spam-split-group)))
\f
-;;;; BBDB original idea for spam-check-BBDB from Alexander Kotelnikov
+;;;; BBDB
+
+;;; original idea for spam-check-BBDB from Alexander Kotelnikov
;;; <sacha@giotto.sj.ru>
;; all this is done inside a condition-case to trap errors
+
(condition-case nil
(progn
-
+ (require 'bbdb)
(require 'bbdb-com)
-
- (defun spam-enter-ham-BBDB (from)
- "Enter an address into the BBDB; implies ham (non-spam) sender"
- (when (stringp from)
- (let* ((parsed-address (gnus-extract-address-components from))
- (name (or (car parsed-address) "Ham Sender"))
- (net-address (car (cdr parsed-address))))
- (message "Adding address %s to BBDB" from)
- (when (and net-address
- (not (bbdb-search (bbdb-records) nil nil net-address)))
- (bbdb-create-internal name nil net-address nil nil
- "ham sender added by spam.el")))))
-
- (defun spam-BBDB-register-routine ()
- (spam-generic-register-routine
- ;; spam function
- nil
- ;; ham function
- (lambda (article)
- (spam-enter-ham-BBDB (spam-fetch-field-from-fast article)))))
-
- (defun spam-check-BBDB ()
- "Mail from people in the BBDB is never considered spam"
- (let ((who (message-fetch-field "from")))
- (when who
- (setq who (regexp-quote (cadr
- (gnus-extract-address-components who))))
- (if (bbdb-search (bbdb-records) nil nil who)
- nil spam-split-group)))))
+
+ (defun spam-enter-ham-BBDB (from)
+ "Enter an address into the BBDB; implies ham (non-spam) sender"
+ (when (stringp from)
+ (let* ((parsed-address (gnus-extract-address-components from))
+ (name (or (car parsed-address) "Ham Sender"))
+ (net-address (car (cdr parsed-address))))
+ (gnus-message 5 "Adding address %s to BBDB" from)
+ (when (and net-address
+ (not (bbdb-search-simple nil net-address)))
+ (bbdb-create-internal name nil net-address nil nil
+ "ham sender added by spam.el")))))
+
+ (defun spam-BBDB-register-routine ()
+ (spam-generic-register-routine
+ ;; spam function
+ nil
+ ;; ham function
+ (lambda (article)
+ (spam-enter-ham-BBDB (spam-fetch-field-from-fast article)))))
+
+ (defun spam-check-BBDB ()
+ "Mail from people in the BBDB is never considered spam"
+ (let ((who (message-fetch-field "from")))
+ (when who
+ (setq who (regexp-quote (cadr
+ (gnus-extract-address-components who))))
+ (if (bbdb-search-simple nil who)
+ nil spam-split-group)))))
(file-error (progn
- (setq spam-list-of-checks
- (delete (assoc 'spam-use-BBDB spam-list-of-checks)
- spam-list-of-checks)))))
+ (defalias 'bbdb-search-simple 'ignore)
+ (defalias 'spam-check-BBDB 'ignore)
+ (defalias 'spam-BBDB-register-routine 'ignore)
+ (defalias 'spam-enter-ham-BBDB 'ignore)
+ (defalias 'bbdb-create-internal 'ignore)
+ (defalias 'bbdb-records 'ignore))))
\f
;;;; ifile
;;; check the ifile backend; return nil if the mail was NOT classified
;;; as spam
+(defun spam-get-ifile-database-parameter ()
+  "Build the ifile command-line argument that selects the database.
+Return \"--db-file=PATH\", PATH being `spam-ifile-database-path', or nil
+when that variable is nil."
+  (and spam-ifile-database-path
+       (format "--db-file=%s" spam-ifile-database-path)))
+
(defun spam-check-ifile ()
"Check the ifile backend for the classification of this message"
(let ((article-buffer-name (buffer-name))
category return)
(with-temp-buffer
- (let ((temp-buffer-name (buffer-name)))
+ (let ((temp-buffer-name (buffer-name))
+ (db-param (spam-get-ifile-database-parameter)))
(save-excursion
(set-buffer article-buffer-name)
- (call-process-region (point-min) (point-max) spam-ifile-path
- nil temp-buffer-name nil "-q" "-c"))
+ (if db-param
+ (call-process-region (point-min) (point-max) spam-ifile-path
+ nil temp-buffer-name nil "-q" "-c" db-param)
+ (call-process-region (point-min) (point-max) spam-ifile-path
+ nil temp-buffer-name nil "-q" "-c")))
(goto-char (point-min))
(if (not (eobp))
(setq category (buffer-substring (point) (spam-point-at-eol))))
(setq return category)
;; else, if spam-ifile-all-categories is not set...
(when (string-equal spam-ifile-spam-category category)
- (setq return spam-split-group)))))) ; always accept the ifile category
+ (setq return spam-split-group))))))
return))
(defun spam-ifile-register-with-ifile (article-string category)
"Register an article, given as a string, with a category.
Uses `gnus-newsgroup-name' if category is nil (for ham registration)."
(when (stringp article-string)
- (let ((category (or category gnus-newsgroup-name)))
+ (let ((category (or category gnus-newsgroup-name))
+ (db-param (spam-get-ifile-database-parameter)))
(with-temp-buffer
(insert-string article-string)
- (call-process-region (point-min) (point-max) spam-ifile-path
- nil nil nil "-h" "-i" category)))))
+ (if db-param
+ (call-process-region (point-min) (point-max) spam-ifile-path
+ nil nil nil
+ "-h" "-i" category db-param)
+ (call-process-region (point-min) (point-max) spam-ifile-path
+ nil nil nil
+ "-h" "-i" category))))))
(defun spam-ifile-register-spam-routine ()
(spam-generic-register-routine
nil
(lambda (article)
(spam-ifile-register-with-ifile
- (spam-get-article-as-string article) nil))))
+ (spam-get-article-as-string article) spam-ifile-ham-category))))
\f
+;;;; spam-stat
+
+;; The spam-stat support is defined only if spam-stat can be loaded;
+;; otherwise every entry point is aliased to `ignore'.
+(condition-case nil
+    (progn
+      ;; presumably keeps spam-stat from installing its own hooks while
+      ;; it is being loaded -- confirm against spam-stat.el
+      (let ((spam-stat-install-hooks nil))
+        (require 'spam-stat))
+
+      (defun spam-check-stat ()
+        "Check the spam-stat backend for the classification of this message.
+Calls `spam-stat-split-fancy' with `spam-stat-split-fancy-spam-group'
+bound to `spam-split-group' and `spam-stat-buffer' bound to the name of
+the current buffer, and returns its value."
+        (let ((spam-stat-split-fancy-spam-group spam-split-group) ; override
+              (spam-stat-buffer (buffer-name))) ; stat the current buffer
+          (spam-stat-split-fancy)))
+
+      (defun spam-stat-register-spam-routine ()
+        "Register spam articles with spam-stat, then save its tables.
+The text of each article handed over by `spam-generic-register-routine'
+is inserted into a temporary buffer on which `spam-stat-buffer-is-spam'
+is called.  Articles whose text cannot be obtained are skipped."
+        (spam-generic-register-routine
+         (lambda (article)
+           (let ((article-string (spam-get-article-as-string article)))
+             (when (stringp article-string)
+               (with-temp-buffer
+                 (insert article-string)
+                 (spam-stat-buffer-is-spam)))))
+         nil)
+        (spam-stat-save))
+
+      (defun spam-stat-register-ham-routine ()
+        "Register ham articles with spam-stat, then save its tables.
+The text of each article handed over by `spam-generic-register-routine'
+is inserted into a temporary buffer on which
+`spam-stat-buffer-is-non-spam' is called.  Articles whose text cannot be
+obtained are skipped."
+        (spam-generic-register-routine
+         nil
+         (lambda (article)
+           (let ((article-string (spam-get-article-as-string article)))
+             (when (stringp article-string)
+               (with-temp-buffer
+                 (insert article-string)
+                 (spam-stat-buffer-is-non-spam))))))
+        (spam-stat-save)))
+
+  (file-error (progn
+                (defalias 'spam-stat-register-ham-routine 'ignore)
+                (defalias 'spam-stat-register-spam-routine 'ignore)
+                (defalias 'spam-stat-buffer-is-spam 'ignore)
+                (defalias 'spam-stat-buffer-is-non-spam 'ignore)
+                (defalias 'spam-stat-split-fancy 'ignore)
+                (defalias 'spam-stat-load 'ignore)
+                (defalias 'spam-stat-save 'ignore)
+                (defalias 'spam-check-stat 'ignore))))
+
+\f
+
;;;; Blacklists and whitelists.
(defvar spam-whitelist-cache nil)
\f
;;;; Bogofilter
-;;; See Paul Graham article, at `http://www.paulgraham.com/spam.html'.
-
-;;; This page is for those wanting to control spam with the help of
-;;; Eric Raymond's speedy Bogofilter, see
-;;; http://www.tuxedo.org/~esr/bogofilter. This has been tested with
-;;; a locally patched copy of version 0.4.
-
-;;; Make sure Bogofilter is installed. Bogofilter internally uses
-;;; Judy fast associative arrays, so you need to install Judy first,
-;;; and Bogofilter next. Fetch both distributions by visiting the
-;;; following links and downloading the latest version of each:
-;;;
-;;; http://sourceforge.net/projects/judy/
-;;; http://www.tuxedo.org/~esr/bogofilter/
-;;;
-;;; Unpack the Judy distribution and enter its main directory. Then do:
-;;;
-;;; ./configure
-;;; make
-;;; make install
-;;;
-;;; You will likely need to become super-user for the last step.
-;;; Then, unpack the Bogofilter distribution and enter its main
-;;; directory:
-;;;
-;;; make
-;;; make install
-;;;
-;;; Here as well, you need to become super-user for the last step.
-;;; Now, initialize your word lists by doing, under your own identity:
-;;;
-;;; mkdir ~/.bogofilter
-;;; touch ~/.bogofilter/badlist
-;;; touch ~/.bogofilter/goodlist
-;;;
-;;; These two files are text files you may edit, but you normally don't!
-
-;;; The `M-d' command gets added to Gnus summary mode, marking current
-;;; article as spam, showing it with the `H' mark. Whenever you see a
-;;; spam article, make sure to mark its summary line with `M-d' before
-;;; leaving the group. Some groups, as per variable
-;;; `spam-junk-mailgroups' below, receive articles from Gnus splitting
-;;; on clues added by spam recognisers, so for these groups, we tack
-;;; an `H' mark at group entry for all summary lines which would
-;;; otherwise have no other mark. Make sure to _remove_ `H' marks for
-;;; any article which is _not_ genuine spam, before leaving such
-;;; groups: you may use `M-u' to "unread" the article, or `d' for
-;;; declaring it read the non-spam way. When you leave a group, all
-;;; `H' marked articles, saved or unsaved, are sent to Bogofilter
-;;; which will study them as spam samples.
-
-;;; Messages may also be deleted in various other ways, and unless
-;;; `spam-ham-marks-form' gets overridden below, marks `R' and `r' for
-;;; default read or explicit delete, marks `X' and 'K' for automatic
-;;; or explicit kills, as well as mark `Y' for low scores, are all
-;;; considered to be associated with articles which are not spam.
-;;; This assumption might be false, in particular if you use kill
-;;; files or score files as means for detecting genuine spam, you
-;;; should then adjust `spam-ham-marks-form'. When you leave a group,
-;;; all _unsaved_ articles bearing any the above marks are sent to
-;;; Bogofilter which will study these as not-spam samples. If you
-;;; explicit kill a lot, you might sometimes end up with articles
-;;; marked `K' which you never saw, and which might accidentally
-;;; contain spam. Best is to make sure that real spam is marked with
-;;; `H', and nothing else.
-
-;;; All other marks do not contribute to Bogofilter pre-conditioning.
-;;; In particular, ticked, dormant or souped articles are likely to
-;;; contribute later, when they will get deleted for real, so there is
-;;; no need to use them prematurely. Explicitly expired articles do
-;;; not contribute, command `E' is a way to get rid of an article
-;;; without Bogofilter ever seeing it.
-
-;;; In a word, with a minimum of care for associating the `H' mark for
-;;; spam articles only, Bogofilter training all gets fairly automatic.
-;;; You should do this until you get a few hundreds of articles in
-;;; each category, spam or not. The shell command `head -1
-;;; ~/.bogofilter/*' shows both article counts. The command `S S' in
-;;; summary mode, either for debugging or for curiosity, triggers
-;;; Bogofilter into displaying in another buffer the "spamicity" score
-;;; of the current article (between 0.0 and 1.0), together with the
-;;; article words which most significantly contribute to the score.
-
-;;; The real way for using Bogofilter, however, is to have some use
-;;; tool like `procmail' for invoking it on message reception, then
-;;; adding some recognisable header in case of detected spam. Gnus
-;;; splitting rules might later trip on these added headers and react
-;;; by sorting such articles into specific junk folders as per
-;;; `spam-junk-mailgroups'. Here is a possible `.procmailrc' contents
-;;; (still untested -- please tell me how it goes):
-;;;
-;;; :0HBf:
-;;; * ? bogofilter
-;;; | formail -bfI "X-Spam-Status: Yes"
-
-(defun spam-check-bogofilter ()
- ;; Dynamic spam check. I do not know how to check the exit status,
- ;; so instead, read `bogofilter -v' output.
- (when (and spam-use-bogofilter spam-bogofilter-path)
- (spam-bogofilter-articles nil "-v" (list (gnus-summary-article-number)))
- (when (save-excursion
- (set-buffer spam-bogofilter-output-buffer-name)
- (goto-char (point-min))
- (re-search-forward "Spamicity: \\(0\\.9\\|1\\.0\\)" nil t))
- spam-split-group)))
-
+(defun spam-check-bogofilter-headers (&optional score)
+  "Examine the `spam-bogofilter-header' field of the current message.
+Without SCORE, return `spam-split-group' when the field value starts
+with \"Yes\", and nil otherwise.  With non-nil SCORE, return the
+spamicity found in the field as a string, whatever the verdict is, or
+nil when the field or its spamicity is missing."
+  (let ((header (message-fetch-field spam-bogofilter-header)))
+    (when header
+      (if score
+          ;; scoring mode: the score is wanted for ham as well as spam
+          (when (string-match "spamicity=\\([0-9.]+\\)" header)
+            (match-string 1 header))
+        ;; detection mode
+        (when (string-match "^Yes" header)
+          spam-split-group)))))
+
+
+;; return something sensible if the score can't be determined
(defun spam-bogofilter-score ()
- "Use `bogofilter -v' on the current article.
-This yields the 15 most discriminant words for this article and the
-spamicity coefficient of each, and the overall article spamicity."
+ "Get the Bogofilter spamicity score"
(interactive)
- (when (and spam-use-bogofilter spam-bogofilter-path)
- (spam-bogofilter-articles nil "-v" (list (gnus-summary-article-number)))
- (with-current-buffer spam-bogofilter-output-buffer-name
- (unless (zerop (buffer-size))
- (if (<= (count-lines (point-min) (point-max)) 1)
- (progn
- (goto-char (point-max))
- (when (bolp)
- (backward-char 1))
- (message "%s" (buffer-substring (point-min) (point))))
- (goto-char (point-min))
- (display-buffer (current-buffer)))))))
-
-(defun spam-bogofilter-register-routine ()
- (let ((articles gnus-newsgroup-articles)
- article mark ham-articles spam-articles spam-mark-values
- ham-mark-values)
-
- ;; marks are stored as symbolic values, so we have to dereference
- ;; them for memq to work we wouldn't have to do this if
- ;; gnus-summary-article-mark returned a symbol.
- (dolist (mark spam-ham-marks)
- (push (symbol-value mark) ham-mark-values))
+ (save-window-excursion
+ (gnus-summary-show-article t)
+ (set-buffer gnus-article-buffer)
+ (let ((score (spam-check-bogofilter t)))
+ (message "Spamicity score %s" score)
+ (or score "0"))))
+
+(defun spam-check-bogofilter (&optional score)
+  "Run Bogofilter on the current buffer and interpret its output.
+The buffer contents are piped to `spam-bogofilter-path' with \"-v\", plus
+\"-d\" and `spam-bogofilter-database-directory' when that is set.  The
+output is collected in a temporary buffer, on which
+`spam-check-bogofilter-headers' is called with SCORE; its value is
+returned."
+  (let ((source-name (buffer-name))
+        ;; extra switches, spliced in after "-v" by `apply'
+        (db-switch (when spam-bogofilter-database-directory
+                     (list "-d" spam-bogofilter-database-directory))))
+    (with-temp-buffer
+      (let ((output-name (buffer-name)))
+        (save-excursion
+          (set-buffer source-name)
+          (apply 'call-process-region (point-min) (point-max)
+                 spam-bogofilter-path
+                 nil output-name nil "-v" db-switch)))
+      (spam-check-bogofilter-headers score))))
- (dolist (mark spam-spam-marks)
- (push (symbol-value mark) spam-mark-values))
+(defun spam-bogofilter-register-with-bogofilter (article-string spam)
+  "Register an article, given as a string, as spam or non-spam.
+ARTICLE-STRING is piped to `spam-bogofilter-path' with \"-v\" and either
+\"-s\" (SPAM non-nil) or \"-n\" (SPAM nil), plus \"-d\" and
+`spam-bogofilter-database-directory' when that is set.  Nothing happens
+unless ARTICLE-STRING is a string."
+  (when (stringp article-string)
+    (let ((switch (if spam "-s" "-n")))
+      (with-temp-buffer
+        ;; `insert' replaces the obsolete `insert-string'
+        (insert article-string)
+        (apply 'call-process-region (point-min) (point-max)
+               spam-bogofilter-path
+               nil nil nil "-v" switch
+               (when spam-bogofilter-database-directory
+                 (list "-d" spam-bogofilter-database-directory)))))))
+
+(defun spam-bogofilter-register-spam-routine ()
+ "Register spam articles with Bogofilter.
+Hands `spam-generic-register-routine' a spam function that passes the
+article text to `spam-bogofilter-register-with-bogofilter' with a non-nil
+SPAM argument, and no ham function."
+ (spam-generic-register-routine
+ (lambda (article)
+ (spam-bogofilter-register-with-bogofilter
+ (spam-get-article-as-string article) t))
+ nil))
- (while articles
- (setq article (pop articles)
- mark (gnus-summary-article-mark article))
- (cond ((memq mark spam-mark-values) (push article spam-articles))
- ((memq article gnus-newsgroup-saved))
- ((memq mark ham-mark-values) (push article ham-articles))))
- (when ham-articles
- (spam-bogofilter-articles "ham" "-n" ham-articles))
- (when spam-articles
- (spam-bogofilter-articles "SPAM" "-s" spam-articles))))
-
-(defun spam-bogofilter-articles (type option articles)
- (let ((output-buffer (get-buffer-create spam-bogofilter-output-buffer-name))
- (article-copy (get-buffer-create " *Bogofilter Article Copy*"))
- (remove-regexp (concat spam-bogofilter-spaminfo-header-regexp
- "\\|Xref:"))
- (counter 0)
- prefix process article)
- (when type
- (setq prefix (format "Studying %d articles as %s..." (length articles)
- type))
- (message "%s" prefix))
- (save-excursion (set-buffer output-buffer) (erase-buffer))
- (setq process (start-process "bogofilter" output-buffer
- spam-bogofilter-path "-F" option))
- (process-kill-without-query process t)
- (unwind-protect
- (save-window-excursion
- (while articles
- (setq counter (1+ counter))
- (when prefix
- (message "%s %d" prefix counter))
- (setq article (pop articles))
- (gnus-summary-goto-subject article)
- (gnus-summary-show-article t)
- (gnus-eval-in-buffer-window article-copy
- (insert-buffer-substring gnus-original-article-buffer)
- ;; Remove spam classification redundant headers: they may induce
- ;; unwanted biases in later analysis.
- (message-remove-header remove-regexp t)
- ;; Bogofilter really wants From envelopes for counting articles.
- ;; Fake one at the beginning, make sure there will be no other.
- (goto-char (point-min))
- (if (looking-at "From ")
- (forward-line 1)
- (insert "From nobody " (current-time-string) "\n"))
- (let (case-fold-search)
- (while (re-search-forward "^From " nil t)
- (beginning-of-line)
- (insert ">")))
- (process-send-region process (point-min) (point-max))
- (erase-buffer))))
- ;; Sending the EOF is unwind-protected. This is to prevent lost copies
- ;; of `bogofilter', hung on reading their standard input, in case the
- ;; whole registering process gets interrupted by the user.
- (process-send-eof process))
- (kill-buffer article-copy)
- ;; Receive process output. It sadly seems that we still have to protect
- ;; ourselves against hung `bogofilter' processes.
- (let ((status (process-status process))
- (timeout (* 1000 spam-bogofilter-initial-timeout))
- (quanta 200)) ; also counted in milliseconds
- (while (and (not (eq status 'exit)) (> timeout 0))
- ;; `accept-process-output' timeout is counted in microseconds.
- (setq timeout (if (accept-process-output process 0 (* 1000 quanta))
- (* 1000 spam-bogofilter-subsequent-timeout)
- (- timeout quanta))
- status (process-status process)))
- (if (eq status 'exit)
- (when prefix
- (message "%s done!" prefix))
- ;; Sigh! The process did time out... Become brutal!
- (interrupt-process process)
- (message "%s %d INTERRUPTED! (Article %d, status %s)"
- (or prefix "Bogofilter process...")
- counter article status)
- ;; Give some time for user to read. Sitting redisplays but gives up
- ;; if input is pending. Sleeping does not give up, but it does not
- ;; redisplay either. Mix both: let's redisplay and not give up.
- (sit-for 1)
- (sleep-for 3)))))
+(defun spam-bogofilter-register-ham-routine ()
+ "Register ham articles with Bogofilter.
+Hands `spam-generic-register-routine' no spam function, and a ham function
+that passes the article text to `spam-bogofilter-register-with-bogofilter'
+with a nil SPAM argument."
+ (spam-generic-register-routine
+ nil
+ (lambda (article)
+ (spam-bogofilter-register-with-bogofilter
+ (spam-get-article-as-string article) nil))))
(provide 'spam)