X-Git-Url: https://cgit.sxemacs.org/?a=blobdiff_plain;ds=sidebyside;f=lisp%2Fspam.el;h=1c7417a7f7f8fda9cb6ff39bc2e2fa5382b5a095;hb=ddc4559686cad2f1330f9f7b47a947afb114e490;hp=8138b7f0615bf32b3df03bcdb9cc8027d0eb2737;hpb=341ed1f5b7b690a864028d3f8b7c2e88135e6a9f;p=gnus diff --git a/lisp/spam.el b/lisp/spam.el index 8138b7f06..1c7417a7f 100644 --- a/lisp/spam.el +++ b/lisp/spam.el @@ -30,147 +30,347 @@ ;;; The integration with Gnus is not yet complete. See various `FIXME' ;;; comments, below, for supplementary explanations or discussions. +;;; Several TODO items are marked as such + ;;; Code: (require 'gnus-sum) -;; FIXME! We should not require `dns' nor `message' until we actually -;; need them. Best would be to declare needed functions as auto-loadable. -(require 'dns) -(require 'message) +(require 'gnus-uu) ; because of key prefix issues +(require 'gnus) ; for the definitions of group content classification and spam processors -(autoload 'bbdb-records "bbdb-com") +;; FIXME! We should not require `message' until we actually need +;; them. Best would be to declare needed functions as auto-loadable. +(require 'message) ;; Attempt to load BBDB macros (eval-when-compile (condition-case nil (require 'bbdb-com) - (file-error (defalias 'bbdb-search 'ignore)) - (error))) + (file-error (defalias 'bbdb-search 'ignore)))) ;; autoload executable-find -(autoload 'executable-find "executable") - -;;; Main parameters. - -(defvar spam-use-blacklist t - "True if the blacklist should be used.") - -(defvar spam-use-whitelist t - "True if the whitelist should be used.") +(eval-and-compile + ;; executable-find is not autoloaded in Emacs 20 + (autoload 'executable-find "executable")) -(defvar spam-use-blackholes nil - ;; FIXME! Turned off for now. The DNS routines are said to be flaky. 
- "True if blackholes should be used.") +;; autoload ifile-spam-filter +(eval-and-compile + (autoload 'ifile-spam-filter "ifile-gnus")) -(defvar spam-use-bogofilter t - "True if bogofilter should be used.") +;; autoload query-dig +(eval-and-compile + (autoload 'query-dig "dig")) -(defvar spam-split-group "spam" - "Usual group name where spam should be split.") +;; autoload query-dns +(eval-and-compile + (autoload 'query-dns "dns")) -(defvar spam-junk-mailgroups - ;; FIXME! The mailgroup list evidently depends on other choices made by the - ;; user, so the built-in default below is not likely to be appropriate. - (cons spam-split-group '("mail.junk" "poste.pourriel")) - "Mailgroups which are dedicated by splitting to receive various junk. -All unmarked article in such group receive the spam mark on group entry.") +;;; Main parameters. -;; FIXME! For `spam-ham-marks' and `spam-spam-marks', I wonder if it would -;; not be easier for the user to just accept a string of mark letters, instead -;; of a list of Gnus variable names. In such case, the stunt of deferred -;; evaluation would not be useful anymore. Lars?? :-) +(defgroup spam nil + "Spam configuration.") -;; FIXME! It is rather questionable to see `K', `X' and `Y' as indicating -;; positive ham. It much depends on how and why people use kill files, score -;; files, and the kill command. Maybe it would be better, by default, to not -;; process a message neither as ham nor spam, that is, just ignore it for -;; learning purposes, when we are not sure of how the user sees it. -;; But `r' and `R' should undoubtedly be seen as ham. +(defcustom spam-directory "~/News/spam/" + "Directory for spam whitelists and blacklists." + :type 'directory + :group 'spam) -;; FIXME! Some might consider overkill to define a list of spam marks. On -;; the other hand, who knows, some users might for example like that -;; explicitly `E'xpired articles be processed as positive spam. 
+(defcustom spam-whitelist (expand-file-name "whitelist" spam-directory) + "The location of the whitelist. +The file format is one regular expression per line. +The regular expression is matched against the address." + :type 'file + :group 'spam) -(defvar spam-ham-marks - (list gnus-del-mark gnus-read-mark gnus-killed-mark - gnus-kill-file-mark gnus-low-score-mark) +(defcustom spam-blacklist (expand-file-name "blacklist" spam-directory) + "The location of the blacklist. +The file format is one regular expression per line. +The regular expression is matched against the address." + :type 'file + :group 'spam) + +(defcustom spam-use-dig t + "Whether query-dig should be used instead of query-dns." + :type 'boolean + :group 'spam) + +(defcustom spam-use-blacklist nil + "Whether the blacklist should be used by spam-split." + :type 'boolean + :group 'spam) + +(defcustom spam-use-whitelist nil + "Whether the whitelist should be used by spam-split." + :type 'boolean + :group 'spam) + +(defcustom spam-use-blackholes nil + "Whether blackholes should be used by spam-split." + :type 'boolean + :group 'spam) + +(defcustom spam-use-bogofilter nil + "Whether bogofilter should be used by spam-split." + :type 'boolean + :group 'spam) + +(defcustom spam-use-BBDB nil + "Whether BBDB should be used by spam-split." + :type 'boolean + :group 'spam) + +(defcustom spam-use-ifile nil + "Whether ifile should be used by spam-split." + :type 'boolean + :group 'spam) + +(defcustom spam-split-group "spam" + "Group name where incoming spam should be put by spam-split." + :type 'string + :group 'spam) + +;; FIXME! The mailgroup list evidently depends on other choices made by the +;; user, so the built-in default below is not likely to be appropriate. +(defcustom spam-junk-mailgroups (cons spam-split-group '("mail.junk" "poste.pourriel")) + "Mailgroups with spam contents. +All unmarked article in such group receive the spam mark on group entry." 
+ :type '(repeat (string :tag "Group")) + :group 'spam) + +(defcustom spam-blackhole-servers '("bl.spamcop.net" "relays.ordb.org" + "dev.null.dk" "relays.visi.com") + "List of blackhole servers." + :type '(repeat (string :tag "Server")) + :group 'spam) + +(defcustom spam-ham-marks (list 'gnus-del-mark 'gnus-read-mark + 'gnus-killed-mark 'gnus-kill-file-mark + 'gnus-low-score-mark) "Marks considered as being ham (positively not spam). -Such articles will be transmitted to `bogofilter -n' on group exit.") - -(defvar spam-spam-marks - (list gnus-spam-mark) +Such articles will be processed as ham (non-spam) on group exit." + :type '(set + (variable-item gnus-del-mark) + (variable-item gnus-read-mark) + (variable-item gnus-killed-mark) + (variable-item gnus-kill-file-mark) + (variable-item gnus-low-score-mark)) + :group 'spam) + +(defcustom spam-spam-marks (list 'gnus-spam-mark) "Marks considered as being spam (positively spam). -Such articles will be transmitted to `bogofilter -s' on group exit.") - -;; FIXME! Ideally, the remainder of this page should be fully integrated -;; within `gnus-sum.el'. +Such articles will be transmitted to `bogofilter -s' on group exit." + :type '(set + (variable-item gnus-spam-mark) + (variable-item gnus-killed-mark) + (variable-item gnus-kill-file-mark) + (variable-item gnus-low-score-mark)) + :group 'spam) + +(defcustom spam-face 'gnus-splash-face + "Face for spam-marked articles" + :type 'face + :group 'spam) + +(defgroup spam-bogofilter nil + "Spam bogofilter configuration." + :group 'spam) + +(defcustom spam-bogofilter-output-buffer-name "*Bogofilter Output*" + "Name of buffer when displaying `bogofilter -v' output." + :type 'string + :group 'spam-bogofilter) + +(defcustom spam-bogofilter-initial-timeout 40 + "Timeout in seconds for the initial reply from the `bogofilter' program." 
+ :type 'integer + :group 'spam-bogofilter) + +(defcustom spam-bogofilter-subsequent-timeout 15 + "Timeout in seconds for any subsequent reply from the `bogofilter' program." + :type 'integer + :group 'spam-bogofilter) + +(defcustom spam-bogofilter-path (executable-find "bogofilter") + "File path of the Bogofilter executable program." + :type '(choice (file :tag "Location of bogofilter") + (const :tag "Bogofilter is not installed")) + :group 'spam-bogofilter) + +;; FIXME! In the following regexp, we should explain which tool produces +;; which kind of header. I do not even remember them all by now. X-Junk +;; (and previously X-NoSpam) are produced by the `NoSpam' tool, which has +;; never been published, so it might not be reasonable leaving it in the +;; list. +(defcustom spam-bogofilter-spaminfo-header-regexp + "^X-\\(jf\\|Junk\\|NoSpam\\|Spam\\|SB\\)[^:]*:" + "Regexp for spam markups in headers. +Markup from spam recognisers, as well as `Xref', are to be removed from +articles before they get registered by Bogofilter." + :type 'regexp + :group 'spam-bogofilter) ;;; Key bindings for spam control. -;; FIXME! The justification for `M-d' is that this is what Paul Graham -;; suggests in his original article, and what Eric Raymond's patch for Mutt -;; uses. But more importantly, that binding was still free in Summary mode! - -;; FIXME! Lars has not blessed the following key bindings yet. It looks -;; convenient that the score analysis command uses a sequence ending with the -;; letter `t', so it nicely parallels `B t' or `V t'. `M-d' is a kind of -;; "alternate" `d', it is also the sequence suggested in Paul Graham article, -;; and also in Eric Raymond's patch for Mutt. `S x' might be the more -;; official key binding for `M-d'. - (gnus-define-keys gnus-summary-mode-map "St" spam-bogofilter-score "Sx" gnus-summary-mark-as-spam + "Mst" spam-bogofilter-score + "Msx" gnus-summary-mark-as-spam "\M-d" gnus-summary-mark-as-spam) ;;; How to highlight a spam summary line. 
-;; FIXME! Of course, `gnus-splash-face' has another purpose. Maybe a -;; special face should be created, named and used instead, for spam lines. +;; TODO: How do we redo this every time spam-face is customized? -(push '((eq mark gnus-spam-mark) . gnus-splash-face) +(push '((eq mark gnus-spam-mark) . spam-face) gnus-summary-highlight) +;; convenience functions +(defun spam-group-spam-contents-p (group) + (if (stringp group) + (or (member group spam-junk-mailgroups) + (memq 'gnus-group-spam-classification-spam + (gnus-parameter-spam-contents group))) + nil)) + +(defun spam-group-ham-contents-p (group) + (if (stringp group) + (memq 'gnus-group-spam-classification-ham + (gnus-parameter-spam-contents group)) + nil)) + +(defun spam-group-processor-p (group processor) + (if (and (stringp group) + (symbolp processor)) + (member processor (car (gnus-parameter-spam-process group))) + nil)) + +(defun spam-group-processor-bogofilter-p (group) + (spam-group-processor-p group 'gnus-group-spam-exit-processor-bogofilter)) + +(defun spam-group-processor-ifile-p (group) + (spam-group-processor-p group 'gnus-group-spam-exit-processor-ifile)) + +(defun spam-group-processor-blacklist-p (group) + (spam-group-processor-p group 'gnus-group-spam-exit-processor-blacklist)) + +(defun spam-group-processor-whitelist-p (group) + (spam-group-processor-p group 'gnus-group-ham-exit-processor-whitelist)) + +(defun spam-group-processor-BBDB-p (group) + (spam-group-processor-p group 'gnus-group-ham-exit-processor-BBDB)) + ;;; Hooks dispatching. A bit raw for now. 
(defun spam-summary-prepare ()
   (spam-mark-junk-as-spam-routine))

 (defun spam-summary-prepare-exit ()
-  (spam-bogofilter-register-routine))
+  "Run the exit-time spam/ham processors for `gnus-newsgroup-name'.
+Each registration routine runs only when the matching
+`spam-group-processor-*-p' predicate accepts the group.  Spam-marked
+articles are expired (and optionally moved) only in spam groups;
+whitelist and BBDB registration happen only in ham groups."
+  ;; The spam processors are invoked for any group, spam or ham or neither
+  (when (and spam-bogofilter-path
+             (spam-group-processor-bogofilter-p gnus-newsgroup-name))
+    (spam-bogofilter-register-routine))
+
+  (when (spam-group-processor-ifile-p gnus-newsgroup-name)
+    (spam-ifile-register-routine))
+
+  ;; Blacklist registration is gated on the blacklist processor, not on
+  ;; the bogofilter one.
+  (when (spam-group-processor-blacklist-p gnus-newsgroup-name)
+    (spam-blacklist-register-routine))
+
+  ;; Only for spam groups, we expire and maybe move articles
+  (when (spam-group-spam-contents-p gnus-newsgroup-name)
+    (spam-mark-spam-as-expired-and-move-routine
+     (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
+
+  (when (spam-group-ham-contents-p gnus-newsgroup-name)
+    (when (spam-group-processor-whitelist-p gnus-newsgroup-name)
+      (spam-whitelist-register-routine))
+    (when (spam-group-processor-BBDB-p gnus-newsgroup-name)
+      (spam-BBDB-register-routine))))

 (add-hook 'gnus-summary-prepare-hook 'spam-summary-prepare)
 (add-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)

 (defun spam-mark-junk-as-spam-routine ()
-  (when (member gnus-newsgroup-name spam-junk-mailgroups)
+  ;; check the global list of group names spam-junk-mailgroups and the
+  ;; group parameters
+  (when (spam-group-spam-contents-p gnus-newsgroup-name)
     (let ((articles gnus-newsgroup-articles)
           article)
       (while articles
         (setq article (pop articles))
         (when (eq (gnus-summary-article-mark article) gnus-unread-mark)
           (gnus-summary-mark-article article gnus-spam-mark))))))
+
+(defun spam-mark-spam-as-expired-and-move-routine (&optional group)
+  "Give every spam-marked article in the summary the expirable mark.
+When GROUP is a string, also call `gnus-summary-move-article' with
+GROUP as destination, with `gnus-current-article' bound to the article."
+  (let ((articles gnus-newsgroup-articles)
+        article)
+    (while articles
+      (setq article (pop articles))
+      (when (eq (gnus-summary-article-mark article) gnus-spam-mark)
+        (gnus-summary-mark-article article gnus-expirable-mark)
+        (when (stringp group)
+          (let ((gnus-current-article article))
+            (gnus-summary-move-article nil group)))))))
+
+(defun spam-generic-register-routine (spam-func ham-func)
+  "Sort `gnus-newsgroup-articles' into spam and ham, then register them.
+An article whose mark is among the values of `spam-spam-marks' is spam.
+Otherwise, unless it is in `gnus-newsgroup-saved', an article whose mark
+is among the values of `spam-ham-marks' is ham.  HAM-FUNC is called on
+each ham article and SPAM-FUNC on each spam article; either may be nil,
+in which case that class is skipped."
+  (let ((articles gnus-newsgroup-articles)
+        article mark ham-articles spam-articles spam-mark-values
+        ham-mark-values)
+
+    ;; marks are stored as symbolic values, so we have to dereference
+    ;; them for memq to work we wouldn't have to do this if
+    ;; gnus-summary-article-mark returned a symbol.
+    (dolist (mark spam-ham-marks)
+      (push (symbol-value mark) ham-mark-values))
+
+    (dolist (mark spam-spam-marks)
+      (push (symbol-value mark) spam-mark-values))
+
+    (while articles
+      (setq article (pop articles)
+            mark (gnus-summary-article-mark article))
+      (cond ((memq mark spam-mark-values) (push article spam-articles))
+            ((memq article gnus-newsgroup-saved))
+            ((memq mark ham-mark-values) (push article ham-articles))))
+    (when (and ham-articles ham-func)
+      (mapc ham-func ham-articles))     ; we use mapc because unlike
+                                        ; mapcar it discards the
+                                        ; return values
+    (when (and spam-articles spam-func)
+      (mapc spam-func spam-articles)))) ; we use mapc because unlike
+                                        ; mapcar it discards the
+                                        ; return values
+
+(defun spam-fetch-field-from-fast (article)
+  "Fetch the `from' field quickly, using the internal gnus-data-list function"
+  (if (and (numberp article)
+           (assoc article (gnus-data-list nil)))
+      (mail-header-from (gnus-data-header (assoc article (gnus-data-list nil))))
+    nil))
+
 ;;;; Spam determination.

-;; The following list contains pairs associating a parameter variable with a
-;; spam checking function. If the parameter variable is true, then the
-;; checking function is called, and its value decides what happens. Each
-;; individual check may return `nil', `t', or a mailgroup name. The value
-;; `nil' means that the check does not yield a decision, and so, that further
-;; checks are needed. The value `t' means that the message is definitely not
-;; spam, and that further spam checks should be inhibited.
Otherwise, a -;; mailgroup name is returned where the mail should go, and further checks are -;; also inhibited. The usual mailgroup name is the value of -;; `spam-split-group', meaning that the message is definitely a spam. - (defvar spam-list-of-checks '((spam-use-blacklist . spam-check-blacklist) (spam-use-whitelist . spam-check-whitelist) - (spam-use-bbdb . spam-check-bbdb) + (spam-use-BBDB . spam-check-BBDB) + (spam-use-ifile . spam-check-ifile) (spam-use-blackholes . spam-check-blackholes) - (spam-use-bogofilter . spam-check-bogofilter))) + (spam-use-bogofilter . spam-check-bogofilter)) +"The spam-list-of-checks list contains pairs associating a parameter +variable with a spam checking function. If the parameter variable is +true, then the checking function is called, and its value decides what +happens. Each individual check may return `nil', `t', or a mailgroup +name. The value `nil' means that the check does not yield a decision, +and so, that further checks are needed. The value `t' means that the +message is definitely not spam, and that further spam checks should be +inhibited. Otherwise, a mailgroup name is returned where the mail +should go, and further checks are also inhibited. The usual mailgroup +name is the value of `spam-split-group', meaning that the message is +definitely a spam.") (defun spam-split () "Split this message into the `spam' group if it is spam. @@ -184,23 +384,16 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details." decision) (while (and list-of-checks (not decision)) (let ((pair (pop list-of-checks))) - (when (eval (car pair)) - (setq decision (apply (cdr pair)))))) + (when (symbol-value (car pair)) + (setq decision (funcall (cdr pair)))))) (if (eq decision t) nil decision))) ;;;; Blackholes. 
-(defvar spam-blackhole-servers '("bl.spamcop.net"
-                                 "relays.ordb.org"
-                                 "dev.null.dk"
-                                 "relays.visi.com"
-                                 "rbl.maps.vix.com")
-  "List of blackhole servers.")
-
 (defun spam-check-blackholes ()
-  "Check the Receieved headers for blackholed relays."
+  "Check the Received headers for blackholed relays."
   (let ((headers (message-fetch-field "received"))
         ips matches)
     (when headers
@@ -216,26 +409,90 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details."
                   ips)))
       (dolist (server spam-blackhole-servers)
         (dolist (ip ips)
-          (when (query-dns (concat ip "." server))
-            (push (list ip server (query-dns (concat ip "." server) 'TXT))
-                  matches)))))
+          (let ((query-string (concat ip "." server)))
+            (if spam-use-dig
+                (let ((query-result (query-dig query-string)))
+                  (when query-result
+                    (message "spam: positive blackhole check '%s'" query-result)
+                    (push (list ip server query-result)
+                          matches)))
+              ;; else, if not using dig.el
+              (when (query-dns query-string)
+                (push (list ip server (query-dns query-string 'TXT))
+                      matches)))))))
     (when matches
       spam-split-group)))

-;;;; Blacklists and whitelists.
+;;;; BBDB original idea for spam-check-BBDB from Alexander Kotelnikov
+;;;

-(defvar spam-directory "~/News/spam/"
-  "When spam files are kept.")
+;; all this is done inside a condition-case to trap errors
+(condition-case nil
+    (progn

-(defvar spam-whitelist (expand-file-name "whitelist" spam-directory)
-  "The location of the whitelist.
-The file format is one regular expression per line.
-The regular expression is matched against the address.")
+      (require 'bbdb-com)

-(defvar spam-blacklist (expand-file-name "blacklist" spam-directory)
-  "The location of the blacklist.
-The file format is one regular expression per line.
-The regular expression is matched against the address.")
+      (defun spam-enter-ham-BBDB (from)
+        "Enter an address into the BBDB; implies ham (non-spam) sender"
+        (when (stringp from)
+          (let* ((parsed-address (gnus-extract-address-components from))
+                 (name (or (car parsed-address) "Ham Sender"))
+                 (net-address (car (cdr parsed-address))))
+            (message "Adding address %s to BBDB" from)
+            (when (and net-address
+                       (not (bbdb-search (bbdb-records) nil nil net-address)))
+              (bbdb-create-internal name nil net-address nil nil
+                                    "ham sender added by spam.el")))))
+
+      (defun spam-BBDB-register-routine ()
+        "Enter the From address of every ham article into the BBDB."
+        (spam-generic-register-routine
+         ;; spam function
+         nil
+         ;; ham function
+         (lambda (article)
+           (spam-enter-ham-BBDB (spam-fetch-field-from-fast article)))))
+
+      (defun spam-check-BBDB ()
+        "Return nil if the sender is in the BBDB, else `spam-split-group'.
+Also return nil when the message has no From header.  A nil result is
+\"no decision\", so later entries of `spam-list-of-checks' still run."
+        (let ((who (message-fetch-field "from")))
+          (when who
+            (setq who (regexp-quote (cadr
+                                     (gnus-extract-address-components who))))
+            (if (bbdb-search (bbdb-records) nil nil who)
+                nil spam-split-group)))))
+
+  ;; BBDB could not be loaded: drop the BBDB check and install harmless
+  ;; stand-ins for the two entry points.
+  (file-error (progn
+                (setq spam-list-of-checks
+                      (delete (assoc 'spam-use-BBDB spam-list-of-checks)
+                              spam-list-of-checks))
+                (defun spam-check-BBDB ()
+                  "Stand-in used when BBDB is unavailable; log and return nil."
+                  (message "spam-check-BBDB was invoked, but it shouldn't have")
+                  ;; `message' returns its string; return nil explicitly so
+                  ;; `spam-split' never takes it for a group name.
+                  nil)
+                (defun spam-BBDB-register-routine ()
+                  "Stand-in used when BBDB is unavailable; registers nothing."
+                  (spam-generic-register-routine nil nil)))))
+
+
+;;;; ifile
+
+;;; uses ifile-gnus.el from
+;;; http://www.ai.mit.edu/people/jhbrown/ifile-gnus.html
+
+;;; check the ifile backend; return nil if the mail was NOT classified
+;;; as spam
+
+;;; TODO: we can't (require 'ifile-gnus), because it will insinuate
+;;; itself automatically
+(defun spam-check-ifile ()
+  "Call `ifile-spam-filter' with `ifile-primary-spam-group' bound to `spam-split-group'."
+  (let ((ifile-primary-spam-group spam-split-group))
+    (ifile-spam-filter nil)))
+
+;; TODO: add ifile registration
+;; We need ifile-gnus.el to support nnimap; we could feed the message
+;; directly to ifile like we do with bogofilter but that's ugly.
+(defun spam-ifile-register-routine () + (spam-generic-register-routine nil nil)) + + +;;;; Blacklists and whitelists. (defvar spam-whitelist-cache nil) (defvar spam-blacklist-cache nil) @@ -272,19 +529,6 @@ The regular expression is matched against the address.") (setq spam-whitelist-cache (spam-parse-list spam-whitelist))) (if (spam-from-listed-p spam-whitelist-cache) nil spam-split-group)) -;;; copied from code by Alexander Kotelnikov -(defun spam-check-bbdb () - "We want messages from people who are in the BBDB not to be split to spam" - (let ((who (message-fetch-field "from"))) - (when who - (setq who (regexp-quote (cadr (gnus-extract-address-components who)))) - (if (bbdb-search (bbdb-records) nil nil who) nil spam-split-group)))) - -;; let spam-check-bbdb be nil if the BBDB can't be loaded -(condition-case nil - (require 'bbdb) - (file-error (defalias 'spam-check-bbdb 'ignore))) - (defun spam-check-blacklist () ;; FIXME! Should it detect when file timestamps change? (unless spam-blacklist-cache @@ -319,19 +563,41 @@ The regular expression is matched against the address.") (setq found t cache nil))) found)) + +(defun spam-blacklist-register-routine () + (spam-generic-register-routine + ;; the spam function + (lambda (article) + (let ((from (spam-fetch-field-from-fast article))) + (when (stringp from) + (spam-enter-blacklist from)))) + ;; the ham function + nil)) + +(defun spam-whitelist-register-routine () + (spam-generic-register-routine + ;; the spam function + nil + ;; the ham function + (lambda (article) + (let ((from (spam-fetch-field-from-fast article))) + (when (stringp from) + (spam-enter-whitelist from)))))) + -;;;; Training via Bogofilter. Last updated 2002-09-02. +;;;; Bogofilter ;;; See Paul Graham article, at `http://www.paulgraham.com/spam.html'. -;;; This page is for those wanting to control spam with the help of Eric -;;; Raymond's speedy Bogofilter, see http://www.tuxedo.org/~esr/bogofilter. 
-;;; This has been tested with a locally patched copy of version 0.4. +;;; This page is for those wanting to control spam with the help of +;;; Eric Raymond's speedy Bogofilter, see +;;; http://www.tuxedo.org/~esr/bogofilter. This has been tested with +;;; a locally patched copy of version 0.4. -;;; Make sure Bogofilter is installed. Bogofilter internally uses Judy fast -;;; associative arrays, so you need to install Judy first, and Bogofilter -;;; next. Fetch both distributions by visiting the following links and -;;; downloading the latest version of each: +;;; Make sure Bogofilter is installed. Bogofilter internally uses +;;; Judy fast associative arrays, so you need to install Judy first, +;;; and Bogofilter next. Fetch both distributions by visiting the +;;; following links and downloading the latest version of each: ;;; ;;; http://sourceforge.net/projects/judy/ ;;; http://www.tuxedo.org/~esr/bogofilter/ @@ -342,14 +608,15 @@ The regular expression is matched against the address.") ;;; make ;;; make install ;;; -;;; You will likely need to become super-user for the last step. Then, unpack -;;; the Bogofilter distribution and enter its main directory: +;;; You will likely need to become super-user for the last step. +;;; Then, unpack the Bogofilter distribution and enter its main +;;; directory: ;;; ;;; make ;;; make install ;;; -;;; Here as well, you need to become super-user for the last step. Now, -;;; initialises your word lists by doing, under your own identity: +;;; Here as well, you need to become super-user for the last step. +;;; Now, initialize your word lists by doing, under your own identity: ;;; ;;; mkdir ~/.bogofilter ;;; touch ~/.bogofilter/badlist @@ -357,82 +624,71 @@ The regular expression is matched against the address.") ;;; ;;; These two files are text files you may edit, but you normally don't! -;;; The `M-d' command gets added to Gnus summary mode, marking current article -;;; as spam, showing it with the `H' mark. 
Whenever you see a spam article, -;;; make sure to mark its summary line with `M-d' before leaving the group. -;;; Some groups, as per variable `spam-junk-mailgroups' below, receive articles -;;; from Gnus splitting on clues added by spam recognisers, so for these -;;; groups, we tack an `H' mark at group entry for all summary lines which -;;; would otherwise have no other mark. Make sure to _remove_ `H' marks for -;;; any article which is _not_ genuine spam, before leaving such groups: you -;;; may use `M-u' to "unread" the article, or `d' for declaring it read the -;;; non-spam way. When you leave a group, all `H' marked articles, saved or -;;; unsaved, are sent to Bogofilter which will study them as spam samples. +;;; The `M-d' command gets added to Gnus summary mode, marking current +;;; article as spam, showing it with the `H' mark. Whenever you see a +;;; spam article, make sure to mark its summary line with `M-d' before +;;; leaving the group. Some groups, as per variable +;;; `spam-junk-mailgroups' below, receive articles from Gnus splitting +;;; on clues added by spam recognisers, so for these groups, we tack +;;; an `H' mark at group entry for all summary lines which would +;;; otherwise have no other mark. Make sure to _remove_ `H' marks for +;;; any article which is _not_ genuine spam, before leaving such +;;; groups: you may use `M-u' to "unread" the article, or `d' for +;;; declaring it read the non-spam way. When you leave a group, all +;;; `H' marked articles, saved or unsaved, are sent to Bogofilter +;;; which will study them as spam samples. ;;; Messages may also be deleted in various other ways, and unless -;;; `spam-ham-marks-form' gets overridden below, marks `R' and `r' for default -;;; read or explicit delete, marks `X' and 'K' for automatic or explicit -;;; kills, as well as mark `Y' for low scores, are all considered to be -;;; associated with articles which are not spam. 
This assumption might be -;;; false, in particular if you use kill files or score files as means for -;;; detecting genuine spam, you should then adjust `spam-ham-marks-form'. When -;;; you leave a group, all _unsaved_ articles bearing any the above marks are -;;; sent to Bogofilter which will study these as not-spam samples. If you -;;; explicit kill a lot, you might sometimes end up with articles marked `K' -;;; which you never saw, and which might accidentally contain spam. Best is -;;; to make sure that real spam is marked with `H', and nothing else. - -;;; All other marks do not contribute to Bogofilter pre-conditioning. In -;;; particular, ticked, dormant or souped articles are likely to contribute -;;; later, when they will get deleted for real, so there is no need to use -;;; them prematurely. Explicitly expired articles do not contribute, command -;;; `E' is a way to get rid of an article without Bogofilter ever seeing it. - -;;; In a word, with a minimum of care for associating the `H' mark for spam -;;; articles only, Bogofilter training all gets fairly automatic. You should -;;; do this until you get a few hundreds of articles in each category, spam -;;; or not. The shell command `head -1 ~/.bogofilter/*' shows both article -;;; counts. The command `S S' in summary mode, either for debugging or for -;;; curiosity, triggers Bogofilter into displaying in another buffer the -;;; "spamicity" score of the current article (between 0.0 and 1.0), together -;;; with the article words which most significantly contribute to the score. - -;;; The real way for using Bogofilter, however, is to have some use tool like -;;; `procmail' for invoking it on message reception, then adding some -;;; recognisable header in case of detected spam. Gnus splitting rules might -;;; later trip on these added headers and react by sorting such articles into -;;; specific junk folders as per `spam-junk-mailgroups'. 
Here is a possible
-;;; `.procmailrc' contents (still untested -- please tell me how it goes):
+;;; `spam-ham-marks' gets customized, marks `R' and `r' for
+;;; default read or explicit delete, marks `X' and `K' for automatic
+;;; or explicit kills, as well as mark `Y' for low scores, are all
+;;; considered to be associated with articles which are not spam.
+;;; This assumption might be false, in particular if you use kill
+;;; files or score files as means for detecting genuine spam, you
+;;; should then adjust `spam-ham-marks'. When you leave a group,
+;;; all _unsaved_ articles bearing any of the above marks are sent to
+;;; Bogofilter which will study these as not-spam samples. If you
+;;; explicitly kill a lot, you might sometimes end up with articles
+;;; marked `K' which you never saw, and which might accidentally
+;;; contain spam. Best is to make sure that real spam is marked with
+;;; `H', and nothing else.
+
+;;; All other marks do not contribute to Bogofilter pre-conditioning.
+;;; In particular, ticked, dormant or souped articles are likely to
+;;; contribute later, when they will get deleted for real, so there is
+;;; no need to use them prematurely. Explicitly expired articles do
+;;; not contribute; command `E' is a way to get rid of an article
+;;; without Bogofilter ever seeing it.
+
+;;; In a word, with a minimum of care for associating the `H' mark for
+;;; spam articles only, Bogofilter training all gets fairly automatic.
+;;; You should do this until you get a few hundred articles in
+;;; each category, spam or not. The shell command `head -1
+;;; ~/.bogofilter/*' shows both article counts. The command `S t' in
+;;; summary mode, either for debugging or for curiosity, triggers
+;;; Bogofilter into displaying in another buffer the "spamicity" score
+;;; of the current article (between 0.0 and 1.0), together with the
+;;; article words which most significantly contribute to the score.
+ +;;; The real way for using Bogofilter, however, is to have some use +;;; tool like `procmail' for invoking it on message reception, then +;;; adding some recognisable header in case of detected spam. Gnus +;;; splitting rules might later trip on these added headers and react +;;; by sorting such articles into specific junk folders as per +;;; `spam-junk-mailgroups'. Here is a possible `.procmailrc' contents +;;; (still untested -- please tell me how it goes): ;;; ;;; :0HBf: ;;; * ? bogofilter ;;; | formail -bfI "X-Spam-Status: Yes" -(defvar spam-output-buffer-name "*Bogofilter Output*" - "Name of buffer when displaying `bogofilter -v' output.") - -(defvar spam-spaminfo-header-regexp - ;; FIXME! In the following regexp, we should explain which tool produces - ;; which kind of header. I do not even remember them all by now. X-Junk - ;; (and previously X-NoSpam) are produced by the `NoSpam' tool, which has - ;; never been published, so it might not be reasonable leaving it in the - ;; list. - "^X-\\(jf\\|Junk\\|NoSpam\\|Spam\\|SB\\)[^:]*:" - "Regexp for spam markups in headers. -Markup from spam recognisers, as well as `Xref', are to be removed from -articles before they get registered by Bogofilter.") - -(defvar spam-bogofilter-path (executable-find "bogofilter") - "File path of the Bogofilter executable program. -Force this variable to nil if you want to inhibit the functionality.") - (defun spam-check-bogofilter () ;; Dynamic spam check. I do not know how to check the exit status, ;; so instead, read `bogofilter -v' output. (when (and spam-use-bogofilter spam-bogofilter-path) (spam-bogofilter-articles nil "-v" (list (gnus-summary-article-number))) (when (save-excursion - (set-buffer spam-output-buffer-name) + (set-buffer spam-bogofilter-output-buffer-name) (goto-char (point-min)) (re-search-forward "Spamicity: \\(0\\.9\\|1\\.0\\)" nil t)) spam-split-group))) @@ -444,37 +700,47 @@ spamicity coefficient of each, and the overall article spamicity." 
(interactive) (when (and spam-use-bogofilter spam-bogofilter-path) (spam-bogofilter-articles nil "-v" (list (gnus-summary-article-number))) - (save-excursion - (set-buffer spam-output-buffer-name) - (unless (= (point-min) (point-max)) - (display-message-or-buffer (current-buffer) - spam-output-buffer-name))))) + (with-current-buffer spam-bogofilter-output-buffer-name + (unless (zerop (buffer-size)) + (if (<= (count-lines (point-min) (point-max)) 1) + (progn + (goto-char (point-max)) + (when (bolp) + (backward-char 1)) + (message "%s" (buffer-substring (point-min) (point)))) + (goto-char (point-min)) + (display-buffer (current-buffer))))))) (defun spam-bogofilter-register-routine () - (when (and spam-use-bogofilter spam-bogofilter-path) - (let ((articles gnus-newsgroup-articles) - article mark ham-articles spam-articles) - (while articles - (setq article (pop articles) - mark (gnus-summary-article-mark article)) - (cond ((memq mark spam-spam-marks) (push article spam-articles)) - ((memq article gnus-newsgroup-saved)) - ((memq mark spam-ham-marks) (push article ham-articles)))) - (when ham-articles - (spam-bogofilter-articles "ham" "-n" ham-articles)) - (when spam-articles - (spam-bogofilter-articles "SPAM" "-s" spam-articles))))) - -(defvar spam-bogofilter-initial-timeout 40 - "Timeout in seconds for the initial reply from the `bogofilter' program.") - -(defvar spam-bogofilter-subsequent-timeout 15 - "Timeout in seconds for any subsequent reply from the `bogofilter' program.") + (let ((articles gnus-newsgroup-articles) + article mark ham-articles spam-articles spam-mark-values + ham-mark-values) + + ;; marks are stored as symbolic values, so we have to dereference + ;; them for memq to work we wouldn't have to do this if + ;; gnus-summary-article-mark returned a symbol. 
+ (dolist (mark spam-ham-marks) + (push (symbol-value mark) ham-mark-values)) + + (dolist (mark spam-spam-marks) + (push (symbol-value mark) spam-mark-values)) + + (while articles + (setq article (pop articles) + mark (gnus-summary-article-mark article)) + (cond ((memq mark spam-mark-values) (push article spam-articles)) + ((memq article gnus-newsgroup-saved)) + ((memq mark ham-mark-values) (push article ham-articles)))) + (when ham-articles + (spam-bogofilter-articles "ham" "-n" ham-articles)) + (when spam-articles + (spam-bogofilter-articles "SPAM" "-s" spam-articles)))) (defun spam-bogofilter-articles (type option articles) - (let ((output-buffer (get-buffer-create spam-output-buffer-name)) + (let ((output-buffer (get-buffer-create spam-bogofilter-output-buffer-name)) (article-copy (get-buffer-create " *Bogofilter Article Copy*")) - (remove-regexp (concat spam-spaminfo-header-regexp "\\|Xref:")) + (remove-regexp (concat spam-bogofilter-spaminfo-header-regexp + "\\|Xref:")) (counter 0) prefix process article) (when type @@ -493,20 +759,15 @@ spamicity coefficient of each, and the overall article spamicity." (message "%s %d" prefix counter)) (setq article (pop articles)) (gnus-summary-goto-subject article) - (gnus-summary-select-article) + (gnus-summary-show-article t) (gnus-eval-in-buffer-window article-copy (insert-buffer-substring gnus-original-article-buffer) ;; Remove spam classification redundant headers: they may induce ;; unwanted biases in later analysis. - (goto-char (point-min)) - (while (not (or (eobp) (= (following-char) ?\n))) - (if (looking-at remove-regexp) - (delete-region (point) - (save-excursion (forward-line 1) (point))) - (forward-line 1))) - (goto-char (point-min)) + (message-remove-header remove-regexp t) ;; Bogofilter really wants From envelopes for counting articles. ;; Fake one at the beginning, make sure there will be no other. 
+ (goto-char (point-min)) (if (looking-at "From ") (forward-line 1) (insert "From nobody " (current-time-string) "\n"))