X-Git-Url: http://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Fspam.el;h=a7d84ee40f7265011e3870b0dd72aff0e679a1d6;hp=aaa69da0c0a3b6bdd314bdfcacde8d8f4a921975;hb=7476a1a88aea15c78cb233ee964a466c89fef558;hpb=2436674e6014238431ef610d88771ce71d8099ab diff --git a/lisp/spam.el b/lisp/spam.el index aaa69da0c..a7d84ee40 100644 --- a/lisp/spam.el +++ b/lisp/spam.el @@ -1,5 +1,5 @@ ;;; spam.el --- Identifying spam -;; Copyright (C) 2002 Free Software Foundation, Inc. +;; Copyright (C) 2002, 2003 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: network @@ -34,12 +34,17 @@ ;;; Code: +(eval-when-compile (require 'cl)) + (require 'gnus-sum) (require 'gnus-uu) ; because of key prefix issues (require 'gnus) ; for the definitions of group content classification and spam processors (require 'message) ;for the message-fetch-field functions +;; for nnimap-split-download-body-default +(eval-when-compile (require 'nnimap)) + ;; autoload executable-find (eval-and-compile ;; executable-find is not autoloaded in Emacs 20 @@ -49,6 +54,10 @@ (eval-and-compile (autoload 'query-dig "dig")) +;; autoload spam-report +(eval-and-compile + (autoload 'spam-report-gmane "spam-report")) + ;; autoload query-dns (eval-and-compile (autoload 'query-dns "dns")) @@ -69,7 +78,33 @@ When nil, only ham and unclassified groups will have their spam moved to the spam-process-destination. When t, spam will also be moved from spam groups." :type 'boolean - :group 'spam-ifile) + :group 'spam) + +(defcustom spam-process-ham-in-nonham-groups nil + "Whether ham should be processed in non-ham groups." + :type 'boolean + :group 'spam) + +(defcustom spam-process-ham-in-spam-groups nil + "Whether ham should be processed in spam groups." + :type 'boolean + :group 'spam) + +(defcustom spam-mark-only-unseen-as-spam t + "Whether only unseen articles should be marked as spam in spam +groups. When nil, all unread articles in a spam group are marked as +spam. 
Set this if you want to leave an article unread in a spam group +without losing it to the automatic spam-marking process." + :type 'boolean + :group 'spam) + +(defcustom spam-mark-ham-unread-before-move-from-spam-group nil + "Whether ham should be marked unread before it's moved out of a spam +group according to ham-process-destination. This variable is an +official entry in the international Longest Variable Name +Competition." + :type 'boolean + :group 'spam) (defcustom spam-whitelist (expand-file-name "whitelist" spam-directory) "The location of the whitelist. @@ -100,13 +135,44 @@ The regular expression is matched against the address." :type 'boolean :group 'spam) +(defcustom spam-use-whitelist-exclusive nil + "Whether whitelist-exclusive should be used by spam-split. +Exclusive whitelisting means that all messages from senders not in the whitelist +are considered spam." + :type 'boolean + :group 'spam) + (defcustom spam-use-blackholes nil "Whether blackholes should be used by spam-split." :type 'boolean :group 'spam) +(defcustom spam-use-hashcash nil + "Whether hashcash payments should be detected by spam-split." + :type 'boolean + :group 'spam) + +(defcustom spam-use-regex-headers nil + "Whether a header regular expression match should be used by spam-split. +Also see the variables `spam-regex-headers-spam' and `spam-regex-headers-ham'." + :type 'boolean + :group 'spam) + +(defcustom spam-use-regex-body nil + "Whether a body regular expression match should be used by spam-split. +Also see the variables `spam-regex-body-spam' and `spam-regex-body-ham'." + :type 'boolean + :group 'spam) + +(defcustom spam-use-bogofilter-headers nil + "Whether bogofilter headers should be used by spam-split. +Enable this if you pre-process messages with Bogofilter BEFORE Gnus sees them." + :type 'boolean + :group 'spam) + (defcustom spam-use-bogofilter nil - "Whether bogofilter should be used by spam-split." + "Whether bogofilter should be invoked by spam-split. 
+Enable this if you want Gnus to invoke Bogofilter on new messages."
   :type 'boolean
   :group 'spam)
 
@@ -115,16 +181,55 @@ The regular expression is matched against the address."
   :type 'boolean
   :group 'spam)
 
+(defcustom spam-use-BBDB-exclusive nil
+  "Whether BBDB-exclusive should be used by spam-split.
+Exclusive BBDB means that all messages from senders not in the BBDB are
+considered spam."
+  :type 'boolean
+  :group 'spam)
+
 (defcustom spam-use-ifile nil
   "Whether ifile should be used by spam-split."
   :type 'boolean
   :group 'spam)
 
+(defcustom spam-use-stat nil
+  "Whether spam-stat should be used by spam-split."
+  :type 'boolean
+  :group 'spam)
+
+(defcustom spam-use-spamoracle nil
+  "Whether spamoracle should be used by spam-split."
+  :type 'boolean
+  :group 'spam)
+
+(defcustom spam-install-hooks (or
+                               spam-use-dig
+                               spam-use-blacklist
+                               spam-use-whitelist
+                               spam-use-whitelist-exclusive
+                               spam-use-blackholes
+                               spam-use-hashcash
+                               spam-use-regex-headers
+                               spam-use-regex-body
+                               spam-use-bogofilter-headers
+                               spam-use-bogofilter
+                               spam-use-BBDB
+                               spam-use-BBDB-exclusive
+                               spam-use-ifile
+                               spam-use-stat
+                               spam-use-spamoracle)
+  "Whether the spam hooks should be installed.
+Defaults to t if one of the spam-use-* variables is set."
+  :group 'spam
+  :type 'boolean)
+
 (defcustom spam-split-group "spam"
   "Group name where incoming spam should be put by spam-split."
   :type 'string
   :group 'spam)
 
+;;; TODO: deprecate this variable, it's confusing since it's a list of strings, not regular expressions
 (defcustom spam-junk-mailgroups (cons spam-split-group '("mail.junk" "poste.pourriel"))
   "Mailgroups with spam contents.
 All unmarked article in such group receive the spam mark on group entry."
@@ -137,27 +242,10 @@ All unmarked article in such group receive the spam mark on group entry."
:type '(repeat (string :tag "Server")) :group 'spam) -(defcustom spam-ham-marks (list 'gnus-del-mark 'gnus-read-mark - 'gnus-killed-mark 'gnus-kill-file-mark - 'gnus-low-score-mark) - "Marks considered as being ham (positively not spam). -Such articles will be processed as ham (non-spam) on group exit." - :type '(set - (variable-item gnus-del-mark) - (variable-item gnus-read-mark) - (variable-item gnus-killed-mark) - (variable-item gnus-kill-file-mark) - (variable-item gnus-low-score-mark)) - :group 'spam) - -(defcustom spam-spam-marks (list 'gnus-spam-mark) - "Marks considered as being spam (positively spam). -Such articles will be transmitted to `bogofilter -s' on group exit." - :type '(set - (variable-item gnus-spam-mark) - (variable-item gnus-killed-mark) - (variable-item gnus-kill-file-mark) - (variable-item gnus-low-score-mark)) +(defcustom spam-blackhole-good-server-regex nil + "String matching IP addresses that should not be checked in the blackholes" + :type '(radio (const nil) + (regexp :format "%t: %v\n" :size 0)) :group 'spam) (defcustom spam-face 'gnus-splash-face @@ -165,6 +253,26 @@ Such articles will be transmitted to `bogofilter -s' on group exit." 
:type 'face :group 'spam) +(defcustom spam-regex-headers-spam '("^X-Spam-Flag: YES") + "Regular expression for positive header spam matches" + :type '(repeat (regexp :tag "Regular expression to match spam header")) + :group 'spam) + +(defcustom spam-regex-headers-ham '("^X-Spam-Flag: NO") + "Regular expression for positive header ham matches" + :type '(repeat (regexp :tag "Regular expression to match ham header")) + :group 'spam) + +(defcustom spam-regex-body-spam '() + "Regular expression for positive body spam matches" + :type '(repeat (regexp :tag "Regular expression to match spam body")) + :group 'spam) + +(defcustom spam-regex-body-ham '() + "Regular expression for positive body ham matches" + :type '(repeat (regexp :tag "Regular expression to match ham body")) + :group 'spam) + (defgroup spam-ifile nil "Spam ifile configuration." :group 'spam) @@ -186,6 +294,13 @@ Such articles will be transmitted to `bogofilter -s' on group exit." :type 'string :group 'spam-ifile) +(defcustom spam-ifile-ham-category nil + "Name of the ham ifile category. If nil, the current group name will +be used." + :type '(choice (string :tag "Use a fixed category") + (const :tag "Use the current group name")) + :group 'spam-ifile) + (defcustom spam-ifile-all-categories nil "Whether the ifile check will return all categories, or just spam. Set this to t if you want to use the spam-split invocation of ifile as @@ -197,40 +312,55 @@ your main source of newsgroup names." "Spam bogofilter configuration." :group 'spam) -(defcustom spam-bogofilter-output-buffer-name "*Bogofilter Output*" - "Name of buffer when displaying `bogofilter -v' output." - :type 'string +(defcustom spam-bogofilter-path (executable-find "bogofilter") + "File path of the Bogofilter executable program." 
+ :type '(choice (file :tag "Location of bogofilter") + (const :tag "Bogofilter is not installed")) :group 'spam-bogofilter) -(defcustom spam-bogofilter-initial-timeout 40 - "Timeout in seconds for the initial reply from the `bogofilter' program." - :type 'integer +(defcustom spam-bogofilter-header "X-Bogosity" + "The header that Bogofilter inserts in messages." + :type 'string :group 'spam-bogofilter) -(defcustom spam-bogofilter-subsequent-timeout 15 - "Timeout in seconds for any subsequent reply from the `bogofilter' program." - :type 'integer +(defcustom spam-bogofilter-spam-switch "-s" + "The switch that Bogofilter uses to register spam messages." + :type 'string :group 'spam-bogofilter) -(defcustom spam-bogofilter-path (executable-find "bogofilter") - "File path of the Bogofilter executable program." - :type '(choice (file :tag "Location of bogofilter") - (const :tag "Bogofilter is not installed")) +(defcustom spam-bogofilter-ham-switch "-n" + "The switch that Bogofilter uses to register ham messages." + :type 'string :group 'spam-bogofilter) -;; FIXME! In the following regexp, we should explain which tool produces -;; which kind of header. I do not even remember them all by now. X-Junk -;; (and previously X-NoSpam) are produced by the `NoSpam' tool, which has -;; never been published, so it might not be reasonable leaving it in the -;; list. -(defcustom spam-bogofilter-spaminfo-header-regexp - "^X-\\(jf\\|Junk\\|NoSpam\\|Spam\\|SB\\)[^:]*:" - "Regexp for spam markups in headers. -Markup from spam recognisers, as well as `Xref', are to be removed from -articles before they get registered by Bogofilter." +(defcustom spam-bogofilter-bogosity-positive-spam-header "^\\(Yes\\|Spam\\)" + "The regex on `spam-bogofilter-header' for positive spam identification." :type 'regexp :group 'spam-bogofilter) +(defcustom spam-bogofilter-database-directory nil + "Directory path of the Bogofilter databases." 
+  :type '(choice (directory :tag "Location of the Bogofilter database directory")
+                 (const :tag "Use the default"))
+  :group 'spam-bogofilter)
+
+(defgroup spam-spamoracle nil
+  "Spam spamoracle configuration."
+  :group 'spam)
+
+(defcustom spam-spamoracle-database nil
+  "Location of spamoracle database file.  When nil, use the default
+spamoracle database."
+  :type '(choice (file :tag "Location of spamoracle database file.")
+                 (const :tag "Use the default"))
+  :group 'spam-spamoracle)
+
+(defcustom spam-spamoracle-binary (executable-find "spamoracle")
+  "Location of the spamoracle binary."
+  :type '(choice (file :tag "Location of the spamoracle binary")
+                 (const :tag "Use the default"))
+  :group 'spam-spamoracle)
+
 ;;; Key bindings for spam control.
 
 (gnus-define-keys gnus-summary-mode-map
@@ -240,14 +370,30 @@ articles before they get registered by Bogofilter.
   "Msx" gnus-summary-mark-as-spam
   "\M-d" gnus-summary-mark-as-spam)
 
-;;; How to highlight a spam summary line.
-
-;; TODO: How do we redo this every time spam-face is customized?
-
-(push '((eq mark gnus-spam-mark) . spam-face)
-      gnus-summary-highlight)
-
 ;; convenience functions
+(defun spam-group-ham-mark-p (group mark &optional spam)
+  (when (stringp group)
+    (let* ((marks (spam-group-ham-marks group spam))
+           (marks (if (symbolp mark)
+                      marks
+                    (mapcar 'symbol-value marks))))
+      (memq mark marks))))
+
+(defun spam-group-spam-mark-p (group mark)
+  (spam-group-ham-mark-p group mark t))
+
+(defun spam-group-ham-marks (group &optional spam)
+  (when (stringp group)
+    (let* ((marks (if spam
+                      (gnus-parameter-spam-marks group)
+                    (gnus-parameter-ham-marks group)))
+           (marks (car marks))
+           (marks (if (listp (car marks)) (car marks) marks)))
+      marks)))
+
+(defun spam-group-spam-marks (group)
+  (spam-group-ham-marks group t))
+
 (defun spam-group-spam-contents-p (group)
   (if (stringp group)
       (or (member group spam-junk-mailgroups)
@@ -267,6 +413,9 @@ articles before they get registered by Bogofilter.
(member processor (car (gnus-parameter-spam-process group))) nil)) +(defun spam-group-spam-processor-report-gmane-p (group) + (spam-group-processor-p group 'gnus-group-spam-exit-processor-report-gmane)) + (defun spam-group-spam-processor-bogofilter-p (group) (spam-group-processor-p group 'gnus-group-spam-exit-processor-bogofilter)) @@ -279,113 +428,197 @@ articles before they get registered by Bogofilter." (defun spam-group-ham-processor-ifile-p (group) (spam-group-processor-p group 'gnus-group-ham-exit-processor-ifile)) +(defun spam-group-spam-processor-spamoracle-p (group) + (spam-group-processor-p group 'gnus-group-spam-exit-processor-spamoracle)) + +(defun spam-group-ham-processor-bogofilter-p (group) + (spam-group-processor-p group 'gnus-group-ham-exit-processor-bogofilter)) + +(defun spam-group-spam-processor-stat-p (group) + (spam-group-processor-p group 'gnus-group-spam-exit-processor-stat)) + +(defun spam-group-ham-processor-stat-p (group) + (spam-group-processor-p group 'gnus-group-ham-exit-processor-stat)) + (defun spam-group-ham-processor-whitelist-p (group) (spam-group-processor-p group 'gnus-group-ham-exit-processor-whitelist)) (defun spam-group-ham-processor-BBDB-p (group) (spam-group-processor-p group 'gnus-group-ham-exit-processor-BBDB)) +(defun spam-group-ham-processor-copy-p (group) + (spam-group-processor-p group 'gnus-group-ham-exit-processor-copy)) + +(defun spam-group-ham-processor-spamoracle-p (group) + (spam-group-processor-p group 'gnus-group-ham-exit-processor-spamoracle)) + ;;; Summary entry and exit processing. 
(defun spam-summary-prepare () (spam-mark-junk-as-spam-routine)) -(add-hook 'gnus-summary-prepare-hook 'spam-summary-prepare) - +;; The spam processors are invoked for any group, spam or ham or neither (defun spam-summary-prepare-exit () - ;; The spam processors are invoked for any group, spam or ham or neither - (when (and spam-bogofilter-path - (spam-group-spam-processor-bogofilter-p gnus-newsgroup-name)) - (spam-bogofilter-register-routine)) + (unless gnus-group-is-exiting-without-update-p + (gnus-message 6 "Exiting summary buffer and applying spam rules") + (when (and spam-bogofilter-path + (spam-group-spam-processor-bogofilter-p gnus-newsgroup-name)) + (gnus-message 5 "Registering spam with bogofilter") + (spam-bogofilter-register-spam-routine)) - (when (and spam-ifile-path - (spam-group-spam-processor-ifile-p gnus-newsgroup-name)) - (spam-ifile-register-spam-routine)) + (when (and spam-ifile-path + (spam-group-spam-processor-ifile-p gnus-newsgroup-name)) + (gnus-message 5 "Registering spam with ifile") + (spam-ifile-register-spam-routine)) - (when (spam-group-spam-processor-bogofilter-p gnus-newsgroup-name) - (spam-blacklist-register-routine)) - - (if spam-move-spam-nonspam-groups-only - (when (not (spam-group-spam-contents-p gnus-newsgroup-name)) - (spam-mark-spam-as-expired-and-move-routine - (gnus-parameter-spam-process-destination gnus-newsgroup-name))) - (spam-mark-spam-as-expired-and-move-routine - (gnus-parameter-spam-process-destination gnus-newsgroup-name))) - - ;; now we redo spam-mark-spam-as-expired-and-move-routine to only - ;; expire spam, in case the above did not expire them - (spam-mark-spam-as-expired-and-move-routine nil) - - (when (spam-group-ham-contents-p gnus-newsgroup-name) - (when (spam-group-ham-processor-whitelist-p gnus-newsgroup-name) - (spam-whitelist-register-routine)) - (when (spam-group-ham-processor-ifile-p gnus-newsgroup-name) - (spam-ifile-register-ham-routine)) - (when (spam-group-ham-processor-BBDB-p gnus-newsgroup-name) 
- (spam-BBDB-register-routine))) - - ;; now move all ham articles out of spam groups - (when (spam-group-spam-contents-p gnus-newsgroup-name) - (spam-ham-move-routine - (gnus-parameter-ham-process-destination gnus-newsgroup-name)))) - -(add-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit) + (when (spam-group-spam-processor-spamoracle-p gnus-newsgroup-name) + (gnus-message 5 "Registering spam with spamoracle") + (spam-spamoracle-learn-spam)) + + (when (spam-group-spam-processor-stat-p gnus-newsgroup-name) + (gnus-message 5 "Registering spam with spam-stat") + (spam-stat-register-spam-routine)) + + (when (spam-group-spam-processor-blacklist-p gnus-newsgroup-name) + (gnus-message 5 "Registering spam with the blacklist") + (spam-blacklist-register-routine)) + + (when (spam-group-spam-processor-report-gmane-p gnus-newsgroup-name) + (gnus-message 5 "Registering spam with the Gmane report") + (spam-report-gmane-register-routine)) + + (if spam-move-spam-nonspam-groups-only + (when (not (spam-group-spam-contents-p gnus-newsgroup-name)) + (spam-mark-spam-as-expired-and-move-routine + (gnus-parameter-spam-process-destination gnus-newsgroup-name))) + (gnus-message 5 "Marking spam as expired and moving it to %s" gnus-newsgroup-name) + (spam-mark-spam-as-expired-and-move-routine + (gnus-parameter-spam-process-destination gnus-newsgroup-name))) + + ;; now we redo spam-mark-spam-as-expired-and-move-routine to only + ;; expire spam, in case the above did not expire them + (gnus-message 5 "Marking spam as expired without moving it") + (spam-mark-spam-as-expired-and-move-routine nil) + + (when (or (spam-group-ham-contents-p gnus-newsgroup-name) + (and (spam-group-spam-contents-p gnus-newsgroup-name) + spam-process-ham-in-spam-groups) + spam-process-ham-in-nonham-groups) + (when (spam-group-ham-processor-whitelist-p gnus-newsgroup-name) + (gnus-message 5 "Registering ham with the whitelist") + (spam-whitelist-register-routine)) + (when (spam-group-ham-processor-ifile-p 
gnus-newsgroup-name) + (gnus-message 5 "Registering ham with ifile") + (spam-ifile-register-ham-routine)) + (when (spam-group-ham-processor-bogofilter-p gnus-newsgroup-name) + (gnus-message 5 "Registering ham with Bogofilter") + (spam-bogofilter-register-ham-routine)) + (when (spam-group-ham-processor-stat-p gnus-newsgroup-name) + (gnus-message 5 "Registering ham with spam-stat") + (spam-stat-register-ham-routine)) + (when (spam-group-ham-processor-BBDB-p gnus-newsgroup-name) + (gnus-message 5 "Registering ham with the BBDB") + (spam-BBDB-register-routine)) + (when (spam-group-ham-processor-spamoracle-p gnus-newsgroup-name) + (gnus-message 5 "Registering ham with spamoracle") + (spam-spamoracle-learn-ham))) + + (when (spam-group-ham-processor-copy-p gnus-newsgroup-name) + (gnus-message 5 "Copying ham") + (spam-ham-copy-routine + (gnus-parameter-ham-process-destination gnus-newsgroup-name))) + + ;; now move all ham articles out of spam groups + (when (spam-group-spam-contents-p gnus-newsgroup-name) + (gnus-message 5 "Moving ham messages from spam group") + (spam-ham-move-routine + (gnus-parameter-ham-process-destination gnus-newsgroup-name))))) (defun spam-mark-junk-as-spam-routine () ;; check the global list of group names spam-junk-mailgroups and the ;; group parameters (when (spam-group-spam-contents-p gnus-newsgroup-name) - (let ((articles gnus-newsgroup-articles) - article) - (while articles - (setq article (pop articles)) - (when (eq (gnus-summary-article-mark article) gnus-unread-mark) - (gnus-summary-mark-article article gnus-spam-mark)))))) - -(defun spam-mark-spam-as-expired-and-move-routine (&optional group) + (gnus-message 5 "Marking %s articles as spam" + (if spam-mark-only-unseen-as-spam + "unseen" + "unread")) + (let ((articles (if spam-mark-only-unseen-as-spam + gnus-newsgroup-unseen + gnus-newsgroup-unreads))) + (dolist (article articles) + (gnus-summary-mark-article article gnus-spam-mark))))) + +(defun spam-mark-spam-as-expired-and-move-routine 
(&rest groups)
+  "Mark spam articles as expirable and move them to GROUPS.
+Every article carrying `gnus-spam-mark' gets `gnus-expirable-mark'.
+Each string in GROUPS then receives a copy of those articles;
+non-string entries (such as a nil destination) are skipped.  The
+originals are deleted only after at least one copy has been made, so
+calling this without a usable destination merely expires the spam."
+  (gnus-summary-kill-process-mark)
   (let ((articles gnus-newsgroup-articles)
-        article)
-    (while articles
-      (setq article (pop articles))
+        tomove deletep)
+    (dolist (article articles)
       (when (eq (gnus-summary-article-mark article) gnus-spam-mark)
         (gnus-summary-mark-article article gnus-expirable-mark)
-        (when (stringp group)
-          (let ((gnus-current-article article))
-            (gnus-summary-move-article nil group)))))))
-
-(defun spam-ham-move-routine (&optional group)
-  (let ((articles gnus-newsgroup-articles)
-        article ham-mark-values mark)
-    (dolist (mark spam-ham-marks)
-      (push (symbol-value mark) ham-mark-values))
+        (push article tomove)))
 
-    (while articles
-      (setq article (pop articles))
-      (when (and (memq mark ham-mark-values)
+    ;; now do the actual copies
+    (dolist (group groups)
+      (when (and tomove
                  (stringp group))
-        (let ((gnus-current-article article))
-          (gnus-summary-move-article nil group))))))
+        (dolist (article tomove)
+          (gnus-summary-set-process-mark article))
+        (gnus-summary-copy-article nil group)
+        ;; remember that the articles now live somewhere else
+        (setq deletep t)))
+
+    ;; now delete the articles, but only if they were copied somewhere;
+    ;; otherwise a nil destination would silently destroy the spam
+    (when deletep
+      (dolist (article tomove)
+        (gnus-summary-set-process-mark article))
+      (gnus-summary-delete-article nil)))
+
+  (gnus-summary-yank-process-mark))
+
+(defun spam-ham-copy-or-move-routine (copy &rest groups)
+  "Copy or move the ham articles of the current group to GROUPS.
+Ham articles are those whose mark satisfies `spam-group-ham-mark-p'
+for `gnus-newsgroup-name'.  Each string in GROUPS receives a copy;
+non-string entries are skipped.  GROUPS may also be given as a single
+list of group names.  When COPY is nil the originals are deleted
+afterwards, but only if at least one copy was made.  When COPY is
+non-nil the originals are left in place."
+  (gnus-summary-kill-process-mark)
+  ;; tolerate callers that hand over the destinations as one list
+  (when (listp (car groups))
+    (setq groups (car groups)))
+  (let ((articles gnus-newsgroup-articles)
+        todo deletep)
+    (dolist (article articles)
+      (when (spam-group-ham-mark-p gnus-newsgroup-name
+                                   (gnus-summary-article-mark article))
+        (push article todo)))
+
+    ;; now do the actual copies
+    (dolist (group groups)
+      (when (and todo
+                 (stringp group))
+        (dolist (article todo)
+          (when spam-mark-ham-unread-before-move-from-spam-group
+            (gnus-summary-mark-article article gnus-unread-mark))
+          (gnus-summary-set-process-mark article))
+        (gnus-summary-copy-article nil group)
+        (setq deletep t)))
+
+    ;; now delete the articles, unless we were only asked to copy
+    ;; or nothing was copied anywhere
+    (when (and deletep (not copy))
+      (dolist (article todo)
+        (gnus-summary-set-process-mark article))
+      (gnus-summary-delete-article nil)))
+
+  (gnus-summary-yank-process-mark))
+
+(defun spam-ham-copy-routine (&rest groups)
+  "Copy the ham articles of the current group to GROUPS, keeping the originals."
+  ;; `apply' spreads GROUPS so the callee sees the group names themselves
+  (apply 'spam-ham-copy-or-move-routine t groups))
+
+(defun spam-ham-move-routine (&rest groups)
+  "Move the ham articles of the current group to GROUPS."
+  (apply 'spam-ham-copy-or-move-routine nil groups))
 
 (defun spam-generic-register-routine (spam-func ham-func)
   (let ((articles gnus-newsgroup-articles)
-        article mark ham-articles spam-articles spam-mark-values
-        ham-mark-values)
-
-    ;; marks are stored as symbolic values, so we have to dereference
-    ;; them for memq to work.  we wouldn't have to do this if
-    ;; gnus-summary-article-mark returned a symbol.
-    (dolist (mark spam-ham-marks)
-      (push (symbol-value mark) ham-mark-values))
-
-    (dolist (mark spam-spam-marks)
-      (push (symbol-value mark) spam-mark-values))
+        article mark ham-articles spam-articles)
 
     (while articles
       (setq article (pop articles)
             mark (gnus-summary-article-mark article))
-      (cond ((memq mark spam-mark-values) (push article spam-articles))
+      (cond ((spam-group-spam-mark-p gnus-newsgroup-name mark)
+             (push article spam-articles))
             ((memq article gnus-newsgroup-saved))
-            ((memq mark ham-mark-values) (push article ham-articles))))
+            ((spam-group-ham-mark-p gnus-newsgroup-name mark)
+             (push article ham-articles))))
+
     (when (and ham-articles ham-func)
       (mapc ham-func ham-articles))     ; we use mapc because unlike
                                         ; mapcar it discards the
@@ -401,14 +634,32 @@ articles before they get registered by Bogofilter.
'line-end-position))) (defun spam-get-article-as-string (article) - (let ((article-string)) + (let ((article-buffer (spam-get-article-as-buffer article)) + article-string) + (when article-buffer + (save-window-excursion + (set-buffer article-buffer) + (setq article-string (buffer-string)))) + article-string)) + +(defun spam-get-article-as-buffer (article) + (let ((article-buffer)) (when (numberp article) (save-window-excursion (gnus-summary-goto-subject article) (gnus-summary-show-article t) - (set-buffer gnus-article-buffer) - (setq article-string (buffer-string)))) - article-string)) + (setq article-buffer (get-buffer gnus-article-buffer)))) + article-buffer)) + +;; disabled for now +;; (defun spam-get-article-as-filename (article) +;; (let ((article-filename)) +;; (when (numberp article) +;; (nnml-possibly-change-directory (gnus-group-real-name gnus-newsgroup-name)) +;; (setq article-filename (expand-file-name (int-to-string article) nnml-current-directory))) +;; (if (file-exists-p article-filename) +;; article-filename +;; nil))) (defun spam-fetch-field-from-fast (article) "Fetch the `from' field quickly, using the internal gnus-data-list function" @@ -428,75 +679,160 @@ articles before they get registered by Bogofilter." ;;;; Spam determination. (defvar spam-list-of-checks - '((spam-use-blacklist . spam-check-blacklist) - (spam-use-whitelist . spam-check-whitelist) - (spam-use-BBDB . spam-check-BBDB) - (spam-use-ifile . spam-check-ifile) - (spam-use-blackholes . spam-check-blackholes) - (spam-use-bogofilter . spam-check-bogofilter)) + '((spam-use-blacklist . spam-check-blacklist) + (spam-use-regex-headers . spam-check-regex-headers) + (spam-use-regex-body . spam-check-regex-body) + (spam-use-whitelist . spam-check-whitelist) + (spam-use-BBDB . spam-check-BBDB) + (spam-use-ifile . spam-check-ifile) + (spam-use-spamoracle . spam-check-spamoracle) + (spam-use-stat . spam-check-stat) + (spam-use-blackholes . spam-check-blackholes) + (spam-use-hashcash . 
spam-check-hashcash) + (spam-use-bogofilter-headers . spam-check-bogofilter-headers) + (spam-use-bogofilter . spam-check-bogofilter)) "The spam-list-of-checks list contains pairs associating a parameter variable with a spam checking function. If the parameter variable is true, then the checking function is called, and its value decides what -happens. Each individual check may return `nil', `t', or a mailgroup -name. The value `nil' means that the check does not yield a decision, -and so, that further checks are needed. The value `t' means that the +happens. Each individual check may return nil, t, or a mailgroup +name. The value nil means that the check does not yield a decision, +and so, that further checks are needed. The value t means that the message is definitely not spam, and that further spam checks should be inhibited. Otherwise, a mailgroup name is returned where the mail should go, and further checks are also inhibited. The usual mailgroup name is the value of `spam-split-group', meaning that the message is definitely a spam.") -(defun spam-split () +(defvar spam-list-of-statistical-checks + '(spam-use-ifile spam-use-regex-body spam-use-stat spam-use-bogofilter spam-use-spamoracle) +"The spam-list-of-statistical-checks list contains all the mail +splitters that need to have the full message body available.") + +;;;TODO: modify to invoke self with each specific check if invoked without specific checks +(defun spam-split (&rest specific-checks) "Split this message into the `spam' group if it is spam. This function can be used as an entry in `nnmail-split-fancy', for -example like this: (: spam-split) +example like this: (: spam-split). It can take checks as parameters. See the Info node `(gnus)Fancy Mail Splitting' for more details." 
(interactive) + (save-excursion + (save-restriction + (dolist (check spam-list-of-statistical-checks) + (when (symbol-value check) + (widen) + (gnus-message 8 "spam-split: widening the buffer (%s requires it)" + (symbol-name check)) + (return))) + ;; (progn (widen) (debug (buffer-string))) + (let ((list-of-checks spam-list-of-checks) + decision) + (while (and list-of-checks (not decision)) + (let ((pair (pop list-of-checks))) + (when (and (symbol-value (car pair)) + (or (null specific-checks) + (memq (car pair) specific-checks))) + (gnus-message 5 "spam-split: calling the %s function" (symbol-name (cdr pair))) + (setq decision (funcall (cdr pair)))))) + (if (eq decision t) + nil + decision))))) + +(defun spam-setup-widening () + (dolist (check spam-list-of-statistical-checks) + (when (symbol-value check) + (setq nnimap-split-download-body-default t)))) + + +;;;; Regex body + +(defun spam-check-regex-body () + (let ((spam-regex-headers-ham spam-regex-body-ham) + (spam-regex-headers-spam spam-regex-body-spam)) + (spam-check-regex-headers t))) + + +;;;; Regex headers + +(defun spam-check-regex-headers (&optional body) + (let ((type (if body "body" "header")) + ret found) + (dolist (h-regex spam-regex-headers-ham) + (unless found + (goto-char (point-min)) + (when (re-search-forward h-regex nil t) + (message "Ham regex %s search positive." type) + (setq found t)))) + (dolist (s-regex spam-regex-headers-spam) + (unless found + (goto-char (point-min)) + (when (re-search-forward s-regex nil t) + (message "Spam regex %s search positive." type) + (setq found t) + (setq ret spam-split-group)))) + ret)) - (let ((list-of-checks spam-list-of-checks) - decision) - (while (and list-of-checks (not decision)) - (let ((pair (pop list-of-checks))) - (when (symbol-value (car pair)) - (setq decision (funcall (cdr pair)))))) - (if (eq decision t) - nil - decision))) ;;;; Blackholes. 
+(defun spam-reverse-ip-string (ip) + (when (stringp ip) + (mapconcat 'identity + (nreverse (split-string ip "\\.")) + "."))) + (defun spam-check-blackholes () "Check the Received headers for blackholed relays." - (let ((headers (message-fetch-field "received")) + (let ((headers (nnmail-fetch-field "received")) ips matches) (when headers (with-temp-buffer (insert headers) (goto-char (point-min)) + (gnus-message 5 "Checking headers for relay addresses") (while (re-search-forward - "\\[\\([0-9]+.[0-9]+.[0-9]+.[0-9]+\\)\\]" nil t) - (message "Blackhole search found host IP %s." (match-string 1)) - (push (mapconcat 'identity - (nreverse (split-string (match-string 1) "\\.")) - ".") + "\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\)" nil t) + (gnus-message 9 "Blackhole search found host IP %s." (match-string 1)) + (push (spam-reverse-ip-string (match-string 1)) ips))) (dolist (server spam-blackhole-servers) (dolist (ip ips) - (let ((query-string (concat ip "." server))) - (if spam-use-dig - (let ((query-result (query-dig query-string))) - (when query-result - (message "spam: positive blackhole check '%s'" query-result) - (push (list ip server query-result) - matches))) - ;; else, if not using dig.el - (when (query-dns query-string) - (push (list ip server (query-dns query-string 'TXT)) - matches))))))) + (unless (and spam-blackhole-good-server-regex + ;; match the good-server-regex against the reversed (again) IP string + (string-match + spam-blackhole-good-server-regex + (spam-reverse-ip-string ip))) + (unless matches + (let ((query-string (concat ip "." 
server))) + (if spam-use-dig + (let ((query-result (query-dig query-string))) + (when query-result + (gnus-message 5 "(DIG): positive blackhole check '%s'" + query-result) + (push (list ip server query-result) + matches))) + ;; else, if not using dig.el + (when (query-dns query-string) + (gnus-message 5 "positive blackhole check") + (push (list ip server (query-dns query-string 'TXT)) + matches))))))))) (when matches spam-split-group))) +;;;; Hashcash. + +(condition-case nil + (progn + (require 'hashcash) + + (defun spam-check-hashcash () + "Check the headers for hashcash payments." + (mail-check-payment))) ;mail-check-payment returns a boolean + + (file-error (progn + (defalias 'mail-check-payment 'ignore) + (defalias 'spam-check-hashcash 'ignore)))) + ;;;; BBDB ;;; original idea for spam-check-BBDB from Alexander Kotelnikov @@ -515,7 +851,7 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details." (let* ((parsed-address (gnus-extract-address-components from)) (name (or (car parsed-address) "Ham Sender")) (net-address (car (cdr parsed-address)))) - (message "Adding address %s to BBDB" from) + (gnus-message 5 "Adding address %s to BBDB" from) (when (and net-address (not (bbdb-search-simple nil net-address))) (bbdb-create-internal name nil net-address nil nil @@ -530,13 +866,15 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details." 
(spam-enter-ham-BBDB (spam-fetch-field-from-fast article))))) (defun spam-check-BBDB () - "Mail from people in the BBDB is never considered spam" - (let ((who (message-fetch-field "from"))) + "Mail from people in the BBDB is classified as ham or non-spam" + (let ((who (nnmail-fetch-field "from"))) (when who - (setq who (regexp-quote (cadr - (gnus-extract-address-components who)))) + (setq who (cadr (gnus-extract-address-components who))) (if (bbdb-search-simple nil who) - nil spam-split-group))))) + t + (if spam-use-BBDB-exclusive + spam-split-group + nil)))))) (file-error (progn (defalias 'bbdb-search-simple 'ignore) @@ -580,7 +918,6 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details." (setq return category) ;; else, if spam-ifile-all-categories is not set... (when (string-equal spam-ifile-spam-category category) - ;; always accept the ifile category (setq return spam-split-group)))))) return)) @@ -588,13 +925,17 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details." "Register an article, given as a string, with a category. Uses `gnus-newsgroup-name' if category is nil (for ham registration)." (when (stringp article-string) - (let ((category (or category gnus-newsgroup-name))) + (let ((category (or category gnus-newsgroup-name)) + (db-param (spam-get-ifile-database-parameter))) (with-temp-buffer - (insert-string article-string) - (call-process-region (point-min) (point-max) spam-ifile-path - nil nil nil - "-h" "-i" category - (spam-get-ifile-database-parameter)))))) + (insert article-string) + (if db-param + (call-process-region (point-min) (point-max) spam-ifile-path + nil nil nil + "-h" "-i" category db-param) + (call-process-region (point-min) (point-max) spam-ifile-path + nil nil nil + "-h" "-i" category)))))) (defun spam-ifile-register-spam-routine () (spam-generic-register-routine @@ -608,9 +949,61 @@ Uses `gnus-newsgroup-name' if category is nil (for ham registration)." 
nil (lambda (article) (spam-ifile-register-with-ifile - (spam-get-article-as-string article) nil)))) + (spam-get-article-as-string article) spam-ifile-ham-category)))) + + +;;;; spam-stat + +(condition-case nil + (progn + (let ((spam-stat-install-hooks nil)) + (require 'spam-stat)) + + (defun spam-check-stat () + "Check the spam-stat backend for the classification of this message" + (let ((spam-stat-split-fancy-spam-group spam-split-group) ; override + (spam-stat-buffer (buffer-name)) ; stat the current buffer + category return) + (spam-stat-split-fancy))) + + (defun spam-stat-register-spam-routine () + (spam-generic-register-routine + (lambda (article) + (let ((article-string (spam-get-article-as-string article))) + (with-temp-buffer + (insert article-string) + (spam-stat-buffer-is-spam)))) + nil)) + + (defun spam-stat-register-ham-routine () + (spam-generic-register-routine + nil + (lambda (article) + (let ((article-string (spam-get-article-as-string article))) + (with-temp-buffer + (insert article-string) + (spam-stat-buffer-is-non-spam)))))) + + (defun spam-maybe-spam-stat-load () + (when spam-use-stat (spam-stat-load))) + + (defun spam-maybe-spam-stat-save () + (when spam-use-stat (spam-stat-save)))) + + (file-error (progn + (defalias 'spam-maybe-spam-stat-load 'ignore) + (defalias 'spam-maybe-spam-stat-save 'ignore) + (defalias 'spam-stat-register-ham-routine 'ignore) + (defalias 'spam-stat-register-spam-routine 'ignore) + (defalias 'spam-stat-buffer-is-spam 'ignore) + (defalias 'spam-stat-buffer-is-non-spam 'ignore) + (defalias 'spam-stat-split-fancy 'ignore) + (defalias 'spam-stat-load 'ignore) + (defalias 'spam-stat-save 'ignore) + (defalias 'spam-check-stat 'ignore)))) + ;;;; Blacklists and whitelists. (defvar spam-whitelist-cache nil) @@ -635,18 +1028,24 @@ Uses `gnus-newsgroup-name' if category is nil (for ham registration)." 
(save-excursion (set-buffer (find-file-noselect file)) - (goto-char (point-max)) - (unless (bobp) - (insert "\n")) - (insert address "\n") - (save-buffer))) - -;;; returns nil if the sender is in the whitelist, spam-split-group otherwise + (goto-char (point-min)) + (unless (re-search-forward (regexp-quote address) nil t) + (goto-char (point-max)) + (unless (bobp) + (insert "\n")) + (insert address "\n") + (save-buffer)))) + +;;; returns t if the sender is in the whitelist, nil or spam-split-group otherwise (defun spam-check-whitelist () ;; FIXME! Should it detect when file timestamps change? (unless spam-whitelist-cache (setq spam-whitelist-cache (spam-parse-list spam-whitelist))) - (if (spam-from-listed-p spam-whitelist-cache) nil spam-split-group)) + (if (spam-from-listed-p spam-whitelist-cache) + t + (if spam-use-whitelist-exclusive + spam-split-group + nil))) (defun spam-check-blacklist () ;; FIXME! Should it detect when file timestamps change? @@ -662,20 +1061,25 @@ Uses `gnus-newsgroup-name' if category is nil (for ham registration)." 
(while (not (eobp)) (setq address (buffer-substring (point) (spam-point-at-eol))) (forward-line 1) + ;; insert the e-mail address if detected, otherwise the raw data (unless (zerop (length address)) - (setq address (regexp-quote address)) - (while (string-match "\\\\\\*" address) - (setq address (replace-match ".*" t t address))) - (push address contents)))) + (let ((pure-address (cadr (gnus-extract-address-components address)))) + (push (or pure-address address) contents))))) (nreverse contents)))) (defun spam-from-listed-p (cache) - (let ((from (message-fetch-field "from")) + (let ((from (nnmail-fetch-field "from")) found) (while cache - (when (string-match (pop cache) from) - (setq found t - cache nil))) + (let ((address (pop cache))) + (unless (zerop (length address)) ; 0 for a nil address too + (setq address (regexp-quote address)) + ;; fix regexp-quote's treatment of user-intended regexes + (while (string-match "\\\\\\*" address) + (setq address (replace-match ".*" t t address)))) + (when (and address (string-match address from)) + (setq found t + cache nil)))) found)) (defun spam-blacklist-register-routine () @@ -699,227 +1103,174 @@ Uses `gnus-newsgroup-name' if category is nil (for ham registration)." (spam-enter-whitelist from)))))) -;;;; Bogofilter - -;;; See Paul Graham article, at `http://www.paulgraham.com/spam.html'. - -;;; This page is for those wanting to control spam with the help of -;;; Eric Raymond's speedy Bogofilter, see -;;; http://www.tuxedo.org/~esr/bogofilter. This has been tested with -;;; a locally patched copy of version 0.4. - -;;; Make sure Bogofilter is installed. Bogofilter internally uses -;;; Judy fast associative arrays, so you need to install Judy first, -;;; and Bogofilter next. 
Fetch both distributions by visiting the -;;; following links and downloading the latest version of each: -;;; -;;; http://sourceforge.net/projects/judy/ -;;; http://www.tuxedo.org/~esr/bogofilter/ -;;; -;;; Unpack the Judy distribution and enter its main directory. Then do: -;;; -;;; ./configure -;;; make -;;; make install -;;; -;;; You will likely need to become super-user for the last step. -;;; Then, unpack the Bogofilter distribution and enter its main -;;; directory: -;;; -;;; make -;;; make install -;;; -;;; Here as well, you need to become super-user for the last step. -;;; Now, initialize your word lists by doing, under your own identity: -;;; -;;; mkdir ~/.bogofilter -;;; touch ~/.bogofilter/badlist -;;; touch ~/.bogofilter/goodlist -;;; -;;; These two files are text files you may edit, but you normally don't! - -;;; The `M-d' command gets added to Gnus summary mode, marking current -;;; article as spam, showing it with the `H' mark. Whenever you see a -;;; spam article, make sure to mark its summary line with `M-d' before -;;; leaving the group. Some groups, as per variable -;;; `spam-junk-mailgroups' below, receive articles from Gnus splitting -;;; on clues added by spam recognisers, so for these groups, we tack -;;; an `H' mark at group entry for all summary lines which would -;;; otherwise have no other mark. Make sure to _remove_ `H' marks for -;;; any article which is _not_ genuine spam, before leaving such -;;; groups: you may use `M-u' to "unread" the article, or `d' for -;;; declaring it read the non-spam way. When you leave a group, all -;;; `H' marked articles, saved or unsaved, are sent to Bogofilter -;;; which will study them as spam samples. 
- -;;; Messages may also be deleted in various other ways, and unless -;;; `spam-ham-marks-form' gets overridden below, marks `R' and `r' for -;;; default read or explicit delete, marks `X' and 'K' for automatic -;;; or explicit kills, as well as mark `Y' for low scores, are all -;;; considered to be associated with articles which are not spam. -;;; This assumption might be false, in particular if you use kill -;;; files or score files as means for detecting genuine spam, you -;;; should then adjust `spam-ham-marks-form'. When you leave a group, -;;; all _unsaved_ articles bearing any the above marks are sent to -;;; Bogofilter which will study these as not-spam samples. If you -;;; explicit kill a lot, you might sometimes end up with articles -;;; marked `K' which you never saw, and which might accidentally -;;; contain spam. Best is to make sure that real spam is marked with -;;; `H', and nothing else. - -;;; All other marks do not contribute to Bogofilter pre-conditioning. -;;; In particular, ticked, dormant or souped articles are likely to -;;; contribute later, when they will get deleted for real, so there is -;;; no need to use them prematurely. Explicitly expired articles do -;;; not contribute, command `E' is a way to get rid of an article -;;; without Bogofilter ever seeing it. - -;;; In a word, with a minimum of care for associating the `H' mark for -;;; spam articles only, Bogofilter training all gets fairly automatic. -;;; You should do this until you get a few hundreds of articles in -;;; each category, spam or not. The shell command `head -1 -;;; ~/.bogofilter/*' shows both article counts. The command `S S' in -;;; summary mode, either for debugging or for curiosity, triggers -;;; Bogofilter into displaying in another buffer the "spamicity" score -;;; of the current article (between 0.0 and 1.0), together with the -;;; article words which most significantly contribute to the score. 
- -;;; The real way for using Bogofilter, however, is to have some use -;;; tool like `procmail' for invoking it on message reception, then -;;; adding some recognisable header in case of detected spam. Gnus -;;; splitting rules might later trip on these added headers and react -;;; by sorting such articles into specific junk folders as per -;;; `spam-junk-mailgroups'. Here is a possible `.procmailrc' contents -;;; (still untested -- please tell me how it goes): -;;; -;;; :0HBf: -;;; * ? bogofilter -;;; | formail -bfI "X-Spam-Status: Yes" - -(defun spam-check-bogofilter () - ;; Dynamic spam check. I do not know how to check the exit status, - ;; so instead, read `bogofilter -v' output. - (when (and spam-use-bogofilter spam-bogofilter-path) - (spam-bogofilter-articles nil "-v" (list (gnus-summary-article-number))) - (when (save-excursion - (set-buffer spam-bogofilter-output-buffer-name) - (goto-char (point-min)) - (re-search-forward "Spamicity: \\(0\\.9\\|1\\.0\\)" nil t)) - spam-split-group))) +;;;; Spam-report glue +(defun spam-report-gmane-register-routine () + (spam-generic-register-routine + 'spam-report-gmane + nil)) + +;;;; Bogofilter +(defun spam-check-bogofilter-headers (&optional score) + (let ((header (nnmail-fetch-field spam-bogofilter-header))) + (when header ; return nil when no header + (if score ; scoring mode + (if (string-match "spamicity=\\([0-9.]+\\)" header) + (match-string 1 header) + "0") + ;; spam detection mode + (when (string-match spam-bogofilter-bogosity-positive-spam-header + header) + spam-split-group))))) + +;; return something sensible if the score can't be determined (defun spam-bogofilter-score () - "Use `bogofilter -v' on the current article. -This yields the 15 most discriminant words for this article and the -spamicity coefficient of each, and the overall article spamicity." 
+ "Get the Bogofilter spamicity score" (interactive) - (when (and spam-use-bogofilter spam-bogofilter-path) - (spam-bogofilter-articles nil "-v" (list (gnus-summary-article-number))) - (with-current-buffer spam-bogofilter-output-buffer-name - (unless (zerop (buffer-size)) - (if (<= (count-lines (point-min) (point-max)) 1) - (progn - (goto-char (point-max)) - (when (bolp) - (backward-char 1)) - (message "%s" (buffer-substring (point-min) (point)))) - (goto-char (point-min)) - (display-buffer (current-buffer))))))) - -(defun spam-bogofilter-register-routine () - (let ((articles gnus-newsgroup-articles) - article mark ham-articles spam-articles spam-mark-values - ham-mark-values) + (save-window-excursion + (gnus-summary-show-article t) + (set-buffer gnus-article-buffer) + (let ((score (or (spam-check-bogofilter-headers t) + (spam-check-bogofilter t)))) + (message "Spamicity score %s" score) + (or score "0")) + (gnus-summary-show-article))) + +(defun spam-check-bogofilter (&optional score) + "Check the Bogofilter backend for the classification of this message" + (let ((article-buffer-name (buffer-name)) + return) + (with-temp-buffer + (let ((temp-buffer-name (buffer-name))) + (save-excursion + (set-buffer article-buffer-name) + (if spam-bogofilter-database-directory + (call-process-region (point-min) (point-max) + spam-bogofilter-path + nil temp-buffer-name nil "-v" + "-d" spam-bogofilter-database-directory) + (call-process-region (point-min) (point-max) spam-bogofilter-path + nil temp-buffer-name nil "-v"))) + (setq return (spam-check-bogofilter-headers score)))) + return)) - ;; marks are stored as symbolic values, so we have to dereference - ;; them for memq to work we wouldn't have to do this if - ;; gnus-summary-article-mark returned a symbol. - (dolist (mark spam-ham-marks) - (push (symbol-value mark) ham-mark-values)) +(defun spam-bogofilter-register-with-bogofilter (article-string spam) + "Register an article, given as a string, as spam or non-spam." 
+ (when (stringp article-string) + (let ((switch (if spam spam-bogofilter-spam-switch + spam-bogofilter-ham-switch))) + (with-temp-buffer + (insert article-string) + (if spam-bogofilter-database-directory + (call-process-region (point-min) (point-max) + spam-bogofilter-path + nil nil nil "-v" switch + "-d" spam-bogofilter-database-directory) + (call-process-region (point-min) (point-max) spam-bogofilter-path + nil nil nil "-v" switch)))))) + +(defun spam-bogofilter-register-spam-routine () + (spam-generic-register-routine + (lambda (article) + (spam-bogofilter-register-with-bogofilter + (spam-get-article-as-string article) t)) + nil)) - (dolist (mark spam-spam-marks) - (push (symbol-value mark) spam-mark-values)) +(defun spam-bogofilter-register-ham-routine () + (spam-generic-register-routine + nil + (lambda (article) + (spam-bogofilter-register-with-bogofilter + (spam-get-article-as-string article) nil)))) - (while articles - (setq article (pop articles) - mark (gnus-summary-article-mark article)) - (cond ((memq mark spam-mark-values) (push article spam-articles)) - ((memq article gnus-newsgroup-saved)) - ((memq mark ham-mark-values) (push article ham-articles)))) - (when ham-articles - (spam-bogofilter-articles "ham" "-n" ham-articles)) - (when spam-articles - (spam-bogofilter-articles "SPAM" "-s" spam-articles)))) - -(defun spam-bogofilter-articles (type option articles) - (let ((output-buffer (get-buffer-create spam-bogofilter-output-buffer-name)) - (article-copy (get-buffer-create " *Bogofilter Article Copy*")) - (remove-regexp (concat spam-bogofilter-spaminfo-header-regexp - "\\|Xref:")) - (counter 0) - prefix process article) - (when type - (setq prefix (format "Studying %d articles as %s..." 
(length articles) - type)) - (message "%s" prefix)) - (save-excursion (set-buffer output-buffer) (erase-buffer)) - (setq process (start-process "bogofilter" output-buffer - spam-bogofilter-path "-F" option)) - (process-kill-without-query process t) - (unwind-protect - (save-window-excursion - (while articles - (setq counter (1+ counter)) - (when prefix - (message "%s %d" prefix counter)) - (setq article (pop articles)) - (gnus-summary-goto-subject article) - (gnus-summary-show-article t) - (gnus-eval-in-buffer-window article-copy - (insert-buffer-substring gnus-original-article-buffer) - ;; Remove spam classification redundant headers: they may induce - ;; unwanted biases in later analysis. - (message-remove-header remove-regexp t) - ;; Bogofilter really wants From envelopes for counting articles. - ;; Fake one at the beginning, make sure there will be no other. - (goto-char (point-min)) - (if (looking-at "From ") - (forward-line 1) - (insert "From nobody " (current-time-string) "\n")) - (let (case-fold-search) - (while (re-search-forward "^From " nil t) - (beginning-of-line) - (insert ">"))) - (process-send-region process (point-min) (point-max)) - (erase-buffer)))) - ;; Sending the EOF is unwind-protected. This is to prevent lost copies - ;; of `bogofilter', hung on reading their standard input, in case the - ;; whole registering process gets interrupted by the user. - (process-send-eof process)) - (kill-buffer article-copy) - ;; Receive process output. It sadly seems that we still have to protect - ;; ourselves against hung `bogofilter' processes. - (let ((status (process-status process)) - (timeout (* 1000 spam-bogofilter-initial-timeout)) - (quanta 200)) ; also counted in milliseconds - (while (and (not (eq status 'exit)) (> timeout 0)) - ;; `accept-process-output' timeout is counted in microseconds. 
- (setq timeout (if (accept-process-output process 0 (* 1000 quanta)) - (* 1000 spam-bogofilter-subsequent-timeout) - (- timeout quanta)) - status (process-status process))) - (if (eq status 'exit) - (when prefix - (message "%s done!" prefix)) - ;; Sigh! The process did time out... Become brutal! - (interrupt-process process) - (message "%s %d INTERRUPTED! (Article %d, status %s)" - (or prefix "Bogofilter process...") - counter article status) - ;; Give some time for user to read. Sitting redisplays but gives up - ;; if input is pending. Sleeping does not give up, but it does not - ;; redisplay either. Mix both: let's redisplay and not give up. - (sit-for 1) - (sleep-for 3))))) + +;;;; spamoracle +(defun spam-check-spamoracle () + "Run spamoracle on an article to determine whether it's spam." + (let ((article-buffer-name (buffer-name))) + (with-temp-buffer + (let ((temp-buffer-name (buffer-name))) + (save-excursion + (set-buffer article-buffer-name) + (let ((status + (apply 'call-process-region + (point-min) (point-max) + spam-spamoracle-binary + nil temp-buffer-name nil + (if spam-spamoracle-database + `("-f" ,spam-spamoracle-database "mark") + '("mark"))))) + (if (zerop status) + (progn + (set-buffer temp-buffer-name) + (goto-char (point-min)) + (when (re-search-forward "^X-Spam: yes;" nil t) + spam-split-group)) + (error "Error running spamoracle" status)))))))) + +(defun spam-spamoracle-learn (article article-is-spam-p) + "Run spamoracle in training mode." 
+ (with-temp-buffer + (let ((temp-buffer-name (buffer-name))) + (save-excursion + (goto-char (point-min)) + (insert (spam-get-article-as-string article)) + (let* ((arg (if article-is-spam-p "-spam" "-good")) + (status + (apply 'call-process-region + (point-min) (point-max) + spam-spamoracle-binary + nil temp-buffer-name nil + (if spam-spamoracle-database + `("-f" ,spam-spamoracle-database + "add" ,arg) + `("add" ,arg))))) + (when (not (zerop status)) + (error "Error running spamoracle" status))))))) + +(defun spam-spamoracle-learn-ham () + (spam-generic-register-routine + nil + (lambda (article) + (spam-spamoracle-learn article nil)))) + +(defun spam-spamoracle-learn-spam () + (spam-generic-register-routine + (lambda (article) + (spam-spamoracle-learn article t)) + nil)) + +;;;; Hooks + +;;;###autoload +(defun spam-initialize () + "Install the spam.el hooks and do other initialization" + (interactive) + (setq spam-install-hooks t) + ;; TODO: How do we redo this every time spam-face is customized? + (push '((eq mark gnus-spam-mark) . 
spam-face) + gnus-summary-highlight) + ;; Add hooks for loading and saving the spam stats + (when spam-use-stat + (add-hook 'gnus-save-newsrc-hook 'spam-maybe-spam-stat-save) + (add-hook 'gnus-get-top-new-news-hook 'spam-maybe-spam-stat-load) + (add-hook 'gnus-startup-hook 'spam-maybe-spam-stat-load)) + (add-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit) + (add-hook 'gnus-summary-prepare-hook 'spam-summary-prepare) + (add-hook 'gnus-get-new-news-hook 'spam-setup-widening)) + +(defun spam-unload-hook () + "Uninstall the spam.el hooks" + (interactive) + (remove-hook 'gnus-save-newsrc-hook 'spam-maybe-spam-stat-save) + (remove-hook 'gnus-get-top-new-news-hook 'spam-maybe-spam-stat-load) + (remove-hook 'gnus-startup-hook 'spam-maybe-spam-stat-load) + (remove-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit) + (remove-hook 'gnus-summary-prepare-hook 'spam-summary-prepare) + (remove-hook 'gnus-get-new-news-hook 'spam-setup-widening)) + +(when spam-install-hooks + (spam-initialize)) (provide 'spam)