Oops.
[gnus] / lisp / spam.el
1 ;; TODO: spam scores, detection of spam in newsgroups, cross-server splitting, remote processing, training through files
2
3 ;;; spam.el --- Identifying spam
4 ;; Copyright (C) 2002, 2003 Free Software Foundation, Inc.
5
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7 ;; Keywords: network
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;;; This module addresses a few aspects of spam control under Gnus.  Page
29 ;;; breaks are used for grouping declarations and documentation relating to
30 ;;; each particular aspect.
31
32 ;;; The integration with Gnus is not yet complete.  See various `FIXME'
33 ;;; comments, below, for supplementary explanations or discussions.
34
35 ;;; Several TODO items are marked as such
36
37 ;;; Code:
38
39 (eval-when-compile (require 'cl))
40
41 (require 'gnus-sum)
42
43 (require 'gnus-uu)                      ; because of key prefix issues
44 ;;; for the definitions of group content classification and spam processors
45 (require 'gnus) 
46 (require 'message)              ;for the message-fetch-field functions
47
48 ;; for nnimap-split-download-body-default
49 (eval-when-compile (require 'nnimap))
50
51 ;; autoload executable-find
52 (eval-and-compile
53   ;; executable-find is not autoloaded in Emacs 20
54   (autoload 'executable-find "executable"))
55
56 ;; autoload query-dig
57 (eval-and-compile
58   (autoload 'query-dig "dig"))
59
60 ;; autoload spam-report
61 (eval-and-compile
62   (autoload 'spam-report-gmane "spam-report"))
63
64 ;; autoload gnus-registry
65 (eval-and-compile
66   (autoload 'gnus-registry-store-extra-entry "gnus-registry")
67   (autoload 'gnus-registry-fetch-extra "gnus-registry"))
68
69 ;; autoload query-dns
70 (eval-and-compile
71   (autoload 'query-dns "dns"))
72
73 ;;; Main parameters.
74
75 (defgroup spam nil
76   "Spam configuration.")
77
78 (defcustom spam-directory "~/News/spam/"
79   "Directory for spam whitelists and blacklists."
80   :type 'directory
81   :group 'spam)
82
83 (defcustom spam-move-spam-nonspam-groups-only t
84   "Whether spam should be moved in non-spam groups only.
85 When t, only ham and unclassified groups will have their spam moved
86 to the spam-process-destination.  When nil, spam will also be moved from
87 spam groups."
88   :type 'boolean
89   :group 'spam)
90
91 (defcustom spam-process-ham-in-nonham-groups nil
92   "Whether ham should be processed in non-ham groups."
93   :type 'boolean
94   :group 'spam)
95
96 (defcustom spam-log-to-registry nil
97   "Whether spam/ham processing should be logged in the registry."
98   :type 'boolean
99   :group 'spam)
100
101 (defcustom spam-process-ham-in-spam-groups nil
102   "Whether ham should be processed in spam groups."
103   :type 'boolean
104   :group 'spam)
105
106 (defcustom spam-mark-only-unseen-as-spam t
107   "Whether only unseen articles should be marked as spam in spam
108 groups.  When nil, all unread articles in a spam group are marked as
109 spam.  Set this if you want to leave an article unread in a spam group
110 without losing it to the automatic spam-marking process."
111   :type 'boolean
112   :group 'spam)
113
114 (defcustom spam-mark-ham-unread-before-move-from-spam-group nil
115   "Whether ham should be marked unread before it's moved out of a spam
116 group according to ham-process-destination.  This variable is an
117 official entry in the international Longest Variable Name
118 Competition."
119   :type 'boolean
120   :group 'spam)
121
122 (defcustom spam-whitelist (expand-file-name "whitelist" spam-directory)
123   "The location of the whitelist.
124 The file format is one regular expression per line.
125 The regular expression is matched against the address."
126   :type 'file
127   :group 'spam)
128
129 (defcustom spam-blacklist (expand-file-name "blacklist" spam-directory)
130   "The location of the blacklist.
131 The file format is one regular expression per line.
132 The regular expression is matched against the address."
133   :type 'file
134   :group 'spam)
135
136 (defcustom spam-use-dig t
137   "Whether query-dig should be used instead of query-dns."
138   :type 'boolean
139   :group 'spam)
140
141 (defcustom spam-use-blacklist nil
142   "Whether the blacklist should be used by spam-split."
143   :type 'boolean
144   :group 'spam)
145
146 (defcustom spam-blacklist-ignored-regexes nil
147   "Regular expressions that the blacklist should ignore."
148   :type '(repeat (regexp :tag "Regular expression to ignore when blacklisting"))
149   :group 'spam)
150
151 (defcustom spam-use-whitelist nil
152   "Whether the whitelist should be used by spam-split."
153   :type 'boolean
154   :group 'spam)
155
156 (defcustom spam-use-whitelist-exclusive nil
157   "Whether whitelist-exclusive should be used by spam-split.
158 Exclusive whitelisting means that all messages from senders not in the whitelist
159 are considered spam."
160   :type 'boolean
161   :group 'spam)
162
163 (defcustom spam-use-blackholes nil
164   "Whether blackholes should be used by spam-split."
165   :type 'boolean
166   :group 'spam)
167
168 (defcustom spam-use-hashcash nil
169   "Whether hashcash payments should be detected by spam-split."
170   :type 'boolean
171   :group 'spam)
172
173 (defcustom spam-use-regex-headers nil
174   "Whether a header regular expression match should be used by spam-split.
175 Also see the variables `spam-regex-headers-spam' and `spam-regex-headers-ham'."
176   :type 'boolean
177   :group 'spam)
178
179 (defcustom spam-use-regex-body nil
180   "Whether a body regular expression match should be used by spam-split.
181 Also see the variables `spam-regex-body-spam' and `spam-regex-body-ham'."
182   :type 'boolean
183   :group 'spam)
184
185 (defcustom spam-use-bogofilter-headers nil
186   "Whether bogofilter headers should be used by spam-split.
187 Enable this if you pre-process messages with Bogofilter BEFORE Gnus sees them."
188   :type 'boolean
189   :group 'spam)
190
191 (defcustom spam-use-bogofilter nil
192   "Whether bogofilter should be invoked by spam-split.
193 Enable this if you want Gnus to invoke Bogofilter on new messages."
194   :type 'boolean
195   :group 'spam)
196
197 (defcustom spam-use-BBDB nil
198   "Whether BBDB should be used by spam-split."
199   :type 'boolean
200   :group 'spam)
201
202 (defcustom spam-use-BBDB-exclusive nil
203   "Whether BBDB-exclusive should be used by spam-split.
204 Exclusive BBDB means that all messages from senders not in the BBDB are 
205 considered spam."
206   :type 'boolean
207   :group 'spam)
208
209 (defcustom spam-use-ifile nil
210   "Whether ifile should be used by spam-split."
211   :type 'boolean
212   :group 'spam)
213
214 (defcustom spam-use-stat nil
215   "Whether spam-stat should be used by spam-split."
216   :type 'boolean
217   :group 'spam)
218
219 (defcustom spam-use-spamoracle nil
220   "Whether spamoracle should be used by spam-split."
221   :type 'boolean
222   :group 'spam)
223
224 (defcustom spam-install-hooks (or
225                                spam-use-dig
226                                spam-use-blacklist
227                                spam-use-whitelist 
228                                spam-use-whitelist-exclusive 
229                                spam-use-blackholes 
230                                spam-use-hashcash 
231                                spam-use-regex-headers 
232                                spam-use-regex-body 
233                                spam-use-bogofilter-headers 
234                                spam-use-bogofilter 
235                                spam-use-BBDB 
236                                spam-use-BBDB-exclusive 
237                                spam-use-ifile 
238                                spam-use-stat
239                                spam-use-spamoracle)
240   "Whether the spam hooks should be installed, default to t if one of
241 the spam-use-* variables is set."
242   :group 'spam
243   :type 'boolean)
244
245 (defcustom spam-split-group "spam"
246   "Group name where incoming spam should be put by spam-split."
247   :type 'string
248   :group 'spam)
249
250 ;;; TODO: deprecate this variable, it's confusing since it's a list of strings,
251 ;;; not regular expressions
252 (defcustom spam-junk-mailgroups (cons 
253                                  spam-split-group 
254                                  '("mail.junk" "poste.pourriel"))
255   "Mailgroups with spam contents.
256 All unmarked article in such group receive the spam mark on group entry."
257   :type '(repeat (string :tag "Group"))
258   :group 'spam)
259
260 (defcustom spam-blackhole-servers '("bl.spamcop.net" "relays.ordb.org" 
261                                     "dev.null.dk" "relays.visi.com")
262   "List of blackhole servers."
263   :type '(repeat (string :tag "Server"))
264   :group 'spam)
265
266 (defcustom spam-blackhole-good-server-regex nil
267   "String matching IP addresses that should not be checked in the blackholes"
268   :type '(radio (const nil)
269                 (regexp :format "%t: %v\n" :size 0))
270   :group 'spam)
271
272 (defcustom spam-face 'gnus-splash-face
273   "Face for spam-marked articles"
274   :type 'face
275   :group 'spam)
276
277 (defcustom spam-regex-headers-spam '("^X-Spam-Flag: YES")
278   "Regular expression for positive header spam matches"
279   :type '(repeat (regexp :tag "Regular expression to match spam header"))
280   :group 'spam)
281
282 (defcustom spam-regex-headers-ham '("^X-Spam-Flag: NO")
283   "Regular expression for positive header ham matches"
284   :type '(repeat (regexp :tag "Regular expression to match ham header"))
285   :group 'spam)
286
287 (defcustom spam-regex-body-spam '()
288   "Regular expression for positive body spam matches"
289   :type '(repeat (regexp :tag "Regular expression to match spam body"))
290   :group 'spam)
291
292 (defcustom spam-regex-body-ham '()
293   "Regular expression for positive body ham matches"
294   :type '(repeat (regexp :tag "Regular expression to match ham body"))
295   :group 'spam)
296
297 (defgroup spam-ifile nil
298   "Spam ifile configuration."
299   :group 'spam)
300
301 (defcustom spam-ifile-path (executable-find "ifile")
302   "File path of the ifile executable program."
303   :type '(choice (file :tag "Location of ifile")
304                  (const :tag "ifile is not installed"))
305   :group 'spam-ifile)
306
307 (defcustom spam-ifile-database-path nil
308   "File path of the ifile database."
309   :type '(choice (file :tag "Location of the ifile database")
310                  (const :tag "Use the default"))
311   :group 'spam-ifile)
312
313 (defcustom spam-ifile-spam-category "spam"
314   "Name of the spam ifile category."  
315   :type 'string
316   :group 'spam-ifile)
317
318 (defcustom spam-ifile-ham-category nil
319   "Name of the ham ifile category.  If nil, the current group name will
320 be used."
321   :type '(choice (string :tag "Use a fixed category")
322                  (const :tag "Use the current group name"))
323   :group 'spam-ifile)
324
325 (defcustom spam-ifile-all-categories nil
326   "Whether the ifile check will return all categories, or just spam.
327 Set this to t if you want to use the spam-split invocation of ifile as
328 your main source of newsgroup names."
329   :type 'boolean
330   :group 'spam-ifile)
331
332 (defgroup spam-bogofilter nil
333   "Spam bogofilter configuration."
334   :group 'spam)
335
336 (defcustom spam-bogofilter-path (executable-find "bogofilter")
337   "File path of the Bogofilter executable program."
338   :type '(choice (file :tag "Location of bogofilter")
339                  (const :tag "Bogofilter is not installed"))
340   :group 'spam-bogofilter)
341
342 (defcustom spam-bogofilter-header "X-Bogosity"
343   "The header that Bogofilter inserts in messages."
344   :type 'string
345   :group 'spam-bogofilter)
346
347 (defcustom spam-bogofilter-spam-switch "-s"
348   "The switch that Bogofilter uses to register spam messages."
349   :type 'string
350   :group 'spam-bogofilter)
351
352 (defcustom spam-bogofilter-ham-switch "-n"
353   "The switch that Bogofilter uses to register ham messages."
354   :type 'string
355   :group 'spam-bogofilter)
356
357 (defcustom spam-bogofilter-spam-strong-switch "-S"
358   "The switch that Bogofilter uses to unregister ham messages."
359   :type 'string
360   :group 'spam-bogofilter)
361
362 (defcustom spam-bogofilter-ham-strong-switch "-N"
363   "The switch that Bogofilter uses to unregister spam messages."
364   :type 'string
365   :group 'spam-bogofilter)
366
367 (defcustom spam-bogofilter-bogosity-positive-spam-header "^\\(Yes\\|Spam\\)"
368   "The regex on `spam-bogofilter-header' for positive spam identification."
369   :type 'regexp
370   :group 'spam-bogofilter)
371
372 (defcustom spam-bogofilter-database-directory nil
373   "Directory path of the Bogofilter databases."
374   :type '(choice (directory 
375                   :tag "Location of the Bogofilter database directory")
376                  (const :tag "Use the default"))
377   :group 'spam-ifile)
378
379 (defgroup spam-spamoracle nil
380   "Spam spamoracle configuration."
381   :group 'spam)
382
383 (defcustom spam-spamoracle-database nil 
384   "Location of spamoracle database file. When nil, use the default
385 spamoracle database."
386   :type '(choice (directory :tag "Location of spamoracle database file.")
387                  (const :tag "Use the default"))
388   :group 'spam-spamoracle)
389
390 (defcustom spam-spamoracle-binary (executable-find "spamoracle")
391   "Location of the spamoracle binary."
392   :type '(choice (directory :tag "Location of the spamoracle binary")
393                  (const :tag "Use the default"))
394   :group 'spam-spamoracle)
395
396 ;;; Key bindings for spam control.
397
398 (gnus-define-keys gnus-summary-mode-map
399   "St" spam-bogofilter-score
400   "Sx" gnus-summary-mark-as-spam
401   "Mst" spam-bogofilter-score
402   "Msx" gnus-summary-mark-as-spam
403   "\M-d" gnus-summary-mark-as-spam)
404
405 (defvar spam-old-ham-articles nil
406   "List of old ham articles, generated when a group is entered.")
407
408 (defvar spam-old-spam-articles nil
409   "List of old spam articles, generated when a group is entered.")
410
411
412 ;; convenience functions
413 (defun spam-xor (a b) ; logical exclusive or
414   (and (or a b) (not (and a b))))
415
416 (defun spam-group-ham-mark-p (group mark &optional spam)
417   (when (stringp group)
418     (let* ((marks (spam-group-ham-marks group spam))
419            (marks (if (symbolp mark) 
420                       marks 
421                     (mapcar 'symbol-value marks))))
422       (memq mark marks))))
423
424 (defun spam-group-spam-mark-p (group mark)
425   (spam-group-ham-mark-p group mark t))
426
427 (defun spam-group-ham-marks (group &optional spam)
428   (when (stringp group)
429     (let* ((marks (if spam
430                       (gnus-parameter-spam-marks group)
431                     (gnus-parameter-ham-marks group)))
432            (marks (car marks))
433            (marks (if (listp (car marks)) (car marks) marks)))
434       marks)))
435
436 (defun spam-group-spam-marks (group)
437   (spam-group-ham-marks group t))
438
439 (defun spam-group-spam-contents-p (group)
440   (if (stringp group)
441       (or (member group spam-junk-mailgroups)
442           (memq 'gnus-group-spam-classification-spam 
443                 (gnus-parameter-spam-contents group)))
444     nil))
445   
446 (defun spam-group-ham-contents-p (group)
447   (if (stringp group)
448       (memq 'gnus-group-spam-classification-ham 
449             (gnus-parameter-spam-contents group))
450     nil))
451
452 (defvar spam-list-of-processors
453   '((gnus-group-spam-exit-processor-report-gmane spam spam-use-gmane)
454     (gnus-group-spam-exit-processor-bogofilter   spam spam-use-bogofilter)
455     (gnus-group-spam-exit-processor-blacklist    spam spam-use-blacklist)
456     (gnus-group-spam-exit-processor-ifile        spam spam-use-ifile)
457     (gnus-group-spam-exit-processor-stat         spam spam-use-stat)
458     (gnus-group-spam-exit-processor-spamoracle   spam spam-use-spamoracle)
459     (gnus-group-ham-exit-processor-ifile         ham spam-use-ifile)
460     (gnus-group-ham-exit-processor-bogofilter    ham spam-use-bogofilter)
461     (gnus-group-ham-exit-processor-stat          ham spam-use-stat)
462     (gnus-group-ham-exit-processor-whitelist     ham spam-use-whitelist)
463     (gnus-group-ham-exit-processor-BBDB          ham spam-use-BBDB)
464     (gnus-group-ham-exit-processor-copy          ham spam-use-ham-copy)
465     (gnus-group-ham-exit-processor-spamoracle    ham spam-use-spamoracle))
466   "The spam-list-of-processors list contains pairs associating a
467 ham/spam exit processor variable with a classification and a
468 spam-use-* variable.")
469
470 (defun spam-group-processor-p (group processor)
471   (if (and (stringp group)
472            (symbolp processor))
473       (or (member processor (nth 0 (gnus-parameter-spam-process group)))
474           (spam-group-processor-multiple-p 
475            group 
476            (cdr-safe (assoc processor spam-list-of-processors))))
477     nil))
478
479 (defun spam-group-processor-multiple-p (group processor-info)
480   (let* ((classification (nth 0 processor-info))
481          (check (nth 1 processor-info))
482          (parameters (nth 0 (gnus-parameter-spam-process group)))
483          found)
484     (dolist (parameter parameters)
485       (when (and (null found)
486                  (listp parameter)
487                  (eq classification (nth 0 parameter))
488                  (eq check (nth 1 parameter)))
489         (setq found t)))
490     found))
491
492 (defun spam-group-spam-processor-report-gmane-p (group)
493   (spam-group-processor-p group 'gnus-group-spam-exit-processor-report-gmane))
494
495 (defun spam-group-spam-processor-bogofilter-p (group)
496   (spam-group-processor-p group 'gnus-group-spam-exit-processor-bogofilter))
497
498 (defun spam-group-spam-processor-blacklist-p (group)
499   (spam-group-processor-p group 'gnus-group-spam-exit-processor-blacklist))
500
501 (defun spam-group-spam-processor-ifile-p (group)
502   (spam-group-processor-p group 'gnus-group-spam-exit-processor-ifile))
503
504 (defun spam-group-ham-processor-ifile-p (group)
505   (spam-group-processor-p group 'gnus-group-ham-exit-processor-ifile))
506
507 (defun spam-group-spam-processor-spamoracle-p (group)
508   (spam-group-processor-p group 'gnus-group-spam-exit-processor-spamoracle))
509
510 (defun spam-group-ham-processor-bogofilter-p (group)
511   (spam-group-processor-p group 'gnus-group-ham-exit-processor-bogofilter))
512
513 (defun spam-group-spam-processor-stat-p (group)
514   (spam-group-processor-p group 'gnus-group-spam-exit-processor-stat))
515
516 (defun spam-group-ham-processor-stat-p (group)
517   (spam-group-processor-p group 'gnus-group-ham-exit-processor-stat))
518
519 (defun spam-group-ham-processor-whitelist-p (group)
520   (spam-group-processor-p group 'gnus-group-ham-exit-processor-whitelist))
521
522 (defun spam-group-ham-processor-BBDB-p (group)
523   (spam-group-processor-p group 'gnus-group-ham-exit-processor-BBDB))
524
525 (defun spam-group-ham-processor-copy-p (group)
526   (spam-group-processor-p group 'gnus-group-ham-exit-processor-copy))
527
528 (defun spam-group-ham-processor-spamoracle-p (group)
529   (spam-group-processor-p group 'gnus-group-ham-exit-processor-spamoracle))
530
531 ;;; Summary entry and exit processing.
532
533 (defun spam-summary-prepare ()
534   (setq spam-old-ham-articles 
535         (spam-list-articles gnus-newsgroup-articles 'ham))
536   (setq spam-old-spam-articles 
537         (spam-list-articles gnus-newsgroup-articles 'spam))
538   (spam-mark-junk-as-spam-routine))
539
540 ;; The spam processors are invoked for any group, spam or ham or neither
541 (defun spam-summary-prepare-exit ()
542   (unless gnus-group-is-exiting-without-update-p
543     (gnus-message 6 "Exiting summary buffer and applying spam rules")
544
545     ;; first of all, unregister any articles that are no longer ham or spam
546     ;; we have to iterate over the processors, or else we'll be too slow
547     (dolist (classification '(spam ham))
548       (let* ((old-articles (if (eq classification 'spam)
549                                spam-old-spam-articles 
550                              spam-old-ham-articles))
551              (new-articles (spam-list-articles 
552                             gnus-newsgroup-articles 
553                             classification))
554              (changed-articles (gnus-set-difference old-articles new-articles)))
555         ;; now that we have the changed articles, we go through the processors
556         (dolist (processor-param spam-list-of-processors)
557           (let ((processor (nth 0 processor-param))
558                 (processor-classification (nth 1 processor-param))
559                 (check (nth 2 processor-param))
560                 unregister-list)
561             (dolist (article changed-articles)
562               (let ((id (spam-fetch-field-message-id-fast article)))
563                 (when (spam-log-unregistration-needed-p 
564                        id 'process classification check)
565                   (push article unregister-list))))
566             ;; call spam-register-routine with specific articles to unregister,
567             ;; when there are articles to unregister and the check is enabled
568             (when (and unregister-list (symbol-value check))
569               (spam-register-routine classification check t unregister-list))))))
570       
571     ;; find all the spam processors applicable to this group
572     (dolist (processor-param spam-list-of-processors)
573       (let ((processor (nth 0 processor-param))
574             (classification (nth 1 processor-param))
575             (check (nth 2 processor-param)))
576         (when (and (eq 'spam classification)
577                    (spam-group-processor-p gnus-newsgroup-name processor))
578           (spam-register-routine classification check))))
579
580     (if spam-move-spam-nonspam-groups-only      
581         (when (not (spam-group-spam-contents-p gnus-newsgroup-name))
582           (spam-mark-spam-as-expired-and-move-routine
583            (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
584       (gnus-message 5 "Marking spam as expired and moving it to %s" 
585                     gnus-newsgroup-name)
586       (spam-mark-spam-as-expired-and-move-routine 
587        (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
588
589     ;; now we redo spam-mark-spam-as-expired-and-move-routine to only
590     ;; expire spam, in case the above did not expire them
591     (gnus-message 5 "Marking spam as expired without moving it")
592     (spam-mark-spam-as-expired-and-move-routine nil)
593
594     (when (or (spam-group-ham-contents-p gnus-newsgroup-name)
595               (and (spam-group-spam-contents-p gnus-newsgroup-name)
596                    spam-process-ham-in-spam-groups)
597               spam-process-ham-in-nonham-groups)
598       ;; find all the ham processors applicable to this group
599       (dolist (processor-param spam-list-of-processors)
600         (let ((processor (nth 0 processor-param))
601               (classification (nth 1 processor-param))
602               (check (nth 2 processor-param)))
603           (when (and (eq 'ham classification)
604                      (spam-group-processor-p gnus-newsgroup-name processor))
605             (spam-register-routine classification check)))))
606
607     (when (spam-group-ham-processor-copy-p gnus-newsgroup-name)
608       (gnus-message 5 "Copying ham")
609       (spam-ham-copy-routine
610        (gnus-parameter-ham-process-destination gnus-newsgroup-name)))
611
612     ;; now move all ham articles out of spam groups
613     (when (spam-group-spam-contents-p gnus-newsgroup-name)
614       (gnus-message 5 "Moving ham messages from spam group")
615       (spam-ham-move-routine
616        (gnus-parameter-ham-process-destination gnus-newsgroup-name))))
617
618   (setq spam-old-ham-articles nil)
619   (setq spam-old-spam-articles nil))
620
621 (defun spam-mark-junk-as-spam-routine ()
622   ;; check the global list of group names spam-junk-mailgroups and the
623   ;; group parameters
624   (when (spam-group-spam-contents-p gnus-newsgroup-name)
625     (gnus-message 5 "Marking %s articles as spam"
626                   (if spam-mark-only-unseen-as-spam 
627                       "unseen"
628                     "unread"))
629     (let ((articles (if spam-mark-only-unseen-as-spam 
630                         gnus-newsgroup-unseen
631                       gnus-newsgroup-unreads)))
632       (dolist (article articles)
633         (gnus-summary-mark-article article gnus-spam-mark)))))
634
635 (defun spam-mark-spam-as-expired-and-move-routine (&rest groups)
636   (if (and (car-safe groups) (listp (car-safe groups)))
637       (apply 'spam-mark-spam-as-expired-and-move-routine (car groups))
638     (gnus-summary-kill-process-mark)
639     (let ((articles gnus-newsgroup-articles)
640           (backend-supports-deletions
641            (gnus-check-backend-function
642             'request-move-article gnus-newsgroup-name))
643           article tomove deletep)
644       (dolist (article articles)
645         (when (eq (gnus-summary-article-mark article) gnus-spam-mark)
646           (gnus-summary-mark-article article gnus-expirable-mark)
647           (push article tomove)))
648     
649       ;; now do the actual copies
650       (dolist (group groups)
651         (when (and tomove
652                    (stringp group))
653           (dolist (article tomove)
654             (gnus-summary-set-process-mark article))
655           (when tomove
656             (if (or (not backend-supports-deletions)
657                     (> (length groups) 1))
658                 (progn 
659                   (gnus-summary-copy-article nil group)
660                   (setq deletep t))
661               (gnus-summary-move-article nil group)))))
662     
663       ;; now delete the articles, if there was a copy done, and the
664       ;; backend allows it
665       (when (and deletep backend-supports-deletions)
666         (dolist (article tomove)
667           (gnus-summary-set-process-mark article))
668         (when tomove
669           (let ((gnus-novice-user nil)) ; don't ask me if I'm sure
670             (gnus-summary-delete-article nil))))
671     
672       (gnus-summary-yank-process-mark))))
673  
674 (defun spam-ham-copy-or-move-routine (copy groups)
675   (gnus-summary-kill-process-mark)
676   (let ((articles gnus-newsgroup-articles)
677         (backend-supports-deletions
678          (gnus-check-backend-function
679           'request-move-article gnus-newsgroup-name))
680         (respool-method (gnus-find-method-for-group gnus-newsgroup-name))
681         article mark todo deletep respool)
682     (dolist (article articles)
683       (when (spam-group-ham-mark-p gnus-newsgroup-name
684                                    (gnus-summary-article-mark article))
685         (push article todo)))
686
687     (when (member 'respool groups)
688       (setq respool t)                  ; boolean for later
689       (setq groups '("fake"))) ; when respooling, groups are dynamic so fake it
690
691     ;; now do the actual move
692     (dolist (group groups)
693       (when (and todo (stringp group))
694         (dolist (article todo)
695           (when spam-mark-ham-unread-before-move-from-spam-group
696             (gnus-summary-mark-article article gnus-unread-mark))
697           (gnus-summary-set-process-mark article))
698
699         (if respool                ; respooling is with a "fake" group
700             (gnus-summary-respool-article nil respool-method)
701           (if (or (not backend-supports-deletions) ; else, we are not respooling
702                   (> (length groups) 1))
703               (progn                ; if copying, copy and set deletep
704                 (gnus-summary-copy-article nil group)
705                 (setq deletep t))
706             (gnus-summary-move-article nil group))))) ; else move articles
707     
708     ;; now delete the articles, unless a) copy is t, and there was a copy done
709     ;;                                 b) a move was done to a single group
710     ;;                                 c) backend-supports-deletions is nil
711     (unless copy
712       (when (and deletep backend-supports-deletions)
713         (dolist (article todo)
714           (gnus-summary-set-process-mark article))
715         (when todo
716           (let ((gnus-novice-user nil)) ; don't ask me if I'm sure
717             (gnus-summary-delete-article nil))))))
718   
719   (gnus-summary-yank-process-mark))
720  
721 (defun spam-ham-copy-routine (&rest groups)
722   (if (and (car-safe groups) (listp (car-safe groups)))
723       (apply 'spam-ham-copy-routine (car groups))
724     (spam-ham-copy-or-move-routine t groups)))
725  
726 (defun spam-ham-move-routine (&rest groups)
727   (if (and (car-safe groups) (listp (car-safe groups)))
728       (apply 'spam-ham-move-routine (car groups))
729     (spam-ham-copy-or-move-routine nil groups)))
730  
731 (eval-and-compile
732   (defalias 'spam-point-at-eol (if (fboundp 'point-at-eol)
733                                    'point-at-eol
734                                  'line-end-position)))
735
736 (defun spam-get-article-as-string (article)
737   (let ((article-buffer (spam-get-article-as-buffer article))
738         article-string)
739     (when article-buffer
740       (save-window-excursion
741         (set-buffer article-buffer)
742         (setq article-string (buffer-string))))
743     article-string))
744
745 (defun spam-get-article-as-buffer (article)
746   (let ((article-buffer))
747     (when (numberp article)
748       (save-window-excursion
749         (gnus-summary-goto-subject article)
750         (gnus-summary-show-article t)
751         (setq article-buffer (get-buffer gnus-article-buffer))))
752     article-buffer))
753
754 ;; disabled for now
755 ;; (defun spam-get-article-as-filename (article)
756 ;;   (let ((article-filename))
757 ;;     (when (numberp article)
758 ;;       (nnml-possibly-change-directory 
759 ;;        (gnus-group-real-name gnus-newsgroup-name))
760 ;;       (setq article-filename (expand-file-name 
761 ;;                              (int-to-string article) nnml-current-directory)))
762 ;;     (if (file-exists-p article-filename)
763 ;;      article-filename
764 ;;       nil)))
765
766 (defun spam-fetch-field-from-fast (article)
767   "Fetch the `from' field quickly, using the internal gnus-data-list function"
768   (if (and (numberp article)
769            (assoc article (gnus-data-list nil)))
770       (mail-header-from 
771        (gnus-data-header (assoc article (gnus-data-list nil))))
772     nil))
773
774 (defun spam-fetch-field-subject-fast (article)
775   "Fetch the `subject' field quickly, using the internal
776   gnus-data-list function"
777   (if (and (numberp article)
778            (assoc article (gnus-data-list nil)))
779       (mail-header-subject 
780        (gnus-data-header (assoc article (gnus-data-list nil))))
781     nil))
782
783 (defun spam-fetch-field-message-id-fast (article)
784   "Fetch the `Message-ID' field quickly, using the internal
785   gnus-data-list function"
786   (if (and (numberp article)
787            (assoc article (gnus-data-list nil)))
788       (mail-header-message-id 
789        (gnus-data-header (assoc article (gnus-data-list nil))))
790     nil))
791
792 \f
793 ;;;; Spam determination.
794
795 (defvar spam-list-of-checks
796   '((spam-use-blacklist          . spam-check-blacklist)
797     (spam-use-regex-headers      . spam-check-regex-headers)
798     (spam-use-regex-body         . spam-check-regex-body)
799     (spam-use-whitelist          . spam-check-whitelist)
800     (spam-use-BBDB               . spam-check-BBDB)
801     (spam-use-ifile              . spam-check-ifile)
802     (spam-use-spamoracle         . spam-check-spamoracle)
803     (spam-use-stat               . spam-check-stat)
804     (spam-use-blackholes         . spam-check-blackholes)
805     (spam-use-hashcash           . spam-check-hashcash)
806     (spam-use-bogofilter-headers . spam-check-bogofilter-headers)
807     (spam-use-bogofilter         . spam-check-bogofilter))
808   "The spam-list-of-checks list contains pairs associating a parameter
809 variable with a spam checking function.  If the parameter variable is
810 true, then the checking function is called, and its value decides what
811 happens.  Each individual check may return nil, t, or a mailgroup
812 name.  The value nil means that the check does not yield a decision,
813 and so, that further checks are needed.  The value t means that the
814 message is definitely not spam, and that further spam checks should be
815 inhibited.  Otherwise, a mailgroup name is returned where the mail
816 should go, and further checks are also inhibited.  The usual mailgroup
817 name is the value of `spam-split-group', meaning that the message is
818 definitely a spam.")
819
820 (defvar spam-list-of-statistical-checks 
821   '(spam-use-ifile
822     spam-use-regex-body 
823     spam-use-stat 
824     spam-use-bogofilter
825     spam-use-spamoracle)
826   "The spam-list-of-statistical-checks list contains all the mail
827 splitters that need to have the full message body available.")
828
829 ;;;TODO: modify to invoke self with each check if invoked without specifics
830 (defun spam-split (&rest specific-checks)
831   "Split this message into the `spam' group if it is spam.
832 This function can be used as an entry in `nnmail-split-fancy',
833 for example like this: (: spam-split).  It can take checks as
834 parameters.  A string as a parameter will set the
835 spam-split-group to that string.
836
837 See the Info node `(gnus)Fancy Mail Splitting' for more details."
838   (interactive)
839   (let ((spam-split-group-choice spam-split-group))
840     (dolist (check specific-checks)
841       (when (stringp check)
842         (setq spam-split-group-choice check)
843         (setq specific-checks (delq check specific-checks))))
844
845     (let ((spam-split-group spam-split-group-choice))
846       (save-excursion
847         (save-restriction
848           (dolist (check spam-list-of-statistical-checks)
849             (when (and (symbolp check) (symbol-value check))
850               (widen)
851               (gnus-message 8 "spam-split: widening the buffer (%s requires it)"
852                             (symbol-name check))
853               (return)))
854           ;;   (progn (widen) (debug (buffer-string)))
855           (let ((list-of-checks spam-list-of-checks)
856                 decision)
857             (while (and list-of-checks (not decision))
858               (let ((pair (pop list-of-checks)))
859                 (when (and (symbol-value (car pair))
860                            (or (null specific-checks)
861                                (memq (car pair) specific-checks)))
862                   (gnus-message 5 "spam-split: calling the %s function" 
863                                 (symbol-name (cdr pair)))
864                   (setq decision (funcall (cdr pair))))))
865             (if (eq decision t)
866                 nil
867               decision)))))))
868
869 (defvar spam-registration-functions
870   ;; first the ham register, second the spam register function
871   ;; third the ham unregister, fourth the spam unregister function
872   '((spam-use-blacklist  nil 
873                          spam-blacklist-register-routine
874                          nil
875                          spam-blacklist-unregister-routine)
876     (spam-use-whitelist  spam-whitelist-register-routine
877                          nil
878                          spam-whitelist-unregister-routine
879                          nil)
880     (spam-use-BBDB       spam-BBDB-register-routine 
881                          nil
882                          spam-BBDB-unregister-routine 
883                          nil)
884     (spam-use-ifile      spam-ifile-register-ham-routine 
885                          spam-ifile-register-spam-routine
886                          spam-ifile-unregister-ham-routine 
887                          spam-ifile-unregister-spam-routine)
888     (spam-use-spamoracle spam-spamoracle-learn-ham 
889                          spam-spamoracle-learn-spam
890                          spam-spamoracle-unlearn-ham 
891                          spam-spamoracle-unlearn-spam)
892     (spam-use-stat       spam-stat-register-ham-routine 
893                          spam-stat-register-spam-routine
894                          spam-stat-unregister-ham-routine 
895                          spam-stat-unregister-spam-routine)
896     ;; note that spam-use-gmane is not a legitimate check
897     (spam-use-gmane      nil 
898                          spam-report-gmane-register-routine
899                          ;; does Gmane support unregistration?
900                          nil
901                          nil)
902     (spam-use-bogofilter spam-bogofilter-register-ham-routine 
903                          spam-bogofilter-register-spam-routine
904                          spam-bogofilter-unregister-ham-routine 
905                          spam-bogofilter-unregister-spam-routine))
906   "The spam-registration-functions list contains pairs
907 associating a parameter variable with the ham and spam
908 registration functions, and the ham and spam unregistration
909 functions")
910
911 (defun spam-classification-valid-p (classification)
912   (or  (eq classification 'spam)
913        (eq classification 'ham)))
914
915 (defun spam-process-type-valid-p (process-type)
916   (or  (eq process-type 'incoming)
917        (eq process-type 'process)))
918
919 (defun spam-registration-check-valid-p (check)
920   (assoc check spam-registration-functions))
921
922 (defun spam-unregistration-check-valid-p (check)
923   (assoc check spam-registration-functions))
924
925 (defun spam-registration-function (classification check)
926   (let ((flist (cdr-safe (assoc check spam-registration-functions))))
927     (if (eq classification 'spam)
928         (nth 1 flist)
929       (nth 0 flist))))
930
931 (defun spam-unregistration-function (classification check)
932   (let ((flist (cdr-safe (assoc check spam-registration-functions))))
933     (if (eq classification 'spam)
934         (nth 3 flist)
935       (nth 2 flist))))
936
937 (defun spam-list-articles (articles classification)
938   (let ((mark-check (if (eq classification 'spam) 
939                         'spam-group-spam-mark-p 
940                       'spam-group-ham-mark-p))
941         mark list)
942     (dolist (article articles)
943       (when (funcall mark-check 
944                      gnus-newsgroup-name 
945                      (gnus-summary-article-mark article))
946         (push article list)))
947     list))
948
949 (defun spam-register-routine (classification 
950                               check 
951                               &optional unregister 
952                               specific-articles)
953   (when (and (spam-classification-valid-p classification)
954              (spam-registration-check-valid-p check))
955     (let* ((register-function
956             (spam-registration-function classification check))
957            (unregister-function
958             (spam-unregistration-function classification check))
959            (run-function (if unregister 
960                              unregister-function 
961                            register-function))
962            (log-function (if unregister
963                              'spam-log-undo-registration
964                            'spam-log-processing-to-registry))
965            article articles)
966
967       (when run-function
968         ;; make list of articles, using specific-articles if given
969         (setq articles (or specific-articles
970                            (spam-list-articles 
971                             gnus-newsgroup-articles 
972                             classification)))
973         ;; process them
974         (gnus-message 5 "%s %d %s articles with classification %s, check %s"
975                       (if unregister "Unregistering" "Registering")
976                       (length articles)
977                       (if specific-articles "specific" "")
978                       (symbol-name classification)
979                       (symbol-name check))
980         (funcall run-function articles)
981         ;; now log all the registrations (or undo them, depending on unregister)
982         (dolist (article articles)
983           (funcall log-function
984                    (spam-fetch-field-message-id-fast article)
985                    'process
986                    classification
987                    check
988                    gnus-newsgroup-name))))))
989
990 ;;; log a ham- or spam-processor invocation to the registry
991 (defun spam-log-processing-to-registry (id type classification check group)
992   (when spam-log-to-registry
993     (if (and (stringp id)
994              (stringp group)
995              (spam-process-type-valid-p type)
996              (spam-classification-valid-p classification)
997              (spam-registration-check-valid-p check))
998         (let ((cell-list (cdr-safe (gnus-registry-fetch-extra id type)))
999               (cell (list classification check group)))
1000           (push cell cell-list)
1001           (gnus-registry-store-extra-entry
1002            id
1003            type
1004            cell-list))
1005
1006       (gnus-message 5 (format "%s called with bad ID, type, classification, check, or group"
1007                               "spam-log-processing-to-registry")))))
1008
1009 ;;; check if a ham- or spam-processor registration needs to be undone
1010 (defun spam-log-unregistration-needed-p (id type classification check)
1011   (when spam-log-to-registry
1012     (if (and (stringp id)
1013              (spam-process-type-valid-p type)
1014              (spam-classification-valid-p classification)
1015              (spam-registration-check-valid-p check))
1016         (let ((cell-list (cdr-safe (gnus-registry-fetch-extra id type)))
1017               found)
1018           (dolist (cell cell-list)
1019             (unless found
1020               (when (and (eq classification (nth 0 cell))
1021                          (eq check (nth 1 cell)))
1022                 (setq found t))))
1023           found)
1024       (progn 
1025         (gnus-message 5 (format "%s called with bad ID, type, classification, or check"
1026                                 "spam-log-unregistration-needed-p"))
1027         nil))))
1028
1029
1030 ;;; undo a ham- or spam-processor registration (the group is not used)
1031 (defun spam-log-undo-registration (id type classification check &optional group)
1032   (when (and spam-log-to-registry
1033              (spam-log-unregistration-needed-p id type classification check))
1034     (if (and (stringp id)
1035              (spam-process-type-valid-p type)
1036              (spam-classification-valid-p classification)
1037              (spam-registration-check-valid-p check))
1038         (let ((cell-list (cdr-safe (gnus-registry-fetch-extra id type)))
1039               new-cell-list found)
1040           (dolist (cell cell-list)
1041             (unless (and (eq classification (nth 0 cell))
1042                          (eq check (nth 1 cell)))
1043               (push cell new-cell-list)))
1044           (gnus-registry-store-extra-entry
1045            id
1046            type
1047            new-cell-list))
1048       (progn 
1049         (gnus-message 5 (format "%s called with bad ID, type, check, or group"
1050                                 "spam-log-undo-registration"))
1051         nil))))
1052
1053 ;;; set up IMAP widening if it's necessary  
1054 (defun spam-setup-widening ()
1055   (dolist (check spam-list-of-statistical-checks)
1056     (when (symbol-value check)
1057       (setq nnimap-split-download-body-default t))))
1058
1059 \f
1060 ;;;; Regex body
1061
1062 (defun spam-check-regex-body ()
1063   (let ((spam-regex-headers-ham spam-regex-body-ham)
1064         (spam-regex-headers-spam spam-regex-body-spam))
1065     (spam-check-regex-headers t)))
1066
1067 \f
1068 ;;;; Regex headers
1069
1070 (defun spam-check-regex-headers (&optional body)
1071   (let ((type (if body "body" "header"))
1072         ret found)
1073     (dolist (h-regex spam-regex-headers-ham)
1074       (unless found
1075         (goto-char (point-min))
1076         (when (re-search-forward h-regex nil t)
1077           (message "Ham regex %s search positive." type)
1078           (setq found t))))
1079     (dolist (s-regex spam-regex-headers-spam)
1080       (unless found
1081         (goto-char (point-min))
1082         (when (re-search-forward s-regex nil t)
1083           (message "Spam regex %s search positive." type)
1084           (setq found t)
1085           (setq ret spam-split-group))))
1086     ret))
1087
1088 \f
1089 ;;;; Blackholes.
1090
1091 (defun spam-reverse-ip-string (ip)
1092   (when (stringp ip)
1093     (mapconcat 'identity
1094                (nreverse (split-string ip "\\."))
1095                ".")))
1096
1097 (defun spam-check-blackholes ()
1098   "Check the Received headers for blackholed relays."
1099   (let ((headers (nnmail-fetch-field "received"))
1100         ips matches)
1101     (when headers
1102       (with-temp-buffer
1103         (insert headers)
1104         (goto-char (point-min))
1105         (gnus-message 5 "Checking headers for relay addresses")
1106         (while (re-search-forward
1107                 "\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\)" nil t)
1108           (gnus-message 9 "Blackhole search found host IP %s." (match-string 1))
1109           (push (spam-reverse-ip-string (match-string 1))
1110                 ips)))
1111       (dolist (server spam-blackhole-servers)
1112         (dolist (ip ips)
1113           (unless (and spam-blackhole-good-server-regex
1114                        ;; match the good-server-regex against the reversed (again) IP string
1115                        (string-match 
1116                         spam-blackhole-good-server-regex
1117                         (spam-reverse-ip-string ip)))
1118             (unless matches
1119               (let ((query-string (concat ip "." server)))
1120                 (if spam-use-dig
1121                     (let ((query-result (query-dig query-string)))
1122                       (when query-result
1123                         (gnus-message 5 "(DIG): positive blackhole check '%s'" 
1124                                       query-result)
1125                         (push (list ip server query-result)
1126                               matches)))
1127                   ;; else, if not using dig.el
1128                   (when (query-dns query-string)
1129                     (gnus-message 5 "positive blackhole check")
1130                     (push (list ip server (query-dns query-string 'TXT))
1131                           matches)))))))))
1132     (when matches
1133       spam-split-group)))
1134 \f
1135 ;;;; Hashcash.
1136
1137 (condition-case nil
1138     (progn
1139       (require 'hashcash)
1140       
1141       (defun spam-check-hashcash ()
1142         "Check the headers for hashcash payments."
1143         (mail-check-payment)))   ;mail-check-payment returns a boolean
1144
1145   (file-error (progn
1146                 (defalias 'mail-check-payment 'ignore)
1147                 (defalias 'spam-check-hashcash 'ignore))))
1148 \f
1149 ;;;; BBDB 
1150
1151 ;;; original idea for spam-check-BBDB from Alexander Kotelnikov
1152 ;;; <sacha@giotto.sj.ru>
1153
1154 ;; all this is done inside a condition-case to trap errors
1155
1156 (condition-case nil
1157     (progn
1158       (require 'bbdb)
1159       (require 'bbdb-com)
1160       
1161       (defun spam-enter-ham-BBDB (addresses &optional remove)
1162         "Enter an address into the BBDB; implies ham (non-spam) sender"
1163         (dolist (from addresses)
1164           (when (stringp from)
1165             (let* ((parsed-address (gnus-extract-address-components from))
1166                    (name (or (nth 0 parsed-address) "Ham Sender"))
1167                    (remove-function (if remove 
1168                                         'bbdb-delete-record-internal
1169                                       'ignore))
1170                    (net-address (nth 1 parsed-address))
1171                    (record (and net-address 
1172                                 (bbdb-search-simple nil net-address))))
1173               (when net-address
1174                 (gnus-message 5 "%s address %s %s BBDB" 
1175                               (if remove "Deleting" "Adding") 
1176                               from
1177                               (if remove "from" "to"))
1178                 (if record
1179                     (funcall remove-function record)
1180                   (bbdb-create-internal name nil net-address nil nil 
1181                                         "ham sender added by spam.el")))))))
1182       
1183       (defun spam-BBDB-register-routine (articles &optional unregister)
1184         (let (addresses)
1185           (dolist (article articles)
1186             (when (stringp (spam-fetch-field-from-fast article))
1187               (push (spam-fetch-field-from-fast article) addresses)))
1188           ;; now do the register/unregister action
1189           (spam-enter-ham-BBDB addresses unregister)))
1190
1191       (defun spam-BBDB-unregister-routine (articles)
1192         (spam-BBDB-register-routine articles t))
1193
1194       (defun spam-check-BBDB ()
1195         "Mail from people in the BBDB is classified as ham or non-spam"
1196         (let ((who (nnmail-fetch-field "from")))
1197           (when who
1198             (setq who (nth 1 (gnus-extract-address-components who)))
1199             (if (bbdb-search-simple nil who)
1200                 t 
1201               (if spam-use-BBDB-exclusive
1202                   spam-split-group
1203                 nil))))))
1204
1205   (file-error (progn
1206                 (defalias 'bbdb-search-simple 'ignore)
1207                 (defalias 'spam-check-BBDB 'ignore)
1208                 (defalias 'spam-BBDB-register-routine 'ignore)
1209                 (defalias 'spam-enter-ham-BBDB 'ignore)
1210                 (defalias 'bbdb-create-internal 'ignore)
1211                 (defalias 'bbdb-delete-record-internal 'ignore)
1212                 (defalias 'bbdb-records 'ignore))))
1213
1214 \f
1215 ;;;; ifile
1216
1217 ;;; check the ifile backend; return nil if the mail was NOT classified
1218 ;;; as spam
1219
1220 (defun spam-get-ifile-database-parameter ()
1221   "Get the command-line parameter for ifile's database from
1222   spam-ifile-database-path."
1223   (if spam-ifile-database-path
1224       (format "--db-file=%s" spam-ifile-database-path)
1225     nil))
1226     
1227 (defun spam-check-ifile ()
1228   "Check the ifile backend for the classification of this message"
1229   (let ((article-buffer-name (buffer-name)) 
1230         category return)
1231     (with-temp-buffer
1232       (let ((temp-buffer-name (buffer-name))
1233             (db-param (spam-get-ifile-database-parameter)))
1234         (save-excursion
1235           (set-buffer article-buffer-name)
1236           (apply 'call-process-region
1237                  (point-min) (point-max) spam-ifile-path
1238                  nil temp-buffer-name nil "-c"
1239                  (if db-param `(,db-param "-q") `("-q"))))
1240         ;; check the return now (we're back in the temp buffer)
1241         (goto-char (point-min))
1242         (if (not (eobp))
1243             (setq category (buffer-substring (point) (spam-point-at-eol))))
1244         (when (not (zerop (length category))) ; we need a category here
1245           (if spam-ifile-all-categories
1246               (setq return category)
1247             ;; else, if spam-ifile-all-categories is not set...
1248             (when (string-equal spam-ifile-spam-category category)
1249               (setq return spam-split-group)))))) ; note return is nil otherwise
1250     return))
1251
1252 (defun spam-ifile-register-with-ifile (articles category &optional unregister)
1253   "Register an article, given as a string, with a category.
1254 Uses `gnus-newsgroup-name' if category is nil (for ham registration)."
1255   (let ((category (or category gnus-newsgroup-name))
1256         (add-or-delete-option (if unregister "-d" "-i"))
1257         (db (spam-get-ifile-database-parameter))
1258         parameters)
1259     (with-temp-buffer
1260       (dolist (article articles)
1261         (let ((article-string (spam-get-article-as-string article)))
1262           (when (stringp article-string)
1263             (insert article-string))))
1264       (apply 'call-process-region
1265              (point-min) (point-max) spam-ifile-path
1266              nil nil nil 
1267              add-or-delete-option category
1268              (if db `(,db "-h") `("-h"))))))
1269
1270 (defun spam-ifile-register-spam-routine (articles &optional unregister)
1271   (spam-ifile-register-with-ifile articles spam-ifile-spam-category unregister))
1272
1273 (defun spam-ifile-unregister-spam-routine (articles)
1274   (spam-ifile-register-spam-routine articles t))
1275
1276 (defun spam-ifile-register-ham-routine (articles &optional unregister)
1277   (spam-ifile-register-with-ifile articles spam-ifile-ham-category unregister))
1278
1279 (defun spam-ifile-unregister-ham-routine (articles)
1280   (spam-ifile-register-ham-routine articles t))
1281
1282 \f
1283 ;;;; spam-stat
1284
1285 (condition-case nil
1286     (progn
1287       (let ((spam-stat-install-hooks nil))
1288         (require 'spam-stat))
1289       
1290       (defun spam-check-stat ()
1291         "Check the spam-stat backend for the classification of this message"
1292         (let ((spam-stat-split-fancy-spam-group spam-split-group) ; override
1293               (spam-stat-buffer (buffer-name)) ; stat the current buffer
1294               category return)
1295           (spam-stat-split-fancy)))
1296
1297       (defun spam-stat-register-spam-routine (articles &optional unregister)
1298         (dolist (article articles)
1299           (let ((article-string (spam-get-article-as-string article)))
1300             (with-temp-buffer
1301               (insert article-string)
1302               (if unregister
1303                   (spam-stat-buffer-change-to-non-spam)
1304               (spam-stat-buffer-is-spam))))))
1305
1306       (defun spam-stat-unregister-spam-routine (articles)
1307         (spam-stat-register-spam-routine articles t))
1308
1309       (defun spam-stat-register-ham-routine (articles &optional unregister)
1310         (dolist (article articles)
1311           (let ((article-string (spam-get-article-as-string article)))
1312             (with-temp-buffer
1313               (insert article-string)
1314               (if unregister
1315                   (spam-stat-buffer-change-to-spam)
1316               (spam-stat-buffer-is-non-spam))))))
1317
1318       (defun spam-stat-unregister-ham-routine (articles)
1319         (spam-stat-register-ham-routine articles t))
1320
1321       (defun spam-maybe-spam-stat-load ()
1322         (when spam-use-stat (spam-stat-load)))
1323       
1324       (defun spam-maybe-spam-stat-save ()
1325         (when spam-use-stat (spam-stat-save))))
1326
1327   (file-error (progn
1328                 (defalias 'spam-stat-load 'ignore)
1329                 (defalias 'spam-stat-save 'ignore)
1330                 (defalias 'spam-maybe-spam-stat-load 'ignore)
1331                 (defalias 'spam-maybe-spam-stat-save 'ignore)
1332                 (defalias 'spam-stat-register-ham-routine 'ignore)
1333                 (defalias 'spam-stat-unregister-ham-routine 'ignore)
1334                 (defalias 'spam-stat-register-spam-routine 'ignore)
1335                 (defalias 'spam-stat-unregister-spam-routine 'ignore)
1336                 (defalias 'spam-stat-buffer-is-spam 'ignore)
1337                 (defalias 'spam-stat-buffer-change-to-spam 'ignore)
1338                 (defalias 'spam-stat-buffer-is-non-spam 'ignore)
1339                 (defalias 'spam-stat-buffer-change-to-non-spam 'ignore)
1340                 (defalias 'spam-stat-split-fancy 'ignore)
1341                 (defalias 'spam-check-stat 'ignore))))
1342
1343 \f
1344
1345 ;;;; Blacklists and whitelists.
1346
1347 (defvar spam-whitelist-cache nil)
1348 (defvar spam-blacklist-cache nil)
1349
1350 (defun spam-kill-whole-line ()
1351   (beginning-of-line)
1352   (let ((kill-whole-line t))
1353     (kill-line)))
1354
1355 ;;; address can be a list, too
1356 (defun spam-enter-whitelist (address &optional remove)
1357   "Enter ADDRESS (list or single) into the whitelist.  With a
1358   non-nil REMOVE, remove them."
1359   (interactive "sAddress: ")
1360   (spam-enter-list address spam-whitelist remove)
1361   (setq spam-whitelist-cache nil))
1362
1363 ;;; address can be a list, too
1364 (defun spam-enter-blacklist (address &optional remove)
1365   "Enter ADDRESS (list or single) into the blacklist.  With a
1366   non-nil REMOVE, remove them."
1367   (interactive "sAddress: ")
1368   (spam-enter-list address spam-blacklist remove)
1369   (setq spam-blacklist-cache nil))
1370
1371 (defun spam-enter-list (addresses file &optional remove)
1372   "Enter ADDRESSES into the given FILE.
1373 Either the whitelist or the blacklist files can be used.  With
1374 REMOVE not nil, remove the ADDRESSES."
1375   (if (stringp addresses)
1376       (spam-enter-list (list addresses) file remove)
1377     ;; else, we have a list of addresses here
1378     (unless (file-exists-p (file-name-directory file))
1379       (make-directory (file-name-directory file) t))
1380     (save-excursion
1381       (set-buffer
1382        (find-file-noselect file))
1383       (dolist (a addresses)
1384         (when (stringp a)
1385           (goto-char (point-min))
1386           (if (re-search-forward (regexp-quote a) nil t)
1387               ;; found the address
1388               (when remove
1389                 (spam-kill-whole-line))
1390             ;; else, the address was not found
1391             (unless remove
1392               (goto-char (point-max))
1393               (unless (bobp)
1394                 (insert "\n"))
1395               (insert a "\n")))))
1396       (save-buffer))))
1397
1398 ;;; returns t if the sender is in the whitelist, nil or
1399 ;;; spam-split-group otherwise
1400 (defun spam-check-whitelist ()
1401   ;; FIXME!  Should it detect when file timestamps change?
1402   (unless spam-whitelist-cache
1403     (setq spam-whitelist-cache (spam-parse-list spam-whitelist)))
1404   (if (spam-from-listed-p spam-whitelist-cache) 
1405       t
1406     (if spam-use-whitelist-exclusive
1407         spam-split-group
1408       nil)))
1409
1410 (defun spam-check-blacklist ()
1411   ;; FIXME!  Should it detect when file timestamps change?
1412   (unless spam-blacklist-cache
1413     (setq spam-blacklist-cache (spam-parse-list spam-blacklist)))
1414   (and (spam-from-listed-p spam-blacklist-cache) spam-split-group))
1415
1416 (defun spam-parse-list (file)
1417   (when (file-readable-p file)
1418     (let (contents address)
1419       (with-temp-buffer
1420         (insert-file-contents file)
1421         (while (not (eobp))
1422           (setq address (buffer-substring (point) (spam-point-at-eol)))
1423           (forward-line 1)
1424           ;; insert the e-mail address if detected, otherwise the raw data
1425           (unless (zerop (length address))
1426             (let ((pure-address (nth 1 (gnus-extract-address-components address))))
1427               (push (or pure-address address) contents)))))
1428       (nreverse contents))))
1429
1430 (defun spam-from-listed-p (cache)
1431   (let ((from (nnmail-fetch-field "from"))
1432         found)
1433     (while cache
1434       (let ((address (pop cache)))
1435         (unless (zerop (length address)) ; 0 for a nil address too
1436           (setq address (regexp-quote address))
1437           ;; fix regexp-quote's treatment of user-intended regexes
1438           (while (string-match "\\\\\\*" address)
1439             (setq address (replace-match ".*" t t address))))
1440         (when (and address (string-match address from))
1441           (setq found t
1442                 cache nil))))
1443     found))
1444
1445 (defun spam-filelist-register-routine (articles blacklist &optional unregister)
1446   (let ((de-symbol (if blacklist 'spam-use-whitelist 'spam-use-blacklist))
1447         (declassification (if blacklist 'ham 'spam))
1448         (enter-function 
1449          (if blacklist 'spam-enter-blacklist 'spam-enter-whitelist))
1450         (remove-function
1451          (if blacklist 'spam-enter-whitelist 'spam-enter-blacklist))
1452         from addresses unregister-list)
1453     (dolist (article articles)
1454       (let ((from (spam-fetch-field-from-fast article))
1455             (id (spam-fetch-field-message-id-fast article))
1456             sender-ignored)
1457         (when (stringp from)
1458           (dolist (ignore-regex spam-blacklist-ignored-regexes)
1459             (when (and (not sender-ignored)
1460                        (stringp ignore-regex)
1461                        (string-match ignore-regex from))
1462               (setq sender-ignored t)))
1463           ;; remember the messages we need to unregister, unless remove is set
1464           (when (and
1465                  (null unregister) 
1466                  (spam-log-unregistration-needed-p
1467                   id 'process declassification de-symbol))
1468             (push from unregister-list))
1469           (unless sender-ignored
1470             (push from addresses)))))
1471
1472     (if unregister
1473         (funcall enter-function addresses t) ; unregister all these addresses
1474       ;; else, register normally and unregister what we need to
1475       (funcall remove-function unregister-list t)
1476       (dolist (article unregister-list)
1477         (spam-log-undo-registration
1478          (spam-fetch-field-message-id-fast article)
1479          'process
1480          declassification
1481          de-symbol))
1482       (funcall enter-function addresses nil))))
1483
1484 (defun spam-blacklist-unregister-routine (articles)
1485   (spam-blacklist-register-routine articles t))
1486
1487 (defun spam-blacklist-register-routine (articles &optional unregister)
1488   (spam-filelist-register-routine articles t unregister))
1489
1490 (defun spam-whitelist-unregister-routine (articles)
1491   (spam-whitelist-register-routine articles t))
1492
1493 (defun spam-whitelist-register-routine (articles &optional unregister)
1494   (spam-filelist-register-routine articles nil unregister))
1495
1496 \f
1497 ;;;; Spam-report glue
1498 (defun spam-report-gmane-register-routine (articles)
1499   (when articles
1500     (apply 'spam-report-gmane articles)))
1501
1502 \f
1503 ;;;; Bogofilter
1504 (defun spam-check-bogofilter-headers (&optional score)
1505   (let ((header (nnmail-fetch-field spam-bogofilter-header)))
1506     (when header                        ; return nil when no header
1507       (if score                         ; scoring mode
1508           (if (string-match "spamicity=\\([0-9.]+\\)" header)
1509               (match-string 1 header)
1510             "0")
1511         ;; spam detection mode
1512         (when (string-match spam-bogofilter-bogosity-positive-spam-header
1513                             header)
1514           spam-split-group)))))
1515
1516 ;; return something sensible if the score can't be determined
1517 (defun spam-bogofilter-score ()
1518   "Get the Bogofilter spamicity score"
1519   (interactive)
1520   (save-window-excursion
1521     (gnus-summary-show-article t)
1522     (set-buffer gnus-article-buffer)
1523     (let ((score (or (spam-check-bogofilter-headers t)
1524                      (spam-check-bogofilter t))))
1525       (message "Spamicity score %s" score)
1526       (or score "0"))
1527     (gnus-summary-show-article)))
1528
1529 (defun spam-check-bogofilter (&optional score)
1530   "Check the Bogofilter backend for the classification of this message"
1531   (let ((article-buffer-name (buffer-name))
1532         (db spam-bogofilter-database-directory)
1533         return)
1534     (with-temp-buffer
1535       (let ((temp-buffer-name (buffer-name)))
1536         (save-excursion
1537           (set-buffer article-buffer-name)
1538           (apply 'call-process-region
1539                  (point-min) (point-max) 
1540                  spam-bogofilter-path
1541                  nil temp-buffer-name nil
1542                  (if db `("-d" ,db "-v") `("-v")))
1543           (setq return (spam-check-bogofilter-headers score)))))
1544     return))
1545
1546 (defun spam-bogofilter-register-with-bogofilter (articles 
1547                                                  spam 
1548                                                  &optional unregister)
1549   "Register an article, given as a string, as spam or non-spam."
1550   (dolist (article articles)
1551     (let ((article-string (spam-get-article-as-string article))
1552           (db spam-bogofilter-database-directory)
1553           (switch (if unregister
1554                       (if spam 
1555                           spam-bogofilter-spam-strong-switch
1556                         spam-bogofilter-ham-strong-switch)
1557                     (if spam 
1558                         spam-bogofilter-spam-switch 
1559                       spam-bogofilter-ham-switch))))
1560       (when (stringp article-string)
1561         (with-temp-buffer
1562           (insert article-string)
1563
1564           (apply 'call-process-region
1565                  (point-min) (point-max) 
1566                  spam-bogofilter-path
1567                  nil nil nil switch
1568                  (if db `("-d" ,db "-v") `("-v"))))))))
1569   
1570 (defun spam-bogofilter-register-spam-routine (articles &optional unregister)
1571   (spam-bogofilter-register-with-bogofilter articles t unregister))
1572
1573 (defun spam-bogofilter-unregister-spam-routine (articles)
1574   (spam-bogofilter-register-spam-routine articles t))
1575
1576 (defun spam-bogofilter-register-ham-routine (articles &optional unregister)
1577   (spam-bogofilter-register-with-bogofilter articles nil unregister))
1578
1579 (defun spam-bogofilter-unregister-ham-routine (articles)
1580   (spam-bogofilter-register-ham-routine articles t))
1581
1582
1583 \f
1584 ;;;; spamoracle
1585 (defun spam-check-spamoracle ()
1586   "Run spamoracle on an article to determine whether it's spam."
1587   (let ((article-buffer-name (buffer-name)))
1588     (with-temp-buffer
1589       (let ((temp-buffer-name (buffer-name)))
1590         (save-excursion
1591           (set-buffer article-buffer-name)
1592           (let ((status 
1593                  (apply 'call-process-region 
1594                         (point-min) (point-max)
1595                         spam-spamoracle-binary 
1596                         nil temp-buffer-name nil
1597                         (if spam-spamoracle-database
1598                             `("-f" ,spam-spamoracle-database "mark")
1599                           '("mark")))))
1600             (if (eq 0 status)
1601                 (progn
1602                   (set-buffer temp-buffer-name)
1603                   (goto-char (point-min))
1604                   (when (re-search-forward "^X-Spam: yes;" nil t)
1605                     spam-split-group))
1606               (error "Error running spamoracle" status))))))))
1607
1608 (defun spam-spamoracle-learn (articles article-is-spam-p &optional unregister)
1609   "Run spamoracle in training mode."
1610   (with-temp-buffer
1611     (let ((temp-buffer-name (buffer-name)))
1612       (save-excursion
1613         (goto-char (point-min))
1614         (dolist (article articles)
1615           (insert (spam-get-article-as-string article)))
1616         (let* ((arg (if (spam-xor unregister article-is-spam-p)
1617                         "-spam" 
1618                       "-good"))
1619                (status 
1620                 (apply 'call-process-region
1621                        (point-min) (point-max)
1622                        spam-spamoracle-binary
1623                        nil temp-buffer-name nil
1624                        (if spam-spamoracle-database
1625                            `("-f" ,spam-spamoracle-database 
1626                              "add" ,arg)
1627                          `("add" ,arg)))))
1628           (when (not (eq 0 status))
1629             (error "Error running spamoracle" status)))))))
1630
1631 (defun spam-spamoracle-learn-ham (articles &optional unregister)
1632   (spam-spamoracle-learn articles nil unregister))
1633
1634 (defun spam-spamoracle-unlearn-ham (articles &optional unregister)
1635   (spam-spamoracle-learn-ham articles t))
1636
1637 (defun spam-spamoracle-learn-spam (articles &optional unregister)
1638   (spam-spamoracle-learn articles t unregister))
1639
1640 (defun spam-spamoracle-unlearn-spam (articles &optional unregister)
1641   (spam-spamoracle-learn-spam articles t))
1642
1643 \f
1644 ;;;; Hooks
1645
1646 ;;;###autoload
1647 (defun spam-initialize ()
1648   "Install the spam.el hooks and do other initialization"
1649   (interactive)
1650   (setq spam-install-hooks t)
1651   ;; TODO: How do we redo this every time spam-face is customized?
1652   (push '((eq mark gnus-spam-mark) . spam-face)
1653         gnus-summary-highlight)
1654   ;; Add hooks for loading and saving the spam stats
1655   (add-hook 'gnus-save-newsrc-hook 'spam-maybe-spam-stat-save)
1656   (add-hook 'gnus-get-top-new-news-hook 'spam-maybe-spam-stat-load)
1657   (add-hook 'gnus-startup-hook 'spam-maybe-spam-stat-load)
1658   (add-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)
1659   (add-hook 'gnus-summary-prepare-hook 'spam-summary-prepare)
1660   (add-hook 'gnus-get-new-news-hook 'spam-setup-widening))
1661
1662 (defun spam-unload-hook ()
1663   "Uninstall the spam.el hooks"
1664   (interactive)
1665   (remove-hook 'gnus-save-newsrc-hook 'spam-maybe-spam-stat-save)
1666   (remove-hook 'gnus-get-top-new-news-hook 'spam-maybe-spam-stat-load)
1667   (remove-hook 'gnus-startup-hook 'spam-maybe-spam-stat-load)
1668   (remove-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)
1669   (remove-hook 'gnus-summary-prepare-hook 'spam-summary-prepare)
1670   (remove-hook 'gnus-get-new-news-hook 'spam-setup-widening))
1671
1672 (when spam-install-hooks
1673   (spam-initialize))
1674
1675 (provide 'spam)
1676
1677 ;;; spam.el ends here.