(gnus-article-outlook-unwrap-lines)
[gnus] / lisp / spam.el
index dc3472d..573b204 100644 (file)
@@ -130,9 +130,9 @@ Competition."
   :group 'spam)
 
 (defcustom spam-disable-spam-split-during-ham-respool nil
-  "Whether `spam-split' should be ignored while resplitting ham in a process
-destination.  This is useful to prevent ham from ending up in the same spam
-group after the resplit.  Don't set this to t if you have spam-split as the
+  "Whether `spam-split' should be ignored while resplitting ham.
+This is useful to prevent ham from ending up in the same spam
+group after the resplit.  Don't set this to t if you have `spam-split' as the
 last rule in your split configuration."
   :type 'boolean
   :group 'spam)
@@ -405,8 +405,8 @@ your main source of newsgroup names."
   :group 'spam)
 
 (defcustom spam-spamoracle-database nil
-  "Location of spamoracle database file. When nil, use the default
-spamoracle database."
+  "Location of spamoracle database file.
+When nil, use the default spamoracle database."
   :type '(choice (directory :tag "Location of spamoracle database file.")
                 (const :tag "Use the default"))
   :group 'spam-spamoracle)
@@ -426,6 +426,14 @@ spamoracle database."
   "Msx" gnus-summary-mark-as-spam
   "\M-d" gnus-summary-mark-as-spam)
 
+(defvar spam-cache-lookups t
+  "Non-nil means spam.el may cache lookup data in `spam-caches'.")
+
+(defvar spam-caches (make-hash-table
+                    :size 10
+                    :test 'equal)
+  "Hash table caching spam check data, keyed by the check's symbol.")
+
 (defvar spam-old-ham-articles nil
   "List of old ham articles, generated when a group is entered.")
 
@@ -436,15 +444,21 @@ spamoracle database."
   "If non-nil, `spam-split' is disabled, and always returns nil.")
 
 (defvar spam-split-last-successful-check nil
-  "`spam-split' will set this to nil or a spam-use-XYZ check if it
-  finds ham or spam.")
+  "Internal variable.
+`spam-split' will set this to nil or a spam-use-XYZ check if it
+finds ham or spam.")
 
 ;; convenience functions
+(defun spam-clear-cache (symbol)
+  "Remove the entry stored under SYMBOL from `spam-caches'."
+  (remhash symbol spam-caches))
+
 (defun spam-xor (a b)
-  "Logical exclusive `or'."
+  "Logical A xor B."
   (and (or a b) (not (and a b))))
 
 (defun spam-group-ham-mark-p (group mark &optional spam)
+  "Checks if MARK is considered a ham mark in GROUP."
   (when (stringp group)
     (let* ((marks (spam-group-ham-marks group spam))
           (marks (if (symbolp mark)
@@ -453,9 +467,11 @@ spamoracle database."
       (memq mark marks))))
 
 (defun spam-group-spam-mark-p (group mark)
+  "Checks if MARK is considered a spam mark in GROUP."
   (spam-group-ham-mark-p group mark t))
 
 (defun spam-group-ham-marks (group &optional spam)
+  "In GROUP, get all the ham marks."
   (when (stringp group)
     (let* ((marks (if spam
                      (gnus-parameter-spam-marks group)
@@ -465,9 +481,11 @@ spamoracle database."
       marks)))
 
 (defun spam-group-spam-marks (group)
+  "In GROUP, get all the spam marks."
   (spam-group-ham-marks group t))
 
 (defun spam-group-spam-contents-p (group)
+  "Is GROUP a spam group?"
   (if (stringp group)
       (or (member group spam-junk-mailgroups)
          (memq 'gnus-group-spam-classification-spam
@@ -475,6 +493,7 @@ spamoracle database."
     nil))
 
 (defun spam-group-ham-contents-p (group)
+  "Is GROUP a ham group?"
   (if (stringp group)
       (memq 'gnus-group-spam-classification-ham
            (gnus-parameter-spam-contents group))
@@ -494,9 +513,9 @@ spamoracle database."
     (gnus-group-ham-exit-processor-BBDB          ham spam-use-BBDB)
     (gnus-group-ham-exit-processor-copy          ham spam-use-ham-copy)
     (gnus-group-ham-exit-processor-spamoracle    ham spam-use-spamoracle))
-  "The spam-list-of-processors list contains pairs associating a
-ham/spam exit processor variable with a classification and a
-spam-use-* variable.")
+  "The `spam-list-of-processors' list.
+This list contains pairs associating a ham/spam exit processor
+variable with a classification and a spam-use-* variable.")
 
 (defun spam-group-processor-p (group processor)
   (if (and (stringp group)
@@ -559,6 +578,14 @@ spam-use-* variable.")
 (defun spam-group-ham-processor-spamoracle-p (group)
   (spam-group-processor-p group 'gnus-group-ham-exit-processor-spamoracle))
 
+(defun spam-report-articles-gmane (n)
+  "Report articles as spam with `spam-report-gmane'.
+N selects the articles, following the process/prefix convention."
+  (interactive "P")
+  (dolist (article (gnus-summary-work-articles n))
+    (gnus-summary-remove-process-mark article)
+    (spam-report-gmane article)))
+
 ;;; Summary entry and exit processing.
 
 (defun spam-summary-prepare ()
@@ -788,31 +815,60 @@ spam-use-* variable.")
 ;;     article-filename
 ;;       nil)))
 
-(defun spam-fetch-field-from-fast (article)
-  "Fetch the `from' field quickly, using the internal gnus-data-list function"
-  (if (and (numberp article)
-          (assoc article (gnus-data-list nil)))
-      (mail-header-from
-       (gnus-data-header (assoc article (gnus-data-list nil))))
-    nil))
-
-(defun spam-fetch-field-subject-fast (article)
-  "Fetch the `subject' field quickly, using the internal
-  gnus-data-list function"
-  (if (and (numberp article)
-          (assoc article (gnus-data-list nil)))
-      (mail-header-subject
-       (gnus-data-header (assoc article (gnus-data-list nil))))
-    nil))
-
-(defun spam-fetch-field-message-id-fast (article)
-  "Fetch the `Message-ID' field quickly, using the internal
-  gnus-data-list function"
-  (if (and (numberp article)
-          (assoc article (gnus-data-list nil)))
-      (mail-header-message-id
-       (gnus-data-header (assoc article (gnus-data-list nil))))
-    nil))
+(defun spam-fetch-field-fast (article field &optional prepared-data-header)
+  "Return header FIELD of ARTICLE, read from PREPARED-DATA-HEADER if given."
+  (when (numberp article)
+    (let ((header (or prepared-data-header
+                      (spam-fetch-article-header article))))
+      (if (not (arrayp header))
+          (gnus-error 5 "Article %d has a nil data header" article)
+        (cond
+         ((eq field 'from)
+          (mail-header-from header))
+         ((eq field 'message-id)
+          (mail-header-message-id header))
+         ((eq field 'subject)
+          (mail-header-subject header))
+         ((eq field 'references)
+          (mail-header-references header))
+         ((eq field 'date)
+          (mail-header-date header))
+         ((eq field 'xref)
+          (mail-header-xref header))
+         ((eq field 'extra)
+          (mail-header-extra header))
+         (t
+          nil))))))
+
+(defun spam-fetch-field-from-fast (article &optional prepared-data-header)
+  (spam-fetch-field-fast article 'from prepared-data-header)) ; `from' only
+
+(defun spam-fetch-field-subject-fast (article &optional prepared-data-header)
+  (spam-fetch-field-fast article 'subject prepared-data-header)) ; `subject' only
+
+(defun spam-fetch-field-message-id-fast (article &optional prepared-data-header)
+  (spam-fetch-field-fast article 'message-id prepared-data-header)) ; `message-id' only
+
+(defun spam-generate-fake-headers (article &optional dh)
+  "Return fake headers for ARTICLE from DH; `extra' is (NAME . VALUE) pairs."
+  (concat (mapconcat (lambda (spec)
+                       (format "%s: %s\n" (car spec)
+                               (spam-fetch-field-fast article (cdr spec) dh)))
+                     '(("From" . from) ("Subject" . subject)
+                       ("Message-ID" . message-id) ("Date" . date)
+                       ("References" . references) ("Xref" . xref)) "")
+          (mapconcat (lambda (pair) (format "%s: %s\n" (car pair) (cdr pair)))
+                     (spam-fetch-field-fast article 'extra dh) "")))
+
+(defun spam-insert-fake-headers (article)
+  "Insert fake headers for ARTICLE at point, if it has a data header."
+  (let ((header (spam-fetch-article-header article)))
+    ;; The generator gets HEADER so it need not look it up per field.
+    (when header
+      (insert (spam-generate-fake-headers article header)))))
+
+(defun spam-fetch-article-header (article)
+  (nth 3 (gnus-data-find article))) ; element 3 of the data entry is used as the header
 
 \f
 ;;;; Spam determination.
@@ -849,11 +905,13 @@ definitely a spam.")
     spam-use-regex-body
     spam-use-stat
     spam-use-bogofilter
+    spam-use-blackholes
     spam-use-spamoracle)
   "The spam-list-of-statistical-checks list contains all the mail
-splitters that need to have the full message body available.")
+splitters that need to have the full message body available.
+Note that you should fetch extra headers if you don't like this,
+e.g. fetch the 'Received' header for spam-use-blackholes.")
 
-;;;TODO: modify to invoke self with each check if invoked without specifics
 (defun spam-split (&rest specific-checks)
   "Split this message into the `spam' group if it is spam.
 This function can be used as an entry in the variable `nnmail-split-fancy',
@@ -875,7 +933,9 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details."
        (save-excursion
          (save-restriction
            (dolist (check spam-list-of-statistical-checks)
-             (when (and (symbolp check) (symbol-value check))
+             (when (and (symbolp check)
+                        (or (symbol-value check)
+                            (memq check specific-checks)))
                (widen)
                (gnus-message 8 "spam-split: widening the buffer (%s requires it)"
                              (symbol-name check))
@@ -885,9 +945,11 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details."
                  decision)
              (while (and list-of-checks (not decision))
                (let ((pair (pop list-of-checks)))
-                 (when (and (symbol-value (car pair))
-                            (or (null specific-checks)
-                                (memq (car pair) specific-checks)))
+                 (when (or
+                        ;; either, given specific checks, this is one of them
+                        (and specific-checks (memq (car pair) specific-checks))
+                        ;; or, given no specific checks, spam-use-CHECK is set
+                        (and (null specific-checks) (symbol-value (car pair))))
                    (gnus-message 5 "spam-split: calling the %s function"
                                  (symbol-name (cdr pair)))
                    (setq decision (funcall (cdr pair)))
@@ -896,8 +958,7 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details."
                      (setq spam-split-last-successful-check (car pair)))
 
                    (when (eq decision 'spam)
-                     (if spam-split-symbolic-return
-                         (setq decision spam-split-group)
+                     (unless spam-split-symbolic-return
                        (gnus-error
                         5
                         (format "spam-split got %s but %s is nil"
@@ -914,44 +975,72 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details."
   (let* ((group gnus-newsgroup-name)
         (autodetect (gnus-parameter-spam-autodetect group))
         (methods (gnus-parameter-spam-autodetect-methods group))
-        (first-method (nth 0 methods)))
-  (when (and autodetect
-            (not (equal first-method 'none)))
+        (first-method (nth 0 methods))
+        (articles (if spam-autodetect-recheck-messages
+                      gnus-newsgroup-articles
+                    gnus-newsgroup-unseen))
+        article-cannot-be-faked)
+
+    (dolist (check spam-list-of-statistical-checks)
+      (when (and (symbolp check)
+                (memq check methods))
+       (setq article-cannot-be-faked t)
+       (return)))
+
+    (when (memq 'default methods)
+      (setq article-cannot-be-faked t))
+
+    (when (and autodetect
+              (not (equal first-method 'none)))
     (mapcar
      (lambda (article)
        (let ((id (spam-fetch-field-message-id-fast article))
             (subject (spam-fetch-field-subject-fast article))
-            (sender (spam-fetch-field-from-fast article)))
-        (unless (and spam-log-to-registry
-                     (spam-log-registered-p id 'incoming))
-          (let* ((spam-split-symbolic-return t)
-                 (spam-split-symbolic-return-positive t)
-                 (split-return
-                  (with-temp-buffer
-                    (gnus-request-article-this-buffer
-                     article
-                     group)
-                    (if (or (null first-method)
-                            (equal first-method 'default))
-                        (spam-split)
-                      (apply 'spam-split methods)))))
-            (if (equal split-return 'spam)
-                (gnus-summary-mark-article article gnus-spam-mark))
-
-            (when (and split-return spam-log-to-registry)
-              (when (zerop (gnus-registry-group-count id))
-                (gnus-registry-add-group
-                 id group subject sender))
-
+            (sender (spam-fetch-field-from-fast article))
+            registry-lookup)
+        
+        (unless id
+          (gnus-error 5 "Article %d has no message ID!" article))
+        
+        (when (and id spam-log-to-registry)
+          (setq registry-lookup (spam-log-registration-type id 'incoming))
+          (when registry-lookup
+            (gnus-message
+             9
+             "spam-find-spam: message %s was already registered incoming"
+             id)))
+
+        (let* ((spam-split-symbolic-return t)
+               (spam-split-symbolic-return-positive t)
+               (split-return
+                (or registry-lookup
+                    (with-temp-buffer
+                      (if article-cannot-be-faked
+                          (gnus-request-article-this-buffer
+                           article
+                           group)
+                        ;; else, we fake the article
+                        (spam-insert-fake-headers article))
+                      (if (or (null first-method)
+                              (equal first-method 'default))
+                          (spam-split)
+                        (apply 'spam-split methods))))))
+          (if (equal split-return 'spam)
+              (gnus-summary-mark-article article gnus-spam-mark))
+          
+          (when (and id split-return spam-log-to-registry)
+            (when (zerop (gnus-registry-group-count id))
+              (gnus-registry-add-group
+               id group subject sender))
+              
+            (unless registry-lookup
               (spam-log-processing-to-registry
                id
                'incoming
                split-return
                spam-split-last-successful-check
                group))))))
-     (if spam-autodetect-recheck-messages
-        gnus-newsgroup-articles
-       gnus-newsgroup-unseen)))))
+    articles))))
 
 (defvar spam-registration-functions
   ;; first the ham register, second the spam register function
@@ -1098,8 +1187,8 @@ functions")
           type
           cell-list))
 
-      (gnus-message 5 (format "%s called with bad ID, type, classification, check, or group"
-                             "spam-log-processing-to-registry")))))
+      (gnus-error 5 (format "%s called with bad ID, type, classification, check, or group"
+                           "spam-log-processing-to-registry")))))
 
 ;;; check if a ham- or spam-processor registration has been done
 (defun spam-log-registered-p (id type)
@@ -1108,10 +1197,26 @@ functions")
             (spam-process-type-valid-p type))
        (cdr-safe (gnus-registry-fetch-extra id type))
       (progn
-       (gnus-message 5 (format "%s called with bad ID, type, classification, or check"
-                               "spam-log-registered-p"))
+       (gnus-error 5 (format "%s called with bad ID, type, classification, or check"
+                             "spam-log-registered-p"))
        nil))))
 
+;;; check what a ham- or spam-processor registration says
+;;; returns nil if conflicting registrations are found
+(defun spam-log-registration-type (id type)
+  "Return the classification that the TYPE registrations of ID agree on.
+Return nil if none is valid or if they name different classifications."
+  (let (conflict
+        verdict)
+    (dolist (entry (spam-log-registered-p id type))
+      (let ((kind (car entry)))
+        (when (spam-classification-valid-p kind)
+          (when (and verdict
+                     (not (eq kind verdict)))
+            (setq conflict t))
+          (setq verdict kind))))
+    (and (not conflict) verdict)))
+
 ;;; check if a ham- or spam-processor registration needs to be undone
 (defun spam-log-unregistration-needed-p (id type classification check)
   (when spam-log-to-registry
@@ -1128,8 +1233,8 @@ functions")
                (setq found t))))
          found)
       (progn
-       (gnus-message 5 (format "%s called with bad ID, type, classification, or check"
-                               "spam-log-unregistration-needed-p"))
+       (gnus-error 5 (format "%s called with bad ID, type, classification, or check"
+                             "spam-log-unregistration-needed-p"))
        nil))))
 
 
@@ -1152,8 +1257,8 @@ functions")
           type
           new-cell-list))
       (progn
-       (gnus-message 5 (format "%s called with bad ID, type, check, or group"
-                               "spam-log-undo-registration"))
+       (gnus-error 5 (format "%s called with bad ID, type, check, or group"
+                             "spam-log-undo-registration"))
        nil))))
 
 ;;; set up IMAP widening if it's necessary
@@ -1205,7 +1310,7 @@ functions")
 
 (defun spam-check-blackholes ()
   "Check the Received headers for blackholed relays."
-  (let ((headers (nnmail-fetch-field "received"))
+  (let ((headers (message-fetch-field "received"))
        (spam-split-group (if spam-split-symbolic-return
                              'spam
                            spam-split-group))
@@ -1270,6 +1375,12 @@ functions")
       (require 'bbdb)
       (require 'bbdb-com)
 
+      ;; drop the cached BBDB hash table whenever the BBDB changes
+      (defun spam-clear-cache-BBDB (&rest immaterial)
+       (spam-clear-cache 'spam-use-BBDB)) ; the hook's arguments are unused
+
+      (add-hook 'bbdb-change-hook 'spam-clear-cache-BBDB)
+
       (defun spam-enter-ham-BBDB (addresses &optional remove)
        "Enter an address into the BBDB; implies ham (non-spam) sender"
        (dolist (from addresses)
@@ -1305,13 +1416,30 @@ functions")
 
       (defun spam-check-BBDB ()
        "Mail from people in the BBDB is classified as ham or non-spam"
-       (let ((who (nnmail-fetch-field "from"))
+       (let ((who (message-fetch-field "from"))
              (spam-split-group (if spam-split-symbolic-return
                                    'spam
-                                 spam-split-group)))
+                                 spam-split-group))
+             bbdb-cache bbdb-hashtable)
+         (when spam-cache-lookups
+           (setq bbdb-cache (gethash 'spam-use-BBDB spam-caches))
+           (unless bbdb-cache
+             (setq bbdb-cache
+                   ;; this is the expanded (bbdb-hashtable) macro
+                   ;; without the debugging support
+                   (with-current-buffer (bbdb-buffer)
+                     (save-excursion
+                       (save-window-excursion
+                         (bbdb-records nil t)
+                         bbdb-hashtable))))
+             (puthash 'spam-use-BBDB bbdb-cache spam-caches)))
          (when who
            (setq who (nth 1 (gnus-extract-address-components who)))
-           (if (bbdb-search-simple nil who)
+           (if
+               (if spam-cache-lookups
+                   (symbol-value
+                    (intern-soft who bbdb-cache))
+                 (bbdb-search-simple nil who))
                t
              (if spam-use-BBDB-exclusive
                  spam-split-group
@@ -1319,6 +1447,8 @@ functions")
 
   (file-error (progn
                (defalias 'bbdb-search-simple 'ignore)
+               (defalias 'bbdb-records 'ignore)
+               (defalias 'bbdb-buffer 'ignore)
                (defalias 'spam-check-BBDB 'ignore)
                (defalias 'spam-BBDB-register-routine 'ignore)
                (defalias 'spam-enter-ham-BBDB 'ignore)
@@ -1479,7 +1609,8 @@ Uses `gnus-newsgroup-name' if category is nil (for ham registration)."
 With a non-nil REMOVE, remove them."
   (interactive "sAddress: ")
   (spam-enter-list address spam-whitelist remove)
-  (setq spam-whitelist-cache nil))
+  (setq spam-whitelist-cache nil)
+  (spam-clear-cache 'spam-use-whitelist))
 
 ;;; address can be a list, too
 (defun spam-enter-blacklist (address &optional remove)
@@ -1487,7 +1618,8 @@ With a non-nil REMOVE, remove them."
 With a non-nil REMOVE, remove them."
   (interactive "sAddress: ")
   (spam-enter-list address spam-blacklist remove)
-  (setq spam-blacklist-cache nil))
+  (setq spam-blacklist-cache nil)
+  (spam-clear-cache 'spam-use-whitelist))
 
 (defun spam-enter-list (addresses file &optional remove)
   "Enter ADDRESSES into the given FILE.
@@ -1516,6 +1648,32 @@ REMOVE not nil, remove the ADDRESSES."
              (insert a "\n")))))
       (save-buffer))))
 
+(defun spam-filelist-build-cache (type)
+  "Cache the address list for TYPE in `spam-caches' as a list of regexps.
+Nil and empty addresses are skipped; an empty regexp would match anyone."
+  (unless (gethash type spam-caches)
+    (let (parsed-cache)
+      (dolist (address (if (eq type 'spam-use-blacklist)
+                           spam-blacklist-cache
+                         spam-whitelist-cache))
+        (unless (zerop (length address)) ; 0 for a nil address too
+          (setq address (regexp-quote address))
+          ;; fix regexp-quote's treatment of user-intended regexes
+          (while (string-match "\\\\\\*" address)
+            (setq address (replace-match ".*" t t address)))
+          (push address parsed-cache)))
+      (puthash type parsed-cache spam-caches))))
+
+(defun spam-filelist-check-cache (type from)
+  "Return t if the string FROM matches a cached address regexp for TYPE."
+  (when (stringp from)
+    (spam-filelist-build-cache type)
+    (let ((regexps (gethash type spam-caches)) found)
+      (while (and regexps (not found))
+        (setq found (and (car regexps) (string-match (car regexps) from) t)
+              regexps (cdr regexps)))
+      found)))
+
 ;;; returns t if the sender is in the whitelist, nil or
 ;;; spam-split-group otherwise
 (defun spam-check-whitelist ()
@@ -1525,7 +1683,7 @@ REMOVE not nil, remove the ADDRESSES."
                            spam-split-group)))
     (unless spam-whitelist-cache
       (setq spam-whitelist-cache (spam-parse-list spam-whitelist)))
-    (if (spam-from-listed-p spam-whitelist-cache)
+    (if (spam-from-listed-p 'spam-use-whitelist)
        t
       (if spam-use-whitelist-exclusive
          spam-split-group
@@ -1538,7 +1696,7 @@ REMOVE not nil, remove the ADDRESSES."
                            spam-split-group)))
     (unless spam-blacklist-cache
       (setq spam-blacklist-cache (spam-parse-list spam-blacklist)))
-    (and (spam-from-listed-p spam-blacklist-cache) spam-split-group)))
+    (and (spam-from-listed-p 'spam-use-blacklist) spam-split-group)))
 
 (defun spam-parse-list (file)
   (when (file-readable-p file)
@@ -1554,20 +1712,10 @@ REMOVE not nil, remove the ADDRESSES."
              (push (or pure-address address) contents)))))
       (nreverse contents))))
 
-(defun spam-from-listed-p (cache)
-  (let ((from (nnmail-fetch-field "from"))
+(defun spam-from-listed-p (type)
+  (let ((from (message-fetch-field "from"))
        found)
-    (while cache
-      (let ((address (pop cache)))
-       (unless (zerop (length address)) ; 0 for a nil address too
-         (setq address (regexp-quote address))
-         ;; fix regexp-quote's treatment of user-intended regexes
-         (while (string-match "\\\\\\*" address)
-           (setq address (replace-match ".*" t t address))))
-       (when (and address (string-match address from))
-         (setq found t
-               cache nil))))
-    found))
+    (spam-filelist-check-cache type from)))
 
 (defun spam-filelist-register-routine (articles blacklist &optional unregister)
   (let ((de-symbol (if blacklist 'spam-use-whitelist 'spam-use-blacklist))
@@ -1629,7 +1777,7 @@ REMOVE not nil, remove the ADDRESSES."
 \f
 ;;;; Bogofilter
 (defun spam-check-bogofilter-headers (&optional score)
-  (let ((header (nnmail-fetch-field spam-bogofilter-header))
+  (let ((header (message-fetch-field spam-bogofilter-header))
        (spam-split-group (if spam-split-symbolic-return
                              'spam
                            spam-split-group)))
@@ -1791,7 +1939,7 @@ REMOVE not nil, remove the ADDRESSES."
   (add-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)
   (add-hook 'gnus-summary-prepare-hook 'spam-summary-prepare)
   (add-hook 'gnus-get-new-news-hook 'spam-setup-widening)
-  (add-hook 'gnus-summary-prepare-hook 'spam-find-spam))
+  (add-hook 'gnus-summary-prepared-hook 'spam-find-spam))
 
 (defun spam-unload-hook ()
   "Uninstall the spam.el hooks"
@@ -1809,8 +1957,4 @@ REMOVE not nil, remove the ADDRESSES."
 
 (provide 'spam)
 
-;;; spam.el ends here.
-
-(provide 'spam)
-
 ;;; spam.el ends here