X-Git-Url: https://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Fspam-stat.el;h=d6b20df78b8f1da09dca43339a65da80b1e2a629;hp=6e5ec9d981ad0f0f2efaa7548878f248ce259eed;hb=c9a393eeb329a99695566342a9f03b8a30000898;hpb=17670331c118a468aff4e08a4f688219ec4a8eec diff --git a/lisp/spam-stat.el b/lisp/spam-stat.el index 6e5ec9d98..d6b20df78 100644 --- a/lisp/spam-stat.el +++ b/lisp/spam-stat.el @@ -1,6 +1,7 @@ ;;; spam-stat.el --- detecting spam based on statistics -;; Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc. +;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, +;; 2010 Free Software Foundation, Inc. ;; Author: Alex Schroeder ;; Keywords: network @@ -8,20 +9,18 @@ ;; This file is part of GNU Emacs. -;; This is free software; you can redistribute it and/or modify it -;; under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. -;; This is distributed in the hope that it will be useful, but WITHOUT -;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -;; License for more details. +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. 
;;; Commentary: @@ -124,12 +123,14 @@ ;;; Code: (require 'mail-parse) +(defvar gnus-original-article-buffer) + (defgroup spam-stat nil "Statistical spam detection for Emacs. Use the functions to build a dictionary of words and their statistical distribution in spam and non-spam mails. Then use a function to determine whether a buffer contains spam or not." - :version "21.4" + :version "22.1" :group 'gnus) (defcustom spam-stat-file "~/.spam-stat.el" @@ -168,7 +169,7 @@ no effect when spam-stat is invoked through spam.el." :type 'string :group 'spam-stat) -(defcustom spam-stat-split-fancy-spam-threshhold 0.9 +(defcustom spam-stat-split-fancy-spam-threshold 0.9 "Spam score threshold in spam-stat-split-fancy." :type 'number :group 'spam-stat) @@ -178,13 +179,33 @@ no effect when spam-stat is invoked through spam.el." :type 'hook :group 'spam-stat) +(defcustom spam-stat-score-buffer-user-functions nil + "List of additional scoring functions. +Called one by one on the buffer. + +If all of these functions return non-nil answers, these numerical +answers are added to the computed spam stat score on the buffer. If +you defun such functions, make sure they don't return the buffer in a +narrowed state or such: use, for example, `save-excursion'. Each of +your functions is also passed the initial spam-stat score which might +aid in your scoring. + +Also be careful when defining such functions. If they take a long +time, they will slow down your mail splitting. Thus, if the buffer is +large, don't forget to use smaller regions, by wrapping your work in, +say, `with-spam-stat-max-buffer-size'." + :type '(repeat sexp) + :group 'spam-stat) + (defcustom spam-stat-process-directory-age 90 "Max. age of files to be processed in directory, in days. When using `spam-stat-process-spam-directory' or `spam-stat-process-non-spam-directory', only files that have been touched in this many days will be considered. 
Without this filter, re-training spam-stat with several thousand messages -will start to take a very long time.") +will start to take a very long time." + :type 'number + :group 'spam-stat) (defvar spam-stat-last-saved-at nil "Time stamp of last change of spam-stat-file on this run") @@ -246,6 +267,9 @@ Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good', (defvar spam-stat-nbad 0 "The number of bad mails in the dictionary.") +(defvar spam-stat-error-holder nil + "A holder for condition-case errors while scoring buffers.") + (defsubst spam-stat-good (entry) "Return the number of times this word belongs to good mails." (aref entry 0)) @@ -352,6 +376,8 @@ Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good', (spam-stat-buffer-words)) (setq spam-stat-dirty t)) +(autoload 'gnus-message "gnus-util") + (defun spam-stat-buffer-change-to-spam () "Consider current buffer no longer normal mail but spam." (setq spam-stat-nbad (1+ spam-stat-nbad) @@ -417,12 +443,12 @@ spam-stat (spam-stat-to-hash-table '(" spam-stat-ngood spam-stat-nbad)) (null spam-stat-last-saved-at) (not (equal spam-stat-last-saved-at (nth 5 (file-attributes spam-stat-file))))) - (progn + (progn (load-file spam-stat-file) (setq spam-stat-dirty nil - spam-stat-last-saved-at + spam-stat-last-saved-at (nth 5 (file-attributes spam-stat-file))))) - (t (message "Spam stat file not loaded: no change in disk.."))))) + (t (message "Spam stat file not loaded: no change in disk."))))) (defun spam-stat-to-hash-table (entries) "Turn list ENTRIES into a hash table and store as `spam-stat'. @@ -476,40 +502,64 @@ where DIFF is the difference between SCORE and 0.5." result)) (defun spam-stat-score-buffer () - "Return a score describing the spam-probability for this buffer." + "Return a score describing the spam-probability for this buffer. +Add user supplied modifications if supplied." + (interactive) ; helps in debugging. 
(setq spam-stat-score-data (spam-stat-buffer-words-with-scores)) (let* ((probs (mapcar 'cadr spam-stat-score-data)) - (prod (apply #'* probs))) - (/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x)) - probs)))))) + (prod (apply #'* probs)) + (score0 + (/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x)) + probs))))) + (score1s + (condition-case + spam-stat-error-holder + (spam-stat-score-buffer-user score0) + (error nil))) + (ans + (if score1s (+ score0 score1s) score0))) + (when (interactive-p) + (message "%S" ans)) + ans)) + +(defun spam-stat-score-buffer-user (&rest args) + (let* ((scores + (mapcar + (lambda (fn) + (apply fn args)) + spam-stat-score-buffer-user-functions))) + (if (memq nil scores) nil + (apply #'+ scores)))) (defun spam-stat-split-fancy () "Return the name of the spam group if the current mail is spam. Use this function on `nnmail-split-fancy'. If you are interested in the raw data used for the last run of `spam-stat-score-buffer', check the variable `spam-stat-score-data'." - (condition-case var + (condition-case spam-stat-error-holder (progn (set-buffer spam-stat-buffer) (goto-char (point-min)) - (when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshhold) + (when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshold) (when (boundp 'nnmail-split-trace) (mapc (lambda (entry) (push entry nnmail-split-trace)) spam-stat-score-data)) spam-stat-split-fancy-spam-group)) - (error (message "Error in spam-stat-split-fancy: %S" var) + (error (message "Error in spam-stat-split-fancy: %S" spam-stat-error-holder) nil))) ;; Testing (defun spam-stat-strip-xref () - "Strip the the Xref header." + "Strip the Xref header." (save-restriction (mail-narrow-to-head) (when (re-search-forward "^Xref:.*\n" nil t) (delete-region (match-beginning 0) (match-end 0))))) +(autoload 'time-to-number-of-days "time-date") + (defun spam-stat-process-directory (dir func) "Process all the regular files in directory DIR using function FUNC." 
(let* ((files (directory-files dir t "^[^.]")) @@ -624,5 +674,4 @@ COUNT defaults to 5" (provide 'spam-stat) -;;; arch-tag: ff1d2200-8ddb-42fb-bb7b-1b5e20448554 ;;; spam-stat.el ends here