;;; deuglify.el --- deuglify broken Outlook (Express) articles
-;; Copyright (C) 2002 Free Software Foundation, Inc.
-;; Copyright (C) 2001,2002 Raymond Scholz
+;; Copyright (C) 2001-2012 Free Software Foundation, Inc.
;; Author: Raymond Scholz <rscholz@zonix.de>
-;; Thomas Steffen (unwrapping algorithm,
-;; based on an idea of Stefan Monnier)
+;; Thomas Steffen
+;; (unwrapping algorithm, based on an idea of Stefan Monnier)
;; Keywords: mail, news
;; This file is part of GNU Emacs.
-;; GNU Emacs is free software; you can redistribute it and/or modify
+;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
-;; any later version.
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; > verb. This sentence no verb. This sentence no verb. This
;; > sentence no verb.
;;
-;; The function `gnus-outlook-unwrap-lines' tries to recognize those
+;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those
;; erroneously wrapped lines and will unwrap them. I.e. putting the
;; wrapped parts ("no" in this example) back where they belong (at the
;; end of the cited line above).
;; > verb. This sentence no verb. This sentence no verb. This
;; > sentence no verb.
;;
-;; Unwrapping "You forgot in all your sentences." would be illegal as
+;; Unwrapping "You forgot in all your sentences." would be invalid as
;; this part wasn't intended to be cited text.
-;; `gnus-outlook-unwrap-lines' will only unwrap lines if the resulting
+;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting
;; citation line will be of a certain maximum length. You can control
;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also
;; unwrapping will only be done if the line above the (possibly)
;; > Bye, John
;;
;; Repairing the attribution line will be done by function
-;; `gnus-outlook-repair-attribution' which calls other function that
+;; `gnus-article-outlook-repair-attribution' which calls other functions that
;; try to recognize and repair broken attribution lines. See variable
;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be
;; cut off from the beginning of an attribution line and variable
;;
;; Rearranging the article so that the cited text appears above the
;; new text will be done by function
-;; `gnus-outlook-rearrange-citation'. This function calls
-;; `gnus-outlook-repair-attribution' to find and repair an attribution
+;; `gnus-article-outlook-rearrange-citation'. This function calls
+;; `gnus-article-outlook-repair-attribution' to find and repair an attribution
;; line.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Hey, John. There's no in all your sentences!
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
+;;
;; Usage
;; -----
;;
;; To automatically invoke deuglification on every article you read,
;; put something like that in your .gnus:
;;
-;; (add-hook 'gnus-article-decode-hook 'gnus-outlook-unwrap-lines)
+;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines)
;;
;; or _one_ of the following lines:
;;
;; ;; repair broken attribution lines
-;; (add-hook 'gnus-article-decode-hook 'gnus-outlook-repair-attribution)
+;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution)
;;
;; ;; repair broken attribution lines and citations
-;; (add-hook 'gnus-article-decode-hook 'gnus-outlook-rearrange-citation)
+;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation)
;;
;; Note that there always may be some false positives, so I suggest
;; using the manual invocation. After deuglification you may want to
;; -----------
;;
;; As I said before there may (or will) be a few false positives on
-;; unwrapping cited lines with `gnus-outlook-unwrap-lines'.
+;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'.
;;
-;; `gnus-outlook-repair-attribution' will only fix the first
+;; `gnus-article-outlook-repair-attribution' will only fix the first
;; attribution line found in the article. Furthermore it is limited to
;; certain kinds of attributions. And there may be horribly many
;; false positives, vanishing lines and so on -- so don't trust your
;; eyes. Again I recommend manual invocation.
;;
-;; `gnus-outlook-rearrange-citation' carries all the limitations of
-;; `gnus-outlook-repair-attribution'.
+;; `gnus-article-outlook-rearrange-citation' carries all the limitations of
+;; `gnus-article-outlook-repair-attribution'.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Renamed `gnus-outlook-deuglify-article' to
;; `gnus-article-outlook-deuglify-article'.
;; Made it easier to deuglify the article while being in Gnus' Article
-;; Edit Mode. (suggested by Phil Nitschke)
+;; Edit Mode. (suggested by Phil Nitschke)
;;
;;
;; Revision 1.3 2002/01/02 23:35:54 rscholz
;;; User Customizable Variables:
(defgroup gnus-outlook-deuglify nil
- "Deuglify articles generated by broken user agents like MS Outlook (Express).")
+ "Deuglify articles generated by broken user agents like MS Outlook (Express)."
+ :version "22.1"
+ :group 'gnus)
-;;;###autoload
(defcustom gnus-outlook-deuglify-unwrap-min 45
"Minimum length of the cited line above the (possibly) wrapped line."
- :type 'number
+ :version "22.1"
+ :type 'integer
:group 'gnus-outlook-deuglify)
-;;;###autoload
(defcustom gnus-outlook-deuglify-unwrap-max 95
"Maximum length of the cited line after unwrapping."
- :type 'number
+ :version "22.1"
+ :type 'integer
:group 'gnus-outlook-deuglify)
(defcustom gnus-outlook-deuglify-cite-marks ">|#%"
"Characters that indicate cited lines."
+ :version "22.1"
:type 'string
:group 'gnus-outlook-deuglify)
+;; May be nil: `gnus-article-outlook-unwrap-lines' splices this value into a
+;; regexp character class with `concat', where nil adds no characters.
(defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil
"Characters that inhibit unwrapping if they are the last one on the cited line above the possible wrapped line."
- :type 'string
+ :version "22.1"
+ :type '(radio (const :format "None " nil)
+ (string :value ".?!"))
:group 'gnus-outlook-deuglify)
(defcustom gnus-outlook-deuglify-no-wrap-chars "`"
"Characters that inhibit unwrapping if they are the first one in the possibly wrapped line."
+ :version "22.1"
:type 'string
:group 'gnus-outlook-deuglify)
+;; The default value has two capture groups of its own.
+;; `gnus-outlook-repair-attribution-other' wraps this regexp in one more
+;; group and refers to its own parts as \4..\7, so that numbering relies on
+;; the number of groups used here.
(defcustom gnus-outlook-deuglify-attrib-cut-regexp
"\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, "
"Regular expression matching the beginning of an attribution line that should be cut off."
+ :version "22.1"
:type 'string
:group 'gnus-outlook-deuglify)
(defcustom gnus-outlook-deuglify-attrib-verb-regexp
- "wrote\\|writes\\|says\\|schrieb\\|schreibt\\|meinte\\|skrev\\|a écrit\\|schreef"
+ "wrote\\|writes\\|says\\|schrieb\\|schreibt\\|meinte\\|skrev\\|a écrit\\|schreef\\|escribió"
"Regular expression matching the verb used in an attribution line."
+ :version "22.1"
:type 'string
:group 'gnus-outlook-deuglify)
(defcustom gnus-outlook-deuglify-attrib-end-regexp
": *\\|\\.\\.\\."
"Regular expression matching the end of an attribution line."
+ :version "22.1"
:type 'string
:group 'gnus-outlook-deuglify)
+(defcustom gnus-outlook-display-hook nil
+ "A hook called after a deuglified article has been prepared.
+It is run after `gnus-article-prepare-hook'."
+ :version "22.1"
+ :type 'hook
+ :group 'gnus-outlook-deuglify)
;; Functions
+(defun gnus-outlook-display-article-buffer ()
+ "Redisplay the article buffer, or the current buffer if there is none.
+The buffer used is `gnus-article-buffer' when that is non-nil, otherwise
+the current buffer.  Calls `gnus-article-highlight' and
+`gnus-treat-article', then runs `gnus-article-prepare-hook' followed by
+`gnus-outlook-display-hook'."
+ (with-current-buffer (or gnus-article-buffer (current-buffer))
+ ;; "Emulate" `gnus-article-prepare-display' without calling
+ ;; it. Calling `gnus-article-prepare-display' on an already
+ ;; prepared article removes all MIME parts. I'm unsure whether
+ ;; this is a bug or not.
+ (gnus-article-highlight t)
+ (gnus-treat-article nil)
+ (gnus-run-hooks 'gnus-article-prepare-hook
+ 'gnus-outlook-display-hook)))
+
;;;###autoload
-(defun gnus-outlook-unwrap-lines ()
+(defun gnus-article-outlook-unwrap-lines (&optional nodisplay)
"Unwrap lines that appear to be wrapped citation lines.
You can control what lines will be unwrapped by frobbing
-`gnus-outlook-deuglify-unwrap-min' and
-`gnus-outlook-deuglify-unwrap-max', indicating the miminum and maximum
-length of an unwrapped citation line."
- (interactive)
- (save-excursion
- (let ((case-fold-search nil)
- (inhibit-read-only t)
- (cite-marks gnus-outlook-deuglify-cite-marks)
- (no-wrap gnus-outlook-deuglify-no-wrap-chars)
- (stop-chars gnus-outlook-deuglify-unwrap-stop-chars))
- (gnus-with-article-buffer
- (article-goto-body)
- (while (re-search-forward
- (concat
- "^\\([ \t" cite-marks "]*\\)"
- "\\([" cite-marks "].*[^\n " stop-chars "]\\)[ \t]?\n"
- "\\1\\([^\n " cite-marks no-wrap "]+.*\\)$")
+`gnus-outlook-deuglify-unwrap-min' and `gnus-outlook-deuglify-unwrap-max',
+indicating the minimum and maximum length of an unwrapped citation line. If
+NODISPLAY is non-nil, don't redisplay the article buffer."
+ (interactive "P")
+ (let ((case-fold-search nil)
+ (inhibit-read-only t)
+ (cite-marks gnus-outlook-deuglify-cite-marks)
+ (no-wrap gnus-outlook-deuglify-no-wrap-chars)
+ (stop-chars gnus-outlook-deuglify-unwrap-stop-chars))
+ (gnus-with-article-buffer
+ (article-goto-body)
+ ;; Regexp groups: \1 is the leading run of whitespace and cite marks;
+ ;; \2 is the rest of the cited line (it starts with a cite mark and
+ ;; does not end in a space or a stop char); \3 is the following line,
+ ;; which must repeat prefix \1 and then continue with a character that
+ ;; is neither a space, a cite mark nor a no-wrap char.
+ (while (re-search-forward
+ (concat
+ "^\\([ \t" cite-marks "]*\\)"
+ "\\([" cite-marks "].*[^\n " stop-chars "]\\)[ \t]?\n"
+ "\\1\\([^\n " cite-marks no-wrap "]+.*\\)$")
nil t)
- (let ((len12 (- (match-end 2) (match-beginning 1)))
+ ;; len12 is the length of the cited line (\1 plus \2); len3 is the
+ ;; length of the wrapped part (\3).
+ (let ((len12 (- (match-end 2) (match-beginning 1)))
(len3 (- (match-end 3) (match-beginning 3))))
- (if (and (> len12 gnus-outlook-deuglify-unwrap-min)
+ (when (and (> len12 gnus-outlook-deuglify-unwrap-min)
(< (+ len12 len3) gnus-outlook-deuglify-unwrap-max))
- (progn
- (replace-match "\\1\\2 \\3")
- (goto-char (match-beginning 0))))))))))
+ (replace-match "\\1\\2 \\3")
+ ;; Go back to the start of the joined line so that the next search
+ ;; re-examines it.
+ (goto-char (match-beginning 0)))))))
+ (unless nodisplay (gnus-outlook-display-article-buffer)))
(defun gnus-outlook-rearrange-article (attr-start)
- "Put the text from `attr-start' to the end of buffer at the top of the article buffer."
- (save-excursion
- (let ((inhibit-read-only t)
- (cite-marks gnus-outlook-deuglify-cite-marks))
- (gnus-with-article-buffer
- (article-goto-body)
- ;; article does not start with attribution
- (unless (= (point) attr-start)
- (gnus-kill-all-overlays)
- (let ((cur (point))
- ;; before signature or end of buffer
- (to (if (gnus-article-search-signature)
- (point)
- (point-max))))
- ;; handle the case where the full quote is below the
- ;; signature
- (if (< to attr-start)
- (setq to (point-max)))
- (transpose-regions cur attr-start attr-start to)))))))
+ "Put the text from ATTR-START to the end of buffer at the top of the article buffer."
+ ;; FIXME: 1. (*) text/plain ( ) text/html
+ ;; NOTE(review): `cite-marks' is bound below but is not referenced anywhere
+ ;; in this function.
+ (let ((inhibit-read-only t)
+ (cite-marks gnus-outlook-deuglify-cite-marks))
+ (gnus-with-article-buffer
+ (article-goto-body)
+ ;; article does not start with attribution
+ (unless (= (point) attr-start)
+ (gnus-kill-all-overlays)
+ (let ((cur (point))
+ ;; before signature or end of buffer
+ (to (if (gnus-article-search-signature)
+ (point)
+ (point-max))))
+ ;; handle the case where the full quote is below the
+ ;; signature
+ (when (< to attr-start)
+ (setq to (point-max)))
+ ;; If the line after the attribution does not start with ">", cite
+ ;; the region with `message-indent-citation', append a newline and
+ ;; move TO to the new end of the narrowed region.
+ ;; NOTE(review): the narrowing is undone by an explicit `widen'
+ ;; rather than by `save-restriction'; a signal raised in between
+ ;; would leave the buffer narrowed -- confirm this is acceptable.
+ (save-excursion
+ (narrow-to-region attr-start to)
+ (goto-char attr-start)
+ (forward-line)
+ (unless (looking-at ">")
+ (message-indent-citation (point) (point-max) 'yank-only)
+ (goto-char (point-max))
+ (newline)
+ (setq to (point-max)))
+ (widen))
+ ;; Swap the text between CUR and ATTR-START with the text between
+ ;; ATTR-START and TO.
+ (transpose-regions cur attr-start attr-start to))))))
;; John Doe <john.doe@some.domain> wrote in message
;; news:a87usw8$dklsssa$2@some.news.server...
(defun gnus-outlook-repair-attribution-outlook ()
"Repair a broken attribution line (Outlook)."
- (save-excursion
- (let ((case-fold-search nil)
- (inhibit-read-only t)
- (cite-marks gnus-outlook-deuglify-cite-marks))
- (gnus-with-article-buffer
- (article-goto-body)
- (if (re-search-forward
+ (let ((case-fold-search nil)
+ (inhibit-read-only t)
+ (cite-marks gnus-outlook-deuglify-cite-marks))
+ (gnus-with-article-buffer
+ (article-goto-body)
+ ;; Regexp groups: \1 is the text before the verb, on a line that does
+ ;; not start with a cite mark; \2 is the verb; \3 is optional text after
+ ;; the verb, which may continue onto a second uncited line; \4 is the
+ ;; end-of-attribution marker.
+ (when (re-search-forward
(concat "^\\([^" cite-marks "].+\\)"
"\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\)"
"\\(.*\n?[^\n" cite-marks "].*\\)?"
"\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
nil t)
- (progn
- (gnus-kill-all-overlays)
- (replace-match "\\1\\2\\4")
- (match-beginning 0)))))))
+ (gnus-kill-all-overlays)
+ ;; Keep \1, \2 and \4; the optional \3 is discarded.
+ (replace-match "\\1\\2\\4")
+ ;; The match start is the value of the `when' body;
+ ;; `gnus-article-outlook-repair-attribution' uses it as the
+ ;; attribution position.
+ (match-beginning 0)))))
;; ----- Original Message -----
(defun gnus-outlook-repair-attribution-block ()
"Repair a big broken attribution block."
- (save-excursion
- (let ((case-fold-search nil)
- (inhibit-read-only t)
- (cite-marks gnus-outlook-deuglify-cite-marks))
- (gnus-with-article-buffer
- (article-goto-body)
- (if (re-search-forward
- (concat "^[" cite-marks " \t]*----* ?[^-]+ [^-]+ ?----*\n"
+ (let ((case-fold-search nil)
+ (inhibit-read-only t)
+ (cite-marks gnus-outlook-deuglify-cite-marks))
+ (gnus-with-article-buffer
+ (article-goto-body)
+ ;; Match a dashed header line (e.g. "----- Original Message -----"),
+ ;; then a "Field: value" line whose value becomes \1, then one or more
+ ;; further "Field: value" lines.
+ (when (re-search-forward
+ (concat "^[" cite-marks " \t]*--* ?[^-]+ [^-]+ ?--*\\s *\n"
"[^\n:]+:[ \t]*\\([^\n]+\\)\n"
"\\([^\n:]+:[ \t]*[^\n]+\n\\)+")
nil t)
- (progn
- (gnus-kill-all-overlays)
- (replace-match "\\1 wrote:\n")
- (match-beginning 0)))))))
+ (gnus-kill-all-overlays)
+ ;; Replace the whole block with a one-line attribution built from \1.
+ (replace-match "\\1 wrote:\n")
+ ;; The match start is the value of the `when' body.
+ (match-beginning 0)))))
;; On Wed, 16 Jan 2002 23:23:30 +0100, John Doe <john.doe@some.domain> wrote:
(defun gnus-outlook-repair-attribution-other ()
"Repair a broken attribution line (other user agents than Outlook)."
- (save-excursion
- (let ((case-fold-search nil)
- (inhibit-read-only t)
- (cite-marks gnus-outlook-deuglify-cite-marks))
- (gnus-with-article-buffer
- (article-goto-body)
- (if (re-search-forward
+ (let ((case-fold-search nil)
+ (inhibit-read-only t)
+ (cite-marks gnus-outlook-deuglify-cite-marks))
+ (gnus-with-article-buffer
+ (article-goto-body)
+ ;; Regexp groups: \1 wraps the optional cut prefix; with the default
+ ;; value of `gnus-outlook-deuglify-attrib-cut-regexp' that prefix has
+ ;; two groups of its own (\2, \3).  \4 is the first line, \5 the
+ ;; optional start of the second line, \6 the verb and \7 the
+ ;; end-of-attribution marker.  A cut regexp with a different number of
+ ;; groups would shift this numbering.
+ (when (re-search-forward
(concat "^\\("gnus-outlook-deuglify-attrib-cut-regexp"\\)?"
"\\([^" cite-marks "].+\\)\n\\([^\n" cite-marks "].*\\)?"
"\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\).*"
"\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
nil t)
- (progn
- (gnus-kill-all-overlays)
- (replace-match "\\4 \\5\\6\\7")
- (match-beginning 0)))))))
+ (gnus-kill-all-overlays)
+ ;; Join the two lines into one; the cut prefix (\1) and the text
+ ;; between the verb and the end marker are not reproduced.
+ (replace-match "\\4 \\5\\6\\7")
+ ;; The match start is the value of the `when' body.
+ (match-beginning 0)))))
;;;###autoload
-(defun gnus-outlook-repair-attribution ()
- "Repair a broken attribution line."
- (interactive)
- (or
- (gnus-outlook-repair-attribution-other)
- (gnus-outlook-repair-attribution-block)
- (gnus-outlook-repair-attribution-outlook)))
-
-(defun gnus-outlook-rearrange-citation ()
- "Repair broken citations."
- (let ((attrib-start (gnus-outlook-repair-attribution)))
+(defun gnus-article-outlook-repair-attribution (&optional nodisplay)
+ "Repair a broken attribution line.
+If NODISPLAY is non-nil, don't redisplay the article buffer.
+Return the value of the first repair function that succeeds, or nil if
+none does."
+ (interactive "P")
+ ;; Try the repair strategies in order; `or' stops at the first non-nil
+ ;; result.
+ (let ((attrib-start
+ (or
+ (gnus-outlook-repair-attribution-other)
+ (gnus-outlook-repair-attribution-block)
+ (gnus-outlook-repair-attribution-outlook))))
+ (unless nodisplay (gnus-outlook-display-article-buffer))
+ attrib-start))
+
+(defun gnus-article-outlook-rearrange-citation (&optional nodisplay)
+ "Repair broken citations.
+If NODISPLAY is non-nil, don't redisplay the article buffer."
+ (interactive "P")
+ ;; Suppress the inner redisplay; this function redisplays once, in its
+ ;; final form.
+ (let ((attrib-start (gnus-article-outlook-repair-attribution 'nodisplay)))
;; rearrange citations if an attribution line has been recognized
(if attrib-start
- (gnus-outlook-rearrange-article attrib-start))))
+ (gnus-outlook-rearrange-article attrib-start)))
+ (unless nodisplay (gnus-outlook-display-article-buffer)))
;;;###autoload
-(defun gnus-outlook-deuglify-article ()
- "Deuglify broken Outlook (Express) articles."
- (interactive)
+(defun gnus-outlook-deuglify-article (&optional nodisplay)
+ "Full deuglify of broken Outlook (Express) articles.
+Treat dumbquotes, unwrap lines, repair attribution and rearrange citation. If
+NODISPLAY is non-nil, don't redisplay the article buffer."
+ (interactive "P")
+ ;; The two outlook helpers below are called with 'nodisplay so that the
+ ;; article is redisplayed at most once, by the final form.
;; apply treatment of dumb quotes
(gnus-article-treat-dumbquotes)
;; repair wrapped cited lines
- (gnus-outlook-unwrap-lines)
- ;; repair attribution line
- (gnus-outlook-rearrange-citation))
+ (gnus-article-outlook-unwrap-lines 'nodisplay)
+ ;; repair attribution line and rearrange citation.
+ (gnus-article-outlook-rearrange-citation 'nodisplay)
+ (unless nodisplay (gnus-outlook-display-article-buffer)))
;;;###autoload
(defun gnus-article-outlook-deuglify-article ()
"Deuglify broken Outlook (Express) articles and redisplay."
(interactive)
- (gnus-outlook-deuglify-article)
- (with-current-buffer (or gnus-article-buffer (current-buffer))
- ;; "Emulate" `gnus-article-prepare-display' without calling
- ;; it. Calling `gnus-article-prepare-display' on an already
- ;; prepared article removes all MIME parts. I'm unsure whether
- ;; this is a bug or not.
- (gnus-article-highlight t)
- (gnus-treat-article nil)
- (gnus-run-hooks 'gnus-article-prepare-hook)))
+ ;; Pass nil for NODISPLAY so that the article is redisplayed afterwards.
+ (gnus-outlook-deuglify-article nil))
(provide 'deuglify)