X-Git-Url: http://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Fhtml2text.el;h=8c20547806e787164a2a5e05ac4dcfcd62364d8e;hb=c23b137e5e82992f99c3972fe871eaef9fb292a2;hp=27dc599c9b1d8ceb3fcf7169bebbb465ff3a4d02;hpb=a0d19b070402b1422b6a208419b761e35722b3fd;p=gnus diff --git a/lisp/html2text.el b/lisp/html2text.el index 27dc599c9..8c2054780 100644 --- a/lisp/html2text.el +++ b/lisp/html2text.el @@ -1,25 +1,23 @@ ;;; html2text.el --- a simple html to plain text converter -;; Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. +;; Copyright (C) 2002-2012 Free Software Foundation, Inc. ;; Author: Joakim Hove ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. +;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. ;;; Commentary: @@ -93,7 +91,7 @@ completely verbatim - without any use of REGEXP.") This is a list of tags which should be removed, without any formatting. Note that tags in the list are presented *without* -any \"<\" or \">\". All occurences of a tag appearing in this +any \"<\" or \">\". 
All occurrences of a tag appearing in this list are removed, irrespective of whether it is a closing or opening tag, or if the tag has additional attributes. The deletion is done by the function `html2text-remove-tags'. @@ -125,7 +123,7 @@ If this list contains the element \"font\".") This is an alist where each dotted pair consists of a tag, and then the name of a function to be called when this tag is found. The function is called with the arguments p1, p2, p3 and p4. These are -demontrated below: +demonstrated below: \"<b> This is bold text </b>\" ^ ^ ^ ^ @@ -195,7 +193,7 @@ formatting, and then moved afterward.") ;; size=3 ((string-match "[^ ]=[^ ]" prev) (let ((attr (nth 0 (split-string prev "="))) - (value (nth 1 (split-string prev "=")))) + (value (substring prev (1+ (string-match "=" prev))))) (setq attr-list (cons (list attr value) attr-list)))) ;; size= 3 ((string-match "[^ ]=\\'" prev) @@ -206,7 +204,7 @@ formatting, and then moved afterward.") ;; size=3 ((string-match "[^ ]=[^ ]" this) (let ((attr (nth 0 (split-string this "="))) - (value (nth 1 (split-string this "=")))) + (value (substring prev (1+ (string-match "=" this))))) (setq attr-list (cons (list attr value) attr-list)))) ;; size =3 ((string-match "\\`=[^ ]" this) @@ -360,7 +358,8 @@ formatting, and then moved afterward.") (delete-region p1 p4) (when href (goto-char p1) - (insert (substring href 1 -1 )) + (insert (if (string-match "\\`['\"].*['\"]\\'" href) + (substring href 1 -1) href)) (put-text-property p1 (point) 'face 'bold)))) ;; @@ -385,10 +384,10 @@ formatting, and then moved afterward.") (setq refill-start (point)) (goto-char p2) (re-search-backward ".+[^<][^b][^r][^>]$" refill-start t) - (next-line 1) + (forward-line 1) (end-of-line) ;; refill-stop should ideally be adjusted to - ;; accomodate the "<br>" strings which are removed + ;; accommodate the "<br>" strings which are removed ;; between refill-start and refill-stop. Can simply ;; be returned from my-replace-string (setq refill-stop (+ (point) @@ -411,7 +410,7 @@ fashion, quite close to pure guess-work. It does work in some cases though." (while (re-search-forward "^<br>$" nil t) (delete-region (match-beginning 0) (match-end 0))) ;; Removing lonely <br> on a single line, if they are left intact we - ;; dont have any paragraphs at all. + ;; don't have any paragraphs at all. (goto-char (point-min)) (while (not (eobp)) (let ((p1 (point))) @@ -456,7 +455,9 @@ See the documentation for that variable." (p3) (p4)) (search-backward "<" (point-min) t) (setq p1 (point)) - (search-forward (format "</%s>" tag) (point-max) t) + (unless (search-forward (format "</%s>" tag) (point-max) t) + (goto-char p2) + (insert (format "</%s>" tag))) (setq p4 (point)) (search-backward " ;; (provide 'html2text) -;;; arch-tag: e9e57b79-35d4-4de1-a647-e7e01fe56d1e + ;;; html2text.el ends here