X-Git-Url: https://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Fhtml2text.el;h=68e75196c876bef171139d3c95fd2c9a3d814f81;hb=c6b6290917b2629807df2f4bfa382e849495e5df;hp=de9cf6474817605d995dc189a9cde75f18bc75a1;hpb=980ec083c4ae7b05170514b431b0a07311946114;p=gnus diff --git a/lisp/html2text.el b/lisp/html2text.el index de9cf6474..68e75196c 100644 --- a/lisp/html2text.el +++ b/lisp/html2text.el @@ -1,25 +1,23 @@ -;;; html2text.el --- a simple html to plain text converter +;;; html2text.el --- a simple html to plain text converter -*- coding: utf-8 -*- -;; Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +;; Copyright (C) 2002-2013 Free Software Foundation, Inc. ;; Author: Joakim Hove ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: @@ -67,14 +65,14 @@ ("–" . "-") ("‰" . "%%") ("±" . "+-") - ("£" . "£") + ("£" . "£") (""" . "\"") ("»" . ">>") ("&rdquo" . "\"") ("®" . "(R)") ("›" . ")") ("’" . "'") - ("§" . "§") + ("§" . "§") ("¹" . "^1") ("²" . "^2") ("³" . "^3") @@ -125,7 +123,7 @@ If this list contains the element \"font\".") This is an alist where each dotted pair consists of a tag, and then the name of a function to be called when this tag is found. The function is called with the arguments p1, p2, p3 and p4. These are -demontrated below: +demonstrated below: \" This is bold text \" ^ ^ ^ ^ @@ -195,7 +193,7 @@ formatting, and then moved afterward.") ;; size=3 ((string-match "[^ ]=[^ ]" prev) (let ((attr (nth 0 (split-string prev "="))) - (value (nth 1 (split-string prev "=")))) + (value (substring prev (1+ (string-match "=" prev))))) (setq attr-list (cons (list attr value) attr-list)))) ;; size= 3 ((string-match "[^ ]=\\'" prev) @@ -206,7 +204,7 @@ formatting, and then moved afterward.") ;; size=3 ((string-match "[^ ]=[^ ]" this) (let ((attr (nth 0 (split-string this "="))) - (value (nth 1 (split-string this "=")))) + (value (substring prev (1+ (string-match "=" this))))) (setq attr-list (cons (list attr value) attr-list)))) ;; size =3 ((string-match "\\`=[^ ]" this) @@ -360,7 +358,8 @@ formatting, and then moved afterward.") (delete-region p1 p4) (when href (goto-char p1) - (insert (substring href 1 -1 )) + (insert (if (string-match "\\`['\"].*['\"]\\'" href) + (substring href 1 -1) href)) (put-text-property p1 (point) 'face 'bold)))) ;; @@ -385,10 +384,10 @@ formatting, and then moved afterward.") (setq refill-start (point)) (goto-char p2) (re-search-backward ".+[^<][^b][^r][^>]$" refill-start t) - (next-line 1) + (forward-line 1) (end-of-line) ;; refill-stop should ideally be adjusted to - ;; accomodate the "
" strings which are removed + ;; accommodate the "
" strings which are removed ;; between refill-start and refill-stop. Can simply ;; be returned from my-replace-string (setq refill-stop (+ (point) @@ -411,7 +410,7 @@ fashion, quite close to pure guess-work. It does work in some cases though." (while (re-search-forward "^
$" nil t) (delete-region (match-beginning 0) (match-end 0))) ;; Removing lonely
on a single line, if they are left intact we - ;; dont have any paragraphs at all. + ;; don't have any paragraphs at all. (goto-char (point-min)) (while (not (eobp)) (let ((p1 (point))) @@ -456,7 +455,9 @@ See the documentation for that variable." (p3) (p4)) (search-backward "<" (point-min) t) (setq p1 (point)) - (search-forward (format "" tag) (point-max) t) + (unless (search-forward (format "" tag) (point-max) t) + (goto-char p2) + (insert (format "" tag))) (setq p4 (point)) (search-backward " ;; (provide 'html2text) -;;; arch-tag: e9e57b79-35d4-4de1-a647-e7e01fe56d1e + ;;; html2text.el ends here