X-Git-Url: https://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Fhtml2text.el;h=6411eb62564d983ddb8a9279622f8eefd79335b1;hb=02bea9e4a1bdc9ed57def6e2fd615a2323b6abcc;hp=7a58e38b60552ff5b6285f32322e3f45121e87f3;hpb=fa8afa0628f8922d4cb80d3b6a7de7aa3ba3a6e5;p=gnus diff --git a/lisp/html2text.el b/lisp/html2text.el index 7a58e38b6..6411eb625 100644 --- a/lisp/html2text.el +++ b/lisp/html2text.el @@ -1,24 +1,23 @@ ;;; html2text.el --- a simple html to plain text converter -;; Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + +;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. ;; Author: Joakim Hove ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. ;;; Commentary: @@ -42,8 +41,42 @@ (defvar html2text-format-single-element-list '(("hr" . html2text-clean-hr))) (defvar html2text-replace-list - '(("&nbsp;" . " ") ("&gt;" . ">") ("&lt;" . "<") ("&quot;" . "\"") - ("&amp;" . "&") ("&apos;" . "'")) + '(("&acute;" . "`") + ("&amp;" . "&") + ("&apos;" . "'") + ("&brvbar;" . "|") + ("&cent;" . "c") + ("&circ;" . "^") + ("&copy;" . "(C)") + ("&curren;" . "(#)") + ("&deg;" . "degree") + ("&divide;" . "/") + ("&euro;" . "e") + ("&frac12;" . "1/2") + ("&gt;" . ">") + ("&iquest;" . "?") + ("&laquo;" . "<<") + ("&ldquo" . "\"") + ("&lsaquo;" . "(") + ("&lsquo;" . "`") + ("&lt;" . "<") + ("&mdash;" . "--") + ("&nbsp;" . " ") + ("&ndash;" . "-") + ("&permil;" . "%%") + ("&plusmn;" . "+-") + ("&pound;" . "£") + ("&quot;" . "\"") + ("&raquo;" . ">>") + ("&rdquo" . "\"") + ("&reg;" . "(R)") + ("&rsaquo;" . ")") + ("&rsquo;" . "'") + ("&sect;" . "§") + ("&sup1;" . "^1") + ("&sup2;" . "^2") + ("&sup3;" . "^3") + ("&tilde;" . "~")) "The map of entity to text. This is an alist were each element is a dotted pair consisting of an @@ -58,7 +91,7 @@ completely verbatim - without any use of REGEXP.") This is a list of tags which should be removed, without any formatting. Note that tags in the list are presented *without* -any \"<\" or \">\". All occurences of a tag appearing in this +any \"<\" or \">\". All occurrences of a tag appearing in this list are removed, irrespective of whether it is a closing or opening tag, or if the tag has additional attributes. The deletion is done by the function `html2text-remove-tags'. @@ -350,10 +383,10 @@ formatting, and then moved afterward.") (setq refill-start (point)) (goto-char p2) (re-search-backward ".+[^<][^b][^r][^>]$" refill-start t) - (next-line 1) + (forward-line 1) (end-of-line) ;; refill-stop should ideally be adjusted to - ;; accomodate the "<br>" strings which are removed + ;; accommodate the "<br>" strings which are removed ;; between refill-start and refill-stop. Can simply ;; be returned from my-replace-string (setq refill-stop (+ (point) @@ -421,7 +454,9 @@ See the documentation for that variable." (p3) (p4)) (search-backward "<" (point-min) t) (setq p1 (point)) - (search-forward (format "</%s>" tag) (point-max) t) + (unless (search-forward (format "</%s>" tag) (point-max) t) + (goto-char p2) + (insert (format "</%s>" tag))) (setq p4 (point)) (search-backward " ;; (provide 'html2text) -;;; arch-tag: e9e57b79-35d4-4de1-a647-e7e01fe56d1e + ;;; html2text.el ends here