From 4ebfe404dc0192f5c96ea5ab6fc277b350e1a294 Mon Sep 17 00:00:00 2001 From: Katsumi Yamaoka Date: Tue, 7 Dec 2010 05:04:47 +0000 Subject: [PATCH] mm-util.el (mm-extra-numeric-entities): New variable. mm-url.el (mm-url-decode-entities): mm-decode.el (mm-shr): Use it to decode extra numeric entities. lpath.el: Fbind completion-at-point for Emacs 22 and XEmacs. --- lisp/ChangeLog | 9 +++++++++ lisp/lpath.el | 10 +++++----- lisp/mm-decode.el | 28 ++++++++++++++++++++-------- lisp/mm-url.el | 19 +++++++++++-------- lisp/mm-util.el | 15 +++++++++++++++ 5 files changed, 60 insertions(+), 21 deletions(-) diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 2483ca777..95e2e380a 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,12 @@ +2010-12-07 Katsumi Yamaoka + + * mm-util.el (mm-extra-numeric-entities): New variable. + + * mm-url.el (mm-url-decode-entities): + * mm-decode.el (mm-shr): Use it to decode extra numeric entities. + + * lpath.el: Fbind completion-at-point for Emacs 22 and XEmacs. + 2010-12-07 Stefan Monnier * message.el: Use completion-at-point. 
diff --git a/lisp/lpath.el b/lisp/lpath.el index 4fa928b02..14aa069dc 100644 --- a/lisp/lpath.el +++ b/lisp/lpath.el @@ -30,8 +30,8 @@ '(Info-index Info-index-next Info-menu bbdb-complete-name bookmark-default-handler bookmark-get-bookmark-record bookmark-make-record-default - bookmark-prop-get display-time-event-handler epg-check-configuration - find-coding-system frame-device gnutls-negotiate + bookmark-prop-get completion-at-point display-time-event-handler + epg-check-configuration find-coding-system frame-device gnutls-negotiate libxml-parse-html-region recenter-top-bottom rmail-swap-buffers-maybe shr-insert-document w3-do-setup w3-parse-buffer w3-prepare-buffer w3-region w3m-detect-meta-charset w3m-region)) @@ -57,9 +57,9 @@ '(bookmark-default-handler bookmark-get-bookmark-record bookmark-make-record-default bookmark-prop-get clear-string codepage-setup coding-system-from-name - cp-supported-codepages create-image delete-overlay detect-coding-string - display-time-event-handler epg-check-configuration event-click-count - event-end event-start find-coding-systems-for-charsets + completion-at-point cp-supported-codepages create-image delete-overlay + detect-coding-string display-time-event-handler epg-check-configuration + event-click-count event-end event-start find-coding-systems-for-charsets find-coding-systems-region find-coding-systems-string find-image float-time gnutls-negotiate help-buffer ido-completing-read image-size image-type-available-p insert-image libxml-parse-html-region diff --git a/lisp/mm-decode.el b/lisp/mm-decode.el index bd9e70414..216ed6624 100644 --- a/lisp/mm-decode.el +++ b/lisp/mm-decode.el @@ -1699,7 +1699,7 @@ If RECURSIVE, search recursively." 
(when handle (mm-with-part handle (buffer-string)))))) - shr-inhibit-images shr-blocked-images charset) + shr-inhibit-images shr-blocked-images charset char) (if (and (boundp 'gnus-summary-buffer) (buffer-name gnus-summary-buffer)) (with-current-buffer gnus-summary-buffer @@ -1714,13 +1714,25 @@ If RECURSIVE, search recursively." (narrow-to-region (point) (point)) (shr-insert-document (mm-with-part handle - (when (and charset - (setq charset (mm-charset-to-coding-system charset)) - (not (eq charset 'ascii))) - (insert (prog1 - (mm-decode-coding-string (buffer-string) charset) - (erase-buffer) - (mm-enable-multibyte)))) + (insert (prog1 + (if (and charset + (setq charset + (mm-charset-to-coding-system charset)) + (not (eq charset 'ascii))) + (mm-decode-coding-string (buffer-string) charset) + (mm-string-as-multibyte (buffer-string))) + (erase-buffer) + (mm-enable-multibyte))) + (goto-char (point-min)) + (setq case-fold-search t) + (while (re-search-forward + "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t) + (when (setq char + (cdr (assq (if (match-beginning 1) + (string-to-number (match-string 1) 16) + (string-to-number (match-string 2))) + mm-extra-numeric-entities))) + (replace-match (char-to-string char)))) (libxml-parse-html-region (point-min) (point-max)))) (mm-handle-set-undisplayer handle diff --git a/lisp/mm-url.el b/lisp/mm-url.el index 0da136e1e..0c2b80c9c 100644 --- a/lisp/mm-url.el +++ b/lisp/mm-url.el @@ -365,16 +365,19 @@ If FOLLOW-REFRESH is non-nil, redirect refresh url in META." (defun mm-url-decode-entities () "Decode all HTML entities." 
(goto-char (point-min)) - (while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);" nil t) + (while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);" + nil t) (let* ((entity (match-string 1)) (elem (if (eq (aref entity 0) ?\#) - (let ((c (mm-ucs-to-char - ;; Hex number: &#x3212; - (if (eq (aref entity 1) ?x) - (string-to-number (substring entity 2) - 16) - ;; Decimal number:  - (string-to-number (substring entity 1)))))) + (let ((c + ;; Hex number: &#x3212; + (if (eq (aref entity 1) ?x) + (string-to-number (substring entity 2) + 16) + ;; Decimal number:  + (string-to-number (substring entity 1))))) + (setq c (or (cdr (assq c mm-extra-numeric-entities)) + (mm-ucs-to-char c))) (if (mm-char-or-char-int-p c) c ?#)) (or (cdr (assq (intern entity) mm-url-html-entities)) diff --git a/lisp/mm-util.el b/lisp/mm-util.el index 2f6464d43..af4ac588d 100644 --- a/lisp/mm-util.el +++ b/lisp/mm-util.el @@ -866,6 +866,21 @@ variable is set, it overrides the default priority." Setting it to nil is useful on Emacsen supporting Unicode if sending mail with multiple parts is preferred to sending a Unicode one.") +(defvar mm-extra-numeric-entities + (mapcar + (lambda (item) + (cons (car item) (mm-ucs-to-char (cdr item)))) + '((#x80 . #x20AC) (#x82 . #x201A) (#x83 . #x0192) (#x84 . #x201E) + (#x85 . #x2026) (#x86 . #x2020) (#x87 . #x2021) (#x88 . #x02C6) + (#x89 . #x2030) (#x8A . #x0160) (#x8B . #x2039) (#x8C . #x0152) + (#x8E . #x017D) (#x91 . #x2018) (#x92 . #x2019) (#x93 . #x201C) + (#x94 . #x201D) (#x95 . #x2022) (#x96 . #x2013) (#x97 . #x2014) + (#x98 . #x02DC) (#x99 . #x2122) (#x9A . #x0161) (#x9B . #x203A) + (#x9C . #x0153) (#x9E . #x017E) (#x9F . #x0178))) + "*Alist of extra numeric entities and characters other than ISO 10646. +This table is used for decoding extra numeric entities to characters, +like \"&#128;\" to the euro sign, mainly in html messages.") + ;;; Internal variables: ;;; Functions: -- 2.25.1