;;; mm-util.el --- Utility functions for Mule and low level things
-;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-;; 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+;; Copyright (C) 1998-2012 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
;;; Code:
-;; For Emacs < 22.2.
+;; For Emacs <22.2 and XEmacs.
(eval-and-compile
(unless (fboundp 'declare-function) (defmacro declare-function (&rest r))))
(require 'timer)))
(defvar mm-mime-mule-charset-alist )
+;; Note this is not presently used on Emacs >= 23, which is good,
+;; since it means standalone message-mode (which requires mml and
+;; hence mml-util) does not load gnus-util.
+(autoload 'gnus-completing-read "gnus-util")
;; Emulate functions that are not available in every (X)Emacs version.
;; The name of a function is prefixed with mm-, like `mm-char-int' for
. ,(lambda (prompt)
"Return a charset."
(intern
- (completing-read
+ (gnus-completing-read
prompt
- (mapcar (lambda (e) (list (symbol-name (car e))))
+ (mapcar (lambda (e) (symbol-name (car e)))
mm-mime-mule-charset-alist)
- nil t))))
+ t))))
;; `subst-char-in-string' is not available in XEmacs 21.4.
(subst-char-in-string
. ,(lambda (from to string &optional inplace)
(defalias 'mm-decode-coding-region 'decode-coding-region)
(defalias 'mm-encode-coding-region 'encode-coding-region)))
-;; `string-to-multibyte' is available only in Emacs 22.1 or greater.
-(defalias 'mm-string-to-multibyte
- (cond
- ((featurep 'xemacs)
- 'identity)
- ((fboundp 'string-to-multibyte)
- 'string-to-multibyte)
- (t
- (lambda (string)
- "Return a multibyte string with the same individual chars as STRING."
- (mapconcat
- (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
- string "")))))
+;; `string-to-multibyte' is available only in Emacs.
+(defalias 'mm-string-to-multibyte (if (featurep 'xemacs)
+ 'identity
+ 'string-to-multibyte))
;; `char-or-char-int-p' is an XEmacs function, not available in Emacs.
(eval-and-compile
(t 'identity))))
;; `ucs-to-char' is a function that Mule-UCS provides.
-(if (featurep 'xemacs)
- (cond ((and (fboundp 'unicode-to-char) ;; XEmacs 21.5.
- (subrp (symbol-function 'unicode-to-char)))
- (if (featurep 'mule)
- (defalias 'mm-ucs-to-char 'unicode-to-char)
+(eval-and-compile
+ (if (featurep 'xemacs)
+ (cond ((and (fboundp 'unicode-to-char) ;; XEmacs 21.5.
+ (subrp (symbol-function 'unicode-to-char)))
+ (if (featurep 'mule)
+ (defalias 'mm-ucs-to-char 'unicode-to-char)
+ (defun mm-ucs-to-char (codepoint)
+ "Convert Unicode codepoint to character."
+ (or (unicode-to-char codepoint) ?#))))
+ ((featurep 'mule)
+ (defun mm-ucs-to-char (codepoint)
+ "Convert Unicode codepoint to character."
+ (if (fboundp 'ucs-to-char) ;; Mule-UCS is loaded.
+ (progn
+ (defalias 'mm-ucs-to-char
+ (lambda (codepoint)
+ "Convert Unicode codepoint to character."
+ (condition-case nil
+ (or (ucs-to-char codepoint) ?#)
+ (error ?#))))
+ (mm-ucs-to-char codepoint))
+ (condition-case nil
+ (or (int-to-char codepoint) ?#)
+ (error ?#)))))
+ (t
(defun mm-ucs-to-char (codepoint)
"Convert Unicode codepoint to character."
- (or (unicode-to-char codepoint) ?#))))
- ((featurep 'mule)
- (defun mm-ucs-to-char (codepoint)
- "Convert Unicode codepoint to character."
- (if (fboundp 'ucs-to-char) ;; Mule-UCS is loaded.
- (progn
- (defalias 'mm-ucs-to-char
- (lambda (codepoint)
- "Convert Unicode codepoint to character."
- (condition-case nil
- (or (ucs-to-char codepoint) ?#)
- (error ?#))))
- (mm-ucs-to-char codepoint))
(condition-case nil
(or (int-to-char codepoint) ?#)
(error ?#)))))
- (t
- (defun mm-ucs-to-char (codepoint)
- "Convert Unicode codepoint to character."
- (condition-case nil
- (or (int-to-char codepoint) ?#)
- (error ?#)))))
- (if (let ((char (make-char 'japanese-jisx0208 36 34)))
- (eq char (decode-char 'ucs char)))
- ;; Emacs 23.
- (defalias 'mm-ucs-to-char 'identity)
- (defun mm-ucs-to-char (codepoint)
- "Convert Unicode codepoint to character."
- (or (decode-char 'ucs codepoint) ?#))))
+ (if (let ((char (make-char 'japanese-jisx0208 36 34)))
+ (eq char (decode-char 'ucs char)))
+ ;; Emacs 23.
+ (defalias 'mm-ucs-to-char 'identity)
+ (defun mm-ucs-to-char (codepoint)
+ "Convert Unicode codepoint to character."
+ (or (decode-char 'ucs codepoint) ?#)))))
;; Fixme: This seems always to be used to read a MIME charset, so it
;; should be re-named and fixed (in Emacs) to offer completion only on
;; Actually, there should be an `mm-coding-system-mime-charset'.
(eval-and-compile
(defalias 'mm-read-coding-system
- (cond
- ((fboundp 'read-coding-system)
- (if (and (featurep 'xemacs)
- (<= (string-to-number emacs-version) 21.1))
- (lambda (prompt &optional default-coding-system)
- (read-coding-system prompt))
- 'read-coding-system))
- (t (lambda (prompt &optional default-coding-system)
- "Prompt the user for a coding system."
- (completing-read
- prompt (mapcar (lambda (s) (list (symbol-name (car s))))
- mm-mime-mule-charset-alist)))))))
+ (if (featurep 'emacs) 'read-coding-system
+ (cond
+ ((fboundp 'read-coding-system)
+ (if (and (featurep 'xemacs)
+ (<= (string-to-number emacs-version) 21.1))
+ (lambda (prompt &optional default-coding-system)
+ (read-coding-system prompt))
+ 'read-coding-system))
+ (t (lambda (prompt &optional default-coding-system)
+ "Prompt the user for a coding system."
+ (gnus-completing-read
+ prompt (mapcar (lambda (s) (symbol-name (car s)))
+ mm-mime-mule-charset-alist))))))))
(defvar mm-coding-system-list nil)
(defun mm-get-coding-system-list ()
;; no-MULE XEmacs:
(car (memq cs (mm-get-coding-system-list))))))
-(defun mm-codepage-setup (number &optional alias)
- "Create a coding system cpNUMBER.
-The coding system is created using `codepage-setup'. If ALIAS is
-non-nil, an alias is created and added to
-`mm-charset-synonym-alist'. If ALIAS is a string, it's used as
-the alias. Else windows-NUMBER is used."
- (interactive
- (let ((completion-ignore-case t)
- (candidates (if (fboundp 'cp-supported-codepages)
- (cp-supported-codepages)
- ;; Removed in Emacs 23 (unicode), so signal an error:
- (error "`codepage-setup' not present in this Emacs version"))))
- (list (completing-read "Setup DOS Codepage: (default 437) " candidates
- nil t nil nil "437"))))
- (when alias
- (setq alias (if (stringp alias)
- (intern alias)
- (intern (format "windows-%s" number)))))
- (let* ((cp (intern (format "cp%s" number))))
- (unless (mm-coding-system-p cp)
- (if (fboundp 'codepage-setup) ; silence compiler
- (codepage-setup number)
- (error "`codepage-setup' not present in this Emacs version")))
- (when (and alias
- ;; Don't add alias if setup of cp failed.
- (mm-coding-system-p cp))
- (add-to-list 'mm-charset-synonym-alist (cons alias cp)))))
-
(defvar mm-charset-synonym-alist
`(
;; Not in XEmacs, but it's not a proper MIME charset anyhow.
See `mm-codepage-iso-8859-list' and `mm-codepage-ibm-list'.")
+(defun mm-codepage-setup (number &optional alias)
+ "Create a coding system cpNUMBER.
+The coding system is created using `codepage-setup'. If ALIAS is
+non-nil, an alias is created and added to
+`mm-charset-synonym-alist'. If ALIAS is a string, it's used as
+the alias. Else windows-NUMBER is used."
+ (interactive
+ (let ((completion-ignore-case t)
+ (candidates (if (fboundp 'cp-supported-codepages)
+ (cp-supported-codepages)
+ ;; Removed in Emacs 23 (unicode), so signal an error:
+ (error "`codepage-setup' not present in this Emacs version"))))
+ (list (gnus-completing-read "Setup DOS Codepage" candidates
+ t nil nil "437"))))
+ (when alias
+ (setq alias (if (stringp alias)
+ (intern alias)
+ (intern (format "windows-%s" number)))))
+ (let* ((cp (intern (format "cp%s" number))))
+ (unless (mm-coding-system-p cp)
+ (if (fboundp 'codepage-setup) ; silence compiler
+ (codepage-setup number)
+ (error "`codepage-setup' not present in this Emacs version")))
+ (when (and alias
+ ;; Don't add alias if setup of cp failed.
+ (mm-coding-system-p cp))
+ (add-to-list 'mm-charset-synonym-alist (cons alias cp)))))
+
(defcustom mm-codepage-iso-8859-list
(list 1250 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
;; Outlook users in Czech republic. Use this to allow reading of
- ;; their e-mails. cp1250 should be defined by M-x codepage-setup
- ;; (Emacs 21).
+ ;; their e-mails.
'(1252 . 1) ;; Windows-1252 is a superset of iso-8859-1 (West
;; Europe). See also `gnus-article-dumbquotes-map'.
'(1254 . 9) ;; Windows-1254 is a superset of iso-8859-9 (Turkish).
;; Cf. http://thread.gmane.org/v9lkng5nwy.fsf@marauder.physik.uni-ulm.de
"List of IBM codepage numbers.
-The codepage mappings slighly differ between IBM and other vendors.
+The codepage mappings slightly differ between IBM and other vendors.
See \"ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/IBM/README.TXT\".
If an element is a number corresponding to a supported windows
(defcustom mm-charset-eval-alist
(if (featurep 'xemacs)
nil ;; I don't know what would be useful for XEmacs.
- '(;; Emacs 21 offers 1250 1251 1253 1257. Emacs 22 provides autoloads for
- ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing).
+ '(;; Emacs 22 provides autoloads for 1250-1258
+ ;; (i.e. `mm-codepage-setup' does nothing).
(windows-1250 . (mm-codepage-setup 1250 t))
(windows-1251 . (mm-codepage-setup 1251 t))
(windows-1253 . (mm-codepage-setup 1253 t))
(let ((cs (cdr (assq charset mm-charset-override-alist))))
(and cs (mm-coding-system-p cs) cs))))
;; ascii
- ((eq charset 'us-ascii)
+ ((or (eq charset 'us-ascii)
+ (string-match "ansi.x3.4" (symbol-name charset)))
'ascii)
;; Check to see whether we can handle this charset. (This depends
;; on there being some coding system matching each `mime-charset'
Setting it to nil is useful on Emacsen supporting Unicode if sending
mail with multiple parts is preferred to sending a Unicode one.")
+(defvar mm-extra-numeric-entities
+ (mapcar
+ (lambda (item)
+ (cons (car item) (mm-ucs-to-char (cdr item))))
+ '((#x80 . #x20AC) (#x82 . #x201A) (#x83 . #x0192) (#x84 . #x201E)
+ (#x85 . #x2026) (#x86 . #x2020) (#x87 . #x2021) (#x88 . #x02C6)
+ (#x89 . #x2030) (#x8A . #x0160) (#x8B . #x2039) (#x8C . #x0152)
+ (#x8E . #x017D) (#x91 . #x2018) (#x92 . #x2019) (#x93 . #x201C)
+ (#x94 . #x201D) (#x95 . #x2022) (#x96 . #x2013) (#x97 . #x2014)
+ (#x98 . #x02DC) (#x99 . #x2122) (#x9A . #x0161) (#x9B . #x203A)
+ (#x9C . #x0153) (#x9E . #x017E) (#x9F . #x0178)))
+ "*Alist of extra numeric entities and characters other than ISO 10646.
+This table is used for decoding extra numeric entities to characters,
+like \"€\" to the euro sign, mainly in html messages.")
+
;;; Internal variables:
;;; Functions:
"Set the multibyte flag of the current buffer.
Only do this if the default value of `enable-multibyte-characters' is
non-nil. This is a no-op in XEmacs."
- (set-buffer-multibyte t)))
+ (set-buffer-multibyte 'to)))
(if (featurep 'xemacs)
(defalias 'mm-disable-multibyte 'ignore)
(defun mm-charset-after (&optional pos)
"Return charset of a character in current buffer at position POS.
-If POS is nil, it defauls to the current point.
+If POS is nil, it defaults to the current point.
If POS is out of range, the value is nil.
If the charset is `composition', return the actual one."
(let ((char (char-after pos)) charset)
(if (eq charset 'unknown)
(error "The message contains non-printable characters, please use attachment"))
(if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property))
- ;; This exists in Emacs 20.
(or
(and (mm-preferred-coding-system charset)
(or (coding-system-get
;; This is for XEmacs.
(mm-mule-charset-to-mime-charset charset)))
+;; `delete-dups' is not available in XEmacs 21.4.
(if (fboundp 'delete-dups)
(defalias 'mm-delete-duplicates 'delete-dups)
(defun mm-delete-duplicates (list)
prog t (list t err-file) nil args)
jka-compr-acceptable-retval-list)
(erase-buffer)
- (insert (mapconcat
- 'identity
- (delete "" (split-string
- (prog2
- (insert-file-contents err-file)
- (buffer-string)
- (erase-buffer))))
- " ")
+ (insert (mapconcat 'identity
+ (split-string
+ (prog2
+ (insert-file-contents err-file)
+ (buffer-string)
+ (erase-buffer)) t)
+ " ")
"\n")
(setq err-msg
(format "Error while executing \"%s %s < %s\""
(unless filename
(setq filename buffer-file-name))
(save-excursion
- (let ((decomp (unless ;; No worth to examine charset of tar files.
+ (let ((decomp (unless ;; Not worth it to examine charset of tar files.
(and filename
(string-match
"\\.\\(?:tar\\.[^.]+\\|tbz\\|tgz\\)\\'"
(insert decomp)
(setq filename (file-name-sans-extension filename)))
(goto-char (point-min))
- (prog1
+ (unwind-protect
(cond
((boundp 'set-auto-coding-function) ;; Emacs
(if filename