;;; mm-util.el --- Utility functions for Mule and low level things
;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-;; 2005, 2006 Free Software Foundation, Inc.
+;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
+;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;;; Code:
+;; For Emacs < 22.2.
+(eval-and-compile
+ (unless (fboundp 'declare-function) (defmacro declare-function (&rest r))))
+
(eval-when-compile (require 'cl))
(require 'mail-prsvr)
(eval-and-compile
- (mapcar
+ (if (featurep 'xemacs)
+ (unless (ignore-errors
+ (require 'timer-funcs))
+ (require 'timer))
+ (require 'timer)))
+
+(defvar mm-mime-mule-charset-alist )
+
+(eval-and-compile
+ (mapc
(lambda (elem)
(let ((nfunc (intern (format "mm-%s" (car elem)))))
(if (fboundp (car elem))
(defalias nfunc (car elem))
(defalias nfunc (cdr elem)))))
- '((decode-coding-string . (lambda (s a) s))
- (encode-coding-string . (lambda (s a) s))
- (encode-coding-region . ignore)
- (coding-system-list . ignore)
- (decode-coding-region . ignore)
+ '((coding-system-list . ignore)
(char-int . identity)
(coding-system-equal . equal)
(annotationp . ignore)
;; (string-to-multibyte s) ~= (decode-coding-string s 'binary)
;; (string-make-multibyte s) ~= (decode-coding-string s locale-coding-system)
(string-as-multibyte . identity)
- (string-to-multibyte
- . (lambda (string)
- "Return a multibyte string with the same individual chars as string."
- (mapconcat
- (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
- string "")))
(multibyte-string-p . ignore)
(insert-byte . insert-char)
(multibyte-char-to-unibyte . identity)
+ (set-buffer-multibyte . ignore)
(special-display-p
. (lambda (buffer-name)
"Returns non-nil if a buffer named BUFFER-NAME gets a special frame."
(string-match (car elem) buffer-name)
(throw 'return (cdr elem))))))))))))
+(eval-and-compile
+ (if (featurep 'xemacs)
+ (if (featurep 'file-coding)
+ ;; Don't modify string if CODING-SYSTEM is nil.
+ (progn
+ (defun mm-decode-coding-string (str coding-system)
+ (if coding-system
+ (decode-coding-string str coding-system)
+ str))
+ (defun mm-encode-coding-string (str coding-system)
+ (if coding-system
+ (encode-coding-string str coding-system)
+ str))
+ (defun mm-decode-coding-region (start end coding-system)
+ (if coding-system
+ (decode-coding-region start end coding-system)))
+ (defun mm-encode-coding-region (start end coding-system)
+ (if coding-system
+ (encode-coding-region start end coding-system))))
+ (defun mm-decode-coding-string (str coding-system) str)
+ (defun mm-encode-coding-string (str coding-system) str)
+ (defalias 'mm-decode-coding-region 'ignore)
+ (defalias 'mm-encode-coding-region 'ignore))
+ (defalias 'mm-decode-coding-string 'decode-coding-string)
+ (defalias 'mm-encode-coding-string 'encode-coding-string)
+ (defalias 'mm-decode-coding-region 'decode-coding-region)
+ (defalias 'mm-encode-coding-region 'encode-coding-region)))
+
+(defalias 'mm-string-to-multibyte
+ (cond
+ ((featurep 'xemacs)
+ 'identity)
+ ((fboundp 'string-to-multibyte)
+ 'string-to-multibyte)
+ (t
+ (lambda (string)
+ "Return a multibyte string with the same individual chars as string."
+ (mapconcat
+ (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
+ string "")))))
+
(eval-and-compile
(defalias 'mm-char-or-char-int-p
(cond
the alias. Else windows-NUMBER is used."
(interactive
(let ((completion-ignore-case t)
- (candidates (cp-supported-codepages)))
+ (candidates (if (fboundp 'cp-supported-codepages)
+ (cp-supported-codepages)
+ ;; Removed in Emacs 23 (unicode), sosignal an error:
+ (error "`codepage-setup' is obsolete in this Emacs version."))))
(list (completing-read "Setup DOS Codepage: (default 437) " candidates
nil t nil nil "437"))))
(when alias
`(
;; Not in XEmacs, but it's not a proper MIME charset anyhow.
,@(unless (mm-coding-system-p 'x-ctext)
- '((x-ctext . ctext)))
- ;; ISO-8859-15 is very similar to ISO-8859-1. But it's _different_!
+ '((x-ctext . ctext)))
+ ;; ISO-8859-15 is very similar to ISO-8859-1. But it's _different_ in 8
+ ;; positions!
,@(unless (mm-coding-system-p 'iso-8859-15)
- '((iso-8859-15 . iso-8859-1)))
+ '((iso-8859-15 . iso-8859-1)))
;; BIG-5HKSCS is similar to, but different than, BIG-5.
,@(unless (mm-coding-system-p 'big5-hkscs)
'((big5-hkscs . big5)))
- ;; Windows-1252 is actually a superset of Latin-1. See also
- ;; `gnus-article-dumbquotes-map'.
- ,@(unless (mm-coding-system-p 'windows-1252)
- (if (mm-coding-system-p 'cp1252)
- '((windows-1252 . cp1252))
- '((windows-1252 . iso-8859-1))))
- ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
- ;; Outlook users in Czech republic. Use this to allow reading of their
- ;; e-mails. cp1250 should be defined by M-x codepage-setup.
- ,@(if (and (not (mm-coding-system-p 'windows-1250))
- (mm-coding-system-p 'cp1250))
- '((windows-1250 . cp1250)))
;; A Microsoft misunderstanding.
- ,@(if (and (not (mm-coding-system-p 'unicode))
- (mm-coding-system-p 'utf-16-le))
- '((unicode . utf-16-le)))
+ ,@(when (and (not (mm-coding-system-p 'unicode))
+ (mm-coding-system-p 'utf-16-le))
+ '((unicode . utf-16-le)))
;; A Microsoft misunderstanding.
,@(unless (mm-coding-system-p 'ks_c_5601-1987)
(if (mm-coding-system-p 'cp949)
'((ks_c_5601-1987 . cp949))
'((ks_c_5601-1987 . euc-kr))))
+ ;; Windows-31J is Windows Codepage 932.
+ ,@(when (and (not (mm-coding-system-p 'windows-31j))
+ (mm-coding-system-p 'cp932))
+ '((windows-31j . cp932)))
+ ;; Charset name: GBK, Charset aliases: CP936, MS936, windows-936
+ ;; http://www.iana.org/assignments/charset-reg/GBK
+ ;; Emacs 22.1 has cp936, but not gbk, so we alias it:
+ ,@(when (and (not (mm-coding-system-p 'gbk))
+ (mm-coding-system-p 'cp936))
+ '((gbk . cp936)))
+ ;; ISO8859-1 is a bogus name for ISO-8859-1
+ ,@(when (and (not (mm-coding-system-p 'iso8859-1))
+ (mm-coding-system-p 'iso-8859-1))
+ '((iso8859-1 . iso-8859-1)))
)
- "A mapping from unknown or invalid charset names to the real charset names.")
+ "A mapping from unknown or invalid charset names to the real charset names.
+
+See `mm-codepage-iso-8859-list' and `mm-codepage-ibm-list'.")
+
+(defcustom mm-codepage-iso-8859-list
+ (list 1250 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
+ ;; Outlook users in Czech republic. Use this to allow reading of
+ ;; their e-mails. cp1250 should be defined by M-x codepage-setup
+ ;; (Emacs 21).
+ '(1252 . 1) ;; Windows-1252 is a superset of iso-8859-1 (West
+ ;; Europe). See also `gnus-article-dumbquotes-map'.
+ '(1254 . 9) ;; Windows-1254 is a superset of iso-8859-9 (Turkish).
+ '(1255 . 8));; Windows-1255 is a superset of iso-8859-8 (Hebrew).
+ "A list of Windows codepage numbers and iso-8859 charset numbers.
+
+If an element is a number corresponding to a supported windows
+codepage, appropriate entries to `mm-charset-synonym-alist' are
+added by `mm-setup-codepage-iso-8859'. An element may also be a
+cons cell where the car is a codepage number and the cdr is the
+corresponding number of an iso-8859 charset."
+ :type '(list (set :inline t
+ (const 1250 :tag "Central and East European")
+ (const (1252 . 1) :tag "West European")
+ (const (1254 . 9) :tag "Turkish")
+ (const (1255 . 8) :tag "Hebrew"))
+ (repeat :inline t
+ :tag "Other options"
+ (choice
+ (integer :tag "Windows codepage number")
+ (cons (integer :tag "Windows codepage number")
+ (integer :tag "iso-8859 charset number")))))
+ :version "22.1" ;; Gnus 5.10.9
+ :group 'mime)
+
+(defcustom mm-codepage-ibm-list
+ (list 437 ;; (US etc.)
+ 860 ;; (Portugal)
+ 861 ;; (Iceland)
+ 862 ;; (Israel)
+ 863 ;; (Canadian French)
+ 865 ;; (Nordic)
+ 852 ;;
+ 850 ;; (Latin 1)
+ 855 ;; (Cyrillic)
+ 866 ;; (Cyrillic - Russian)
+ 857 ;; (Turkish)
+ 864 ;; (Arabic)
+ 869 ;; (Greek)
+ 874);; (Thai)
+ ;; In Emacs 23 (unicode), cp... and ibm... are aliases.
+ ;; Cf. http://thread.gmane.org/v9lkng5nwy.fsf@marauder.physik.uni-ulm.de
+ "List of IBM codepage numbers.
+
+The codepage mappings slighly differ between IBM and other vendors.
+See \"ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/IBM/README.TXT\".
+
+If an element is a number corresponding to a supported windows
+codepage, appropriate entries to `mm-charset-synonym-alist' are
+added by `mm-setup-codepage-ibm'."
+ :type '(list (set :inline t
+ (const 437 :tag "US etc.")
+ (const 860 :tag "Portugal")
+ (const 861 :tag "Iceland")
+ (const 862 :tag "Israel")
+ (const 863 :tag "Canadian French")
+ (const 865 :tag "Nordic")
+ (const 852)
+ (const 850 :tag "Latin 1")
+ (const 855 :tag "Cyrillic")
+ (const 866 :tag "Cyrillic - Russian")
+ (const 857 :tag "Turkish")
+ (const 864 :tag "Arabic")
+ (const 869 :tag "Greek")
+ (const 874 :tag "Thai"))
+ (repeat :inline t
+ :tag "Other options"
+ (integer :tag "Codepage number")))
+ :version "22.1" ;; Gnus 5.10.9
+ :group 'mime)
+
+(defun mm-setup-codepage-iso-8859 (&optional list)
+ "Add appropriate entries to `mm-charset-synonym-alist'.
+Unless LIST is given, `mm-codepage-iso-8859-list' is used."
+ (unless list
+ (setq list mm-codepage-iso-8859-list))
+ (dolist (i list)
+ (let (cp windows iso)
+ (if (consp i)
+ (setq cp (intern (format "cp%d" (car i)))
+ windows (intern (format "windows-%d" (car i)))
+ iso (intern (format "iso-8859-%d" (cdr i))))
+ (setq cp (intern (format "cp%d" i))
+ windows (intern (format "windows-%d" i))))
+ (unless (mm-coding-system-p windows)
+ (if (mm-coding-system-p cp)
+ (add-to-list 'mm-charset-synonym-alist (cons windows cp))
+ (add-to-list 'mm-charset-synonym-alist (cons windows iso)))))))
+
+(defun mm-setup-codepage-ibm (&optional list)
+ "Add appropriate entries to `mm-charset-synonym-alist'.
+Unless LIST is given, `mm-codepage-ibm-list' is used."
+ (unless list
+ (setq list mm-codepage-ibm-list))
+ (dolist (number list)
+ (let ((ibm (intern (format "ibm%d" number)))
+ (cp (intern (format "cp%d" number))))
+ (when (and (not (mm-coding-system-p ibm))
+ (mm-coding-system-p cp))
+ (add-to-list 'mm-charset-synonym-alist (cons ibm cp))))))
+
+;; Initialize:
+(mm-setup-codepage-iso-8859)
+(mm-setup-codepage-ibm)
(defcustom mm-charset-override-alist
- `((iso-8859-1 . windows-1252))
+ '((iso-8859-1 . windows-1252)
+ (iso-8859-8 . windows-1255)
+ (iso-8859-9 . windows-1254))
"A mapping from undesired charset names to their replacement.
You may add pairs like (iso-8859-1 . windows-1252) here,
superset of iso-8859-1."
:type '(list (set :inline t
(const (iso-8859-1 . windows-1252))
+ (const (iso-8859-8 . windows-1255))
+ (const (iso-8859-9 . windows-1254))
(const (undecided . windows-1252)))
(repeat :inline t
:tag "Other options"
(cons (symbol :tag "From charset")
(symbol :tag "To charset"))))
- :version "23.0" ;; No Gnus
+ :version "22.1" ;; Gnus 5.10.9
:group 'mime)
(defcustom mm-charset-eval-alist
evaluated. This allows to load additional libraries providing
charsets on demand. If supported by your Emacs version, you
could use `autoload-coding-system' here."
- :version "23.0" ;; No Gnus
+ :version "22.1" ;; Gnus 5.10.9
:type '(list (set :inline t
(const (windows-1250 . (mm-codepage-setup 1250 t)))
(const (windows-1251 . (mm-codepage-setup 1251 t)))
(cons (symbol :tag "charset")
(symbol :tag "form"))))
:group 'mime)
+(put 'mm-charset-eval-alist 'risky-local-variable t)
(defvar mm-binary-coding-system
(cond
(iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
(euc-kr korean-ksc5601)
(gb2312 chinese-gb2312)
+ (gbk chinese-gbk)
+ (gb18030 gb18030-2-byte
+ gb18030-4-byte-bmp gb18030-4-byte-smp
+ gb18030-4-byte-ext-1 gb18030-4-byte-ext-2)
(big5 chinese-big5-1 chinese-big5-2)
(tibetan tibetan)
(thai-tis620 thai-tis620)
(iso-2022-jp-3 latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208
japanese-jisx0213-1 japanese-jisx0213-2)
(shift_jis latin-jisx0201 katakana-jisx0201 japanese-jisx0208)
- ,(if (or (not (fboundp 'charsetp)) ;; non-Mule case
- (charsetp 'unicode-a)
- (not (mm-coding-system-p 'mule-utf-8)))
- '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e)
- ;; If we have utf-8 we're in Mule 5+.
- (append '(utf-8)
- (delete 'ascii
- (coding-system-get 'mule-utf-8 'safe-charsets)))))
+ ,(cond ((fboundp 'unicode-precedence-list)
+ (cons 'utf-8 (delq 'ascii (mapcar 'charset-name
+ (unicode-precedence-list)))))
+ ((or (not (fboundp 'charsetp)) ;; non-Mule case
+ (charsetp 'unicode-a)
+ (not (mm-coding-system-p 'mule-utf-8)))
+ '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e))
+ (t ;; If we have utf-8 we're in Mule 5+.
+ (append '(utf-8)
+ (delete 'ascii
+ (coding-system-get 'mule-utf-8 'safe-charsets))))))
"Alist of MIME-charset/MULE-charsets.")
(defun mm-enrich-utf-8-by-mule-ucs ()
This function will run when the `un-define' module is loaded under
XEmacs, and fill the `utf-8' entry in `mm-mime-mule-charset-alist'
with Mule charsets. It is completely useless for Emacs."
- (unless (cdr (delete '(mm-enrich-utf-8-by-mule-ucs)
- (assoc "un-define" after-load-alist)))
- (setq after-load-alist
- (delete '("un-define") after-load-alist)))
(when (boundp 'unicode-basic-translation-charset-order-list)
(condition-case nil
(let ((val (delq
cs mime mule alist)
(while css
(setq cs (pop css)
- mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode)
+ mime (or (coding-system-get cs :mime-charset); Emacs 23 (unicode)
(coding-system-get cs 'mime-charset)))
(when (and mime
(not (eq t (setq mule
If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to
map undesired charset names to their replacement. This should
only be used for decoding, not for encoding."
- ;; OVERRIDE is used (only) in `mm-decode-body'.
+ ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'.
(when (stringp charset)
(setq charset (intern (downcase charset))))
(when lbt
(autoload 'latin-unity-massage-name "latin-unity")
(autoload 'latin-unity-maybe-remap "latin-unity")
(autoload 'latin-unity-representations-feasible-region "latin-unity")
- (autoload 'latin-unity-representations-present-region "latin-unity")
- (defvar latin-unity-coding-systems)
- (defvar latin-unity-ucs-list))
+ (autoload 'latin-unity-representations-present-region "latin-unity"))
+
+(defvar latin-unity-coding-systems)
+(defvar latin-unity-ucs-list)
(defun mm-xemacs-find-mime-charset-1 (begin end)
"Determine which MIME charset to use to send region as message.
(when (featurep 'xemacs)
`(and (featurep 'mule) (mm-xemacs-find-mime-charset-1 ,begin ,end))))
+(declare-function mm-delete-duplicates "mm-util" (list))
+
(defun mm-find-mime-charset-region (b e &optional hack-charsets)
"Return the MIME charsets needed to encode the region between B and E.
nil means ASCII, a single-element list represents an appropriate MIME
;; Otherwise, we'll get nil, and the next setq will get invoked.
(setq charsets (mm-xemacs-find-mime-charset b e))
+ ;; Fixme: won't work for unibyte Emacs 23:
+
;; We're not multibyte, or a single coding system won't cover it.
(setq charsets
(mm-delete-duplicates
(memq 'iso-8859-15 charsets)
(memq 'iso-8859-15 hack-charsets)
(save-excursion (mm-iso-8859-x-to-15-region b e)))
- (mapcar (lambda (x) (setq charsets (delq (car x) charsets)))
- mm-iso-8859-15-compatible))
+ (dolist (x mm-iso-8859-15-compatible)
+ (setq charsets (delq (car x) charsets))))
(if (and (memq 'iso-2022-jp-2 charsets)
(memq 'iso-2022-jp-2 hack-charsets))
(setq charsets (delq 'iso-2022-jp charsets)))
(defmacro mm-with-unibyte-buffer (&rest forms)
"Create a temporary buffer, and evaluate FORMS there like `progn'.
Use unibyte mode for this."
- `(let (default-enable-multibyte-characters)
- (with-temp-buffer ,@forms)))
+ `(with-temp-buffer
+ (mm-disable-multibyte)
+ ,@forms))
(put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
(defmacro mm-with-multibyte-buffer (&rest forms)
"Create a temporary buffer, and evaluate FORMS there like `progn'.
Use multibyte mode for this."
- `(let ((default-enable-multibyte-characters t))
- (with-temp-buffer ,@forms)))
+ `(with-temp-buffer
+ (mm-enable-multibyte)
+ ,@forms))
(put 'mm-with-multibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-multibyte-buffer 'edebug-form-spec '(body))
;; Remove composition since the base charsets have been included.
;; Remove eight-bit-*, treat them as ascii.
(let ((css (find-charset-region b e)))
- (mapcar (lambda (cs) (setq css (delq cs css)))
- '(composition eight-bit-control eight-bit-graphic
- control-1))
- css))
+ (dolist (cs
+ '(composition eight-bit-control eight-bit-graphic control-1)
+ css)
+ (setq css (delq cs css)))))
(t
;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
(save-excursion
(nreverse out)))
(defvar mm-inhibit-file-name-handlers
- '(jka-compr-handler image-file-handler)
+ '(jka-compr-handler image-file-handler epa-file-handler)
"A list of handlers doing (un)compression (etc) thingies.")
(defun mm-insert-file-contents (filename &optional visit beg end replace
inhibit-file-name-handlers)))
(write-region start end filename append visit lockname)))
+(autoload 'gmm-write-region "gmm-utils")
+
;; It is not a MIME function, but some MIME functions use it.
(if (and (fboundp 'make-temp-file)
(ignore-errors
(setq file (concat file suffix)))
(if dir-flag
(make-directory file)
- (if (featurep 'xemacs)
- ;; NOTE: This is unsafe if XEmacs users
- ;; don't use a secure temp directory.
- (if (file-exists-p file)
- (signal 'file-already-exists
- (list "File exists" file))
- (write-region "" nil file nil 'silent))
- (write-region "" nil file nil 'silent
- nil 'excl)))
+ ;; NOTE: This is unsafe if Emacs 20
+ ;; users and XEmacs users don't use
+ ;; a secure temp directory.
+ (gmm-write-region "" nil file nil 'silent
+ nil 'excl))
nil)
(file-already-exists t)
;; The XEmacs version of `make-directory' issues
(if (eq (point) end) 'ascii (mm-guess-charset))
(goto-char point)))))
+(declare-function mm-detect-coding-region "mm-util" (start end))
+
(if (fboundp 'coding-system-get)
(defun mm-detect-mime-charset-region (start end)
"Detect MIME charset of the text in the region between START and END."
(funcall (symbol-value 'set-auto-coding-function)
nil (- (point-max) (point-min)))
(error nil)))))
- ((featurep 'file-coding) ;; XEmacs
+ ((and (featurep 'xemacs) (featurep 'file-coding)) ;; XEmacs
(let ((case-fold-search t)
(end (point-at-eol))
codesys start)