1 ;;; mm-util.el --- Utility functions for Mule and low level things
2 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004
3 ;; Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ;; Boston, MA 02111-1307, USA.
28 (eval-when-compile (require 'cl))
34 (let ((nfunc (intern (format "mm-%s" (car elem)))))
35 (if (fboundp (car elem))
36 (defalias nfunc (car elem))
37 (defalias nfunc (cdr elem)))))
38 '((decode-coding-string . (lambda (s a) s))
39 (encode-coding-string . (lambda (s a) s))
40 (encode-coding-region . ignore)
41 (coding-system-list . ignore)
42 (decode-coding-region . ignore)
44 (coding-system-equal . equal)
45 (annotationp . ignore)
46 (set-buffer-file-coding-system . ignore)
53 (mapcar (lambda (e) (list (symbol-name (car e))))
54 mm-mime-mule-charset-alist)
57 . (lambda (from to string &optional inplace) ;; stolen (and renamed) from nnheader.el
58 "Replace characters in STRING from FROM to TO.
59 Unless optional argument INPLACE is non-nil, return a new string."
60 (let ((string (if inplace string (copy-sequence string)))
63 ;; Replace all occurrences of FROM with TO.
65 (when (= (aref string idx) from)
69 (string-as-unibyte . identity)
70 (string-make-unibyte . identity)
71 (string-as-multibyte . identity)
72 (multibyte-string-p . ignore)
73 ;; It is not a MIME function, but some MIME functions use it.
74 (make-temp-file . (lambda (prefix &optional dir-flag)
75 (let ((file (expand-file-name
76 (make-temp-name prefix)
77 (if (fboundp 'temp-directory)
79 temporary-file-directory))))
81 (make-directory file))
83 (insert-byte . insert-char)
84 (multibyte-char-to-unibyte . identity))))
87 (defalias 'mm-char-or-char-int-p
89 ((fboundp 'char-or-char-int-p) 'char-or-char-int-p)
90 ((fboundp 'char-valid-p) 'char-valid-p)
;; Fixme: This seems always to be used to read a MIME charset, so it
;; should be renamed and fixed (in Emacs) to offer completion only on
;; proper charset names (base coding systems which have a
;; mime-charset defined).  XEmacs doesn't believe in mime-charset;
;; check for the property with
;;   `(or (coding-system-get 'iso-8859-1 'mime-charset)
;;        (coding-system-get 'iso-8859-1 :mime-charset))'
;; Actually, there should be an `mm-coding-system-mime-charset'.
102 (defalias 'mm-read-coding-system
104 ((fboundp 'read-coding-system)
105 (if (and (featurep 'xemacs)
106 (<= (string-to-number emacs-version) 21.1))
107 (lambda (prompt &optional default-coding-system)
108 (read-coding-system prompt))
109 'read-coding-system))
110 (t (lambda (prompt &optional default-coding-system)
111 "Prompt the user for a coding system."
113 prompt (mapcar (lambda (s) (list (symbol-name (car s))))
114 mm-mime-mule-charset-alist)))))))
(defvar mm-coding-system-list nil
  "Cached value returned by the function `mm-coding-system-list'.
It stays nil until `mm-get-coding-system-list' fills it in.")

(defun mm-get-coding-system-list ()
  "Return the list of coding systems, computing it on first use.
The result is cached in the variable `mm-coding-system-list'; while
that variable is nil the function `mm-coding-system-list' is called
again on every request."
  (unless mm-coding-system-list
    (setq mm-coding-system-list (mm-coding-system-list)))
  mm-coding-system-list)
122 (defun mm-coding-system-p (cs)
123 "Return non-nil if CS is a symbol naming a coding system.
124 In XEmacs, also return non-nil if CS is a coding system object."
125 (if (fboundp 'find-coding-system)
126 (find-coding-system cs)
127 (if (fboundp 'coding-system-p)
129 ;; Is this branch ever actually useful?
130 (memq cs (mm-get-coding-system-list)))))
132 (defvar mm-charset-synonym-alist
134 ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
135 ,@(unless (mm-coding-system-p 'x-ctext)
136 '((x-ctext . ctext)))
137 ;; ISO-8859-15 is very similar to ISO-8859-1. But it's _different_!
138 ,@(unless (mm-coding-system-p 'iso-8859-15)
139 '((iso-8859-15 . iso-8859-1)))
140 ;; BIG-5HKSCS is similar to, but different than, BIG-5.
141 ,@(unless (mm-coding-system-p 'big5-hkscs)
142 '((big5-hkscs . big5)))
143 ;; Windows-1252 is actually a superset of Latin-1. See also
144 ;; `gnus-article-dumbquotes-map'.
145 ,@(unless (mm-coding-system-p 'windows-1252)
146 (if (mm-coding-system-p 'cp1252)
147 '((windows-1252 . cp1252))
148 '((windows-1252 . iso-8859-1))))
149 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
150 ;; Outlook users in Czech republic. Use this to allow reading of their
151 ;; e-mails. cp1250 should be defined by M-x codepage-setup.
152 ,@(if (and (not (mm-coding-system-p 'windows-1250))
153 (mm-coding-system-p 'cp1250))
154 '((windows-1250 . cp1250)))
155 ;; A Microsoft misunderstanding.
156 ,@(unless (mm-coding-system-p 'ks_c_5601-1987)
157 (if (mm-coding-system-p 'cp949)
158 '((ks_c_5601-1987 . cp949))
159 '((ks_c_5601-1987 . euc-kr))))
161 "A mapping from invalid charset names to the real charset names.")
163 (defvar mm-binary-coding-system
165 ((mm-coding-system-p 'binary) 'binary)
166 ((mm-coding-system-p 'no-conversion) 'no-conversion)
168 "100% binary coding system.")
(defvar mm-text-coding-system
  ;; Pick the raw-text variant for this platform, then fall back to the
  ;; binary coding system when that variant is not available.
  (let ((preferred (if (memq system-type '(windows-nt ms-dos ms-windows))
                       'raw-text-dos
                     'raw-text)))
    (if (mm-coding-system-p preferred)
        preferred
      mm-binary-coding-system))
  "Text-safe coding system (For removing ^M).
This is `raw-text-dos' on `windows-nt', `ms-dos' and `ms-windows'
system types and `raw-text' elsewhere, provided `mm-coding-system-p'
accepts it; otherwise it is the value of `mm-binary-coding-system'.")
(defvar mm-text-coding-system-for-write nil
  "Text coding system to use for writing.
The default value is nil.")
180 (defvar mm-auto-save-coding-system
182 ((mm-coding-system-p 'utf-8-emacs) ; Mule 7
183 (if (memq system-type '(windows-nt ms-dos ms-windows))
184 (if (mm-coding-system-p 'utf-8-emacs-dos)
185 'utf-8-emacs-dos mm-binary-coding-system)
187 ((mm-coding-system-p 'emacs-mule)
188 (if (memq system-type '(windows-nt ms-dos ms-windows))
189 (if (mm-coding-system-p 'emacs-mule-dos)
190 'emacs-mule-dos mm-binary-coding-system)
192 ((mm-coding-system-p 'escape-quoted) 'escape-quoted)
193 (t mm-binary-coding-system))
194 "Coding system of auto save file.")
(defvar mm-universal-coding-system mm-auto-save-coding-system
  "The universal coding system.
Its initial value is copied from `mm-auto-save-coding-system'.")
199 ;; Fixme: some of the cars here aren't valid MIME charsets. That
200 ;; should only matter with XEmacs, though.
201 (defvar mm-mime-mule-charset-alist
203 (iso-8859-1 latin-iso8859-1)
204 (iso-8859-2 latin-iso8859-2)
205 (iso-8859-3 latin-iso8859-3)
206 (iso-8859-4 latin-iso8859-4)
207 (iso-8859-5 cyrillic-iso8859-5)
208 ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
209 ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
210 ;; charset is koi8-r, not iso-8859-5.
211 (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
212 (iso-8859-6 arabic-iso8859-6)
213 (iso-8859-7 greek-iso8859-7)
214 (iso-8859-8 hebrew-iso8859-8)
215 (iso-8859-9 latin-iso8859-9)
216 (iso-8859-14 latin-iso8859-14)
217 (iso-8859-15 latin-iso8859-15)
218 (viscii vietnamese-viscii-lower)
219 (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
220 (euc-kr korean-ksc5601)
221 (gb2312 chinese-gb2312)
222 (big5 chinese-big5-1 chinese-big5-2)
224 (thai-tis620 thai-tis620)
225 (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
226 (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
227 latin-jisx0201 japanese-jisx0208-1978
228 chinese-gb2312 japanese-jisx0208
229 korean-ksc5601 japanese-jisx0212
231 (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
232 latin-jisx0201 japanese-jisx0208-1978
233 chinese-gb2312 japanese-jisx0208
234 korean-ksc5601 japanese-jisx0212
235 chinese-cns11643-1 chinese-cns11643-2)
236 (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
237 cyrillic-iso8859-5 greek-iso8859-7
238 latin-jisx0201 japanese-jisx0208-1978
239 chinese-gb2312 japanese-jisx0208
240 korean-ksc5601 japanese-jisx0212
241 chinese-cns11643-1 chinese-cns11643-2
242 chinese-cns11643-3 chinese-cns11643-4
243 chinese-cns11643-5 chinese-cns11643-6
245 ,(if (or (not (fboundp 'charsetp)) ;; non-Mule case
246 (charsetp 'unicode-a)
247 (not (mm-coding-system-p 'mule-utf-8)))
248 '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e)
249 ;; If we have utf-8 we're in Mule 5+.
252 (coding-system-get 'mule-utf-8 'safe-charsets)))))
253 "Alist of MIME-charset/MULE-charsets.")
255 ;; Correct by construction, but should be unnecessary:
257 (when (and (not (featurep 'xemacs))
258 (fboundp 'coding-system-list)
259 (fboundp 'sort-coding-systems))
260 (setq mm-mime-mule-charset-alist
265 (when (and (or (coding-system-get cs :mime-charset) ; Emacs 22
266 (coding-system-get cs 'mime-charset))
267 (not (eq t (coding-system-get cs 'safe-charsets))))
268 (list (cons (or (coding-system-get cs :mime-charset)
269 (coding-system-get cs 'mime-charset))
271 (coding-system-get cs 'safe-charsets))))))
272 (sort-coding-systems (coding-system-list 'base-only))))))
(defvar mm-hack-charsets '(iso-8859-15 iso-2022-jp-2)
  "A list of special charsets.
Valid elements include:
`iso-8859-15'    convert ISO-8859-1, -9 to ISO-8859-15 if ISO-8859-15 exists.
`iso-2022-jp-2'  convert ISO-2022-jp to ISO-2022-jp-2 if ISO-2022-jp-2 exists.")
(defvar mm-iso-8859-15-compatible
  (list
   (list 'iso-8859-1 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE")
   (list 'iso-8859-9 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xD0\xDD\xDE\xF0\xFD\xFE"))
  "ISO-8859-15 exchangeable coding systems and inconvertible characters.
Each element has the form (CODING-SYSTEM STRING), where STRING lists
the inconvertible characters for CODING-SYSTEM.")
286 (defvar mm-iso-8859-x-to-15-table
287 (and (fboundp 'coding-system-p)
288 (mm-coding-system-p 'iso-8859-15)
291 (if (mm-coding-system-p (car cs))
292 (let ((c (string-to-char
293 (decode-coding-string "\341" (car cs)))))
294 (cons (char-charset c)
297 (decode-coding-string "\341" 'iso-8859-15)) c)
298 (string-to-list (decode-coding-string (car (cdr cs))
301 mm-iso-8859-15-compatible))
302 "A table of the difference character between ISO-8859-X and ISO-8859-15.")
304 (defcustom mm-coding-system-priorities
305 (if (boundp 'current-language-environment)
306 (let ((lang (symbol-value 'current-language-environment)))
307 (cond ((string= lang "Japanese")
308 ;; Japanese users may prefer iso-2022-jp to shift-jis.
309 '(iso-2022-jp iso-2022-jp-2 japanese-shift-jis
310 iso-latin-1 utf-8)))))
311 "Preferred coding systems for encoding outgoing messages.
313 More than one suitable coding system may be found for some text.
314 By default, the coding system with the highest priority is used
315 to encode outgoing messages (see `sort-coding-systems'). If this
316 variable is set, it overrides the default priority."
317 :type '(repeat (symbol :tag "Coding system"))
(defvar mm-use-find-coding-systems-region
  (fboundp 'find-coding-systems-region)
  "Non-nil means use `find-coding-systems-region' to find coding systems.

The initial value is non-nil exactly when that function is defined.
Setting it to nil is useful on Emacsen supporting Unicode if sending
mail with multiple parts is preferred to sending a Unicode one.")
328 ;;; Internal variables:
332 (defun mm-mule-charset-to-mime-charset (charset)
333 "Return the MIME charset corresponding to the given Mule CHARSET."
334 (if (and (fboundp 'find-coding-systems-for-charsets)
335 (fboundp 'sort-coding-systems))
337 (dolist (cs (sort-coding-systems
339 (find-coding-systems-for-charsets (list charset)))))
342 (setq mime (or (coding-system-get cs :mime-charset)
343 (coding-system-get cs 'mime-charset))))))
345 (let ((alist mm-mime-mule-charset-alist)
348 (when (memq charset (cdar alist))
349 (setq out (caar alist)
354 (defun mm-charset-to-coding-system (charset &optional lbt)
355 "Return coding-system corresponding to CHARSET.
356 CHARSET is a symbol naming a MIME charset.
357 If optional argument LBT (`unix', `dos' or `mac') is specified, it is
358 used as the line break code type of the coding system."
359 (when (stringp charset)
360 (setq charset (intern (downcase charset))))
362 (setq charset (intern (format "%s-%s" charset lbt))))
366 ;; Running in a non-MULE environment.
367 ((or (null (mm-get-coding-system-list))
368 (not (fboundp 'coding-system-get)))
371 ((eq charset 'us-ascii)
373 ;; Check to see whether we can handle this charset. (This depends
374 ;; on there being some coding system matching each `mime-charset'
375 ;; property defined, as there should be.)
376 ((and (mm-coding-system-p charset)
377 ;;; Doing this would potentially weed out incorrect charsets.
379 ;;; (eq charset (coding-system-get charset 'mime-charset))
382 ;; Translate invalid charsets.
383 ((let ((cs (cdr (assq charset mm-charset-synonym-alist))))
384 (and cs (mm-coding-system-p cs) cs)))
385 ;; Last resort: search the coding system list for entries which
386 ;; have the right mime-charset in case the canonical name isn't
387 ;; defined (though it should be).
389 ;; mm-get-coding-system-list returns a list of cs without lbt.
391 (dolist (c (mm-get-coding-system-list))
393 (eq charset (or (coding-system-get c :mime-charset)
394 (coding-system-get c 'mime-charset))))
399 (defvar mm-emacs-mule (and (not (featurep 'xemacs))
400 (boundp 'default-enable-m