X-Git-Url: http://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Fmm-util.el;h=7a944bbc1d8fc1c92b6fc32c8e378eb4faefb89a;hb=6d3039252bb175eba53a2028cbf3c0e90112388d;hp=6c4d2e41b6fd4d1f850a1f6137cfc8facfcda502;hpb=d12343929c3e033a5bed3d2284c20d3a19dfe18a;p=gnus diff --git a/lisp/mm-util.el b/lisp/mm-util.el index 6c4d2e41b..7a944bbc1 100644 --- a/lisp/mm-util.el +++ b/lisp/mm-util.el @@ -1,7 +1,7 @@ ;;; mm-util.el --- Utility functions for Mule and low level things ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, -;; 2005, 2006 Free Software Foundation, Inc. +;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; MORIOKA Tomohiko @@ -9,7 +9,7 @@ ;; GNU Emacs is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) +;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; GNU Emacs is distributed in the hope that it will be useful, @@ -26,6 +26,10 @@ ;;; Code: +;; For Emacs < 22.2. +(eval-and-compile + (unless (fboundp 'declare-function) (defmacro declare-function (&rest r)))) + (eval-when-compile (require 'cl)) (require 'mail-prsvr) @@ -36,18 +40,16 @@ (require 'timer)) (require 'timer))) +(defvar mm-mime-mule-charset-alist ) + (eval-and-compile - (mapcar + (mapc (lambda (elem) (let ((nfunc (intern (format "mm-%s" (car elem))))) (if (fboundp (car elem)) (defalias nfunc (car elem)) (defalias nfunc (cdr elem))))) - '((decode-coding-string . (lambda (s a) s)) - (encode-coding-string . (lambda (s a) s)) - (encode-coding-region . ignore) - (coding-system-list . ignore) - (decode-coding-region . ignore) + '((coding-system-list . ignore) (char-int . identity) (coding-system-equal . equal) (annotationp . ignore) @@ -100,15 +102,10 @@ ;; (string-to-multibyte s) ~= (decode-coding-string s 'binary) ;; (string-make-multibyte s) ~= (decode-coding-string s locale-coding-system) (string-as-multibyte . identity) - (string-to-multibyte - . (lambda (string) - "Return a multibyte string with the same individual chars as string." - (mapconcat - (lambda (ch) (mm-string-as-multibyte (char-to-string ch))) - string ""))) (multibyte-string-p . ignore) (insert-byte . insert-char) (multibyte-char-to-unibyte . identity) + (set-buffer-multibyte . ignore) (special-display-p . (lambda (buffer-name) "Returns non-nil if a buffer named BUFFER-NAME gets a special frame." @@ -125,6 +122,47 @@ (string-match (car elem) buffer-name) (throw 'return (cdr elem)))))))))))) +(eval-and-compile + (if (featurep 'xemacs) + (if (featurep 'file-coding) + ;; Don't modify string if CODING-SYSTEM is nil. + (progn + (defun mm-decode-coding-string (str coding-system) + (if coding-system + (decode-coding-string str coding-system) + str)) + (defun mm-encode-coding-string (str coding-system) + (if coding-system + (encode-coding-string str coding-system) + str)) + (defun mm-decode-coding-region (start end coding-system) + (if coding-system + (decode-coding-region start end coding-system))) + (defun mm-encode-coding-region (start end coding-system) + (if coding-system + (encode-coding-region start end coding-system)))) + (defun mm-decode-coding-string (str coding-system) str) + (defun mm-encode-coding-string (str coding-system) str) + (defalias 'mm-decode-coding-region 'ignore) + (defalias 'mm-encode-coding-region 'ignore)) + (defalias 'mm-decode-coding-string 'decode-coding-string) + (defalias 'mm-encode-coding-string 'encode-coding-string) + (defalias 'mm-decode-coding-region 'decode-coding-region) + (defalias 'mm-encode-coding-region 'encode-coding-region))) + +(defalias 'mm-string-to-multibyte + (cond + ((featurep 'xemacs) + 'identity) + ((fboundp 'string-to-multibyte) + 'string-to-multibyte) + (t + (lambda (string) + "Return a multibyte string with the same individual chars as string." + (mapconcat + (lambda (ch) (mm-string-as-multibyte (char-to-string ch))) + string ""))))) + (eval-and-compile (defalias 'mm-char-or-char-int-p (cond @@ -182,7 +220,10 @@ non-nil, an alias is created and added to the alias. Else windows-NUMBER is used." (interactive (let ((completion-ignore-case t) - (candidates (cp-supported-codepages))) + (candidates (if (fboundp 'cp-supported-codepages) + (cp-supported-codepages) + ;; Removed in Emacs 23 (unicode), sosignal an error: + (error "`codepage-setup' is obsolete in this Emacs version.")))) (list (completing-read "Setup DOS Codepage: (default 437) " candidates nil t nil nil "437")))) (when alias @@ -202,49 +243,150 @@ the alias. Else windows-NUMBER is used." ;; Not in XEmacs, but it's not a proper MIME charset anyhow. ,@(unless (mm-coding-system-p 'x-ctext) '((x-ctext . ctext))) - ;; ISO-8859-15 is very similar to ISO-8859-1. But it's _different_! + ;; ISO-8859-15 is very similar to ISO-8859-1. But it's _different_ in 8 + ;; positions! ,@(unless (mm-coding-system-p 'iso-8859-15) '((iso-8859-15 . iso-8859-1))) ;; BIG-5HKSCS is similar to, but different than, BIG-5. ,@(unless (mm-coding-system-p 'big5-hkscs) '((big5-hkscs . big5))) - ;; Windows-1252 is actually a superset of iso-8859-1 (West Europe). See - ;; also `gnus-article-dumbquotes-map'. - ,@(unless (mm-coding-system-p 'windows-1252) - (if (mm-coding-system-p 'cp1252) - '((windows-1252 . cp1252)) - '((windows-1252 . iso-8859-1)))) - ;; Windows-1254 is a superset of iso-8859-9 (Turkish). - ,@(unless (mm-coding-system-p 'windows-1254) - (if (mm-coding-system-p 'cp1254) - '((windows-1254 . cp1254)) - '((windows-1254 . iso-8859-9)))) - ;; Windows-1255 is a superset of iso-8859-8 (Hebrew). - ,@(unless (mm-coding-system-p 'windows-1255) - (if (mm-coding-system-p 'cp1255) - '((windows-1255 . cp1255)) - '((windows-1255 . iso-8859-8)))) - ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft - ;; Outlook users in Czech republic. Use this to allow reading of their - ;; e-mails. cp1250 should be defined by M-x codepage-setup. - ,@(if (and (not (mm-coding-system-p 'windows-1250)) - (mm-coding-system-p 'cp1250)) - '((windows-1250 . cp1250))) ;; A Microsoft misunderstanding. - ,@(if (and (not (mm-coding-system-p 'unicode)) - (mm-coding-system-p 'utf-16-le)) - '((unicode . utf-16-le))) + ,@(when (and (not (mm-coding-system-p 'unicode)) + (mm-coding-system-p 'utf-16-le)) + '((unicode . utf-16-le))) ;; A Microsoft misunderstanding. ,@(unless (mm-coding-system-p 'ks_c_5601-1987) (if (mm-coding-system-p 'cp949) '((ks_c_5601-1987 . cp949)) '((ks_c_5601-1987 . euc-kr)))) ;; Windows-31J is Windows Codepage 932. - ,@(if (and (not (mm-coding-system-p 'windows-31j)) - (mm-coding-system-p 'cp932)) - '((windows-31j . cp932))) + ,@(when (and (not (mm-coding-system-p 'windows-31j)) + (mm-coding-system-p 'cp932)) + '((windows-31j . cp932))) + ;; Charset name: GBK, Charset aliases: CP936, MS936, windows-936 + ;; http://www.iana.org/assignments/charset-reg/GBK + ;; Emacs 22.1 has cp936, but not gbk, so we alias it: + ,@(when (and (not (mm-coding-system-p 'gbk)) + (mm-coding-system-p 'cp936)) + '((gbk . cp936))) + ;; ISO8859-1 is a bogus name for ISO-8859-1 + ,@(when (and (not (mm-coding-system-p 'iso8859-1)) + (mm-coding-system-p 'iso-8859-1)) + '((iso8859-1 . iso-8859-1))) ) - "A mapping from unknown or invalid charset names to the real charset names.") + "A mapping from unknown or invalid charset names to the real charset names. + +See `mm-codepage-iso-8859-list' and `mm-codepage-ibm-list'.") + +(defcustom mm-codepage-iso-8859-list + (list 1250 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft + ;; Outlook users in Czech republic. Use this to allow reading of + ;; their e-mails. cp1250 should be defined by M-x codepage-setup + ;; (Emacs 21). + '(1252 . 1) ;; Windows-1252 is a superset of iso-8859-1 (West + ;; Europe). See also `gnus-article-dumbquotes-map'. + '(1254 . 9) ;; Windows-1254 is a superset of iso-8859-9 (Turkish). + '(1255 . 8));; Windows-1255 is a superset of iso-8859-8 (Hebrew). + "A list of Windows codepage numbers and iso-8859 charset numbers. + +If an element is a number corresponding to a supported windows +codepage, appropriate entries to `mm-charset-synonym-alist' are +added by `mm-setup-codepage-iso-8859'. An element may also be a +cons cell where the car is a codepage number and the cdr is the +corresponding number of an iso-8859 charset." + :type '(list (set :inline t + (const 1250 :tag "Central and East European") + (const (1252 . 1) :tag "West European") + (const (1254 . 9) :tag "Turkish") + (const (1255 . 8) :tag "Hebrew")) + (repeat :inline t + :tag "Other options" + (choice + (integer :tag "Windows codepage number") + (cons (integer :tag "Windows codepage number") + (integer :tag "iso-8859 charset number"))))) + :version "22.1" ;; Gnus 5.10.9 + :group 'mime) + +(defcustom mm-codepage-ibm-list + (list 437 ;; (US etc.) + 860 ;; (Portugal) + 861 ;; (Iceland) + 862 ;; (Israel) + 863 ;; (Canadian French) + 865 ;; (Nordic) + 852 ;; + 850 ;; (Latin 1) + 855 ;; (Cyrillic) + 866 ;; (Cyrillic - Russian) + 857 ;; (Turkish) + 864 ;; (Arabic) + 869 ;; (Greek) + 874);; (Thai) + ;; In Emacs 23 (unicode), cp... and ibm... are aliases. + ;; Cf. http://thread.gmane.org/v9lkng5nwy.fsf@marauder.physik.uni-ulm.de + "List of IBM codepage numbers. + +The codepage mappings slighly differ between IBM and other vendors. +See \"ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/IBM/README.TXT\". + +If an element is a number corresponding to a supported windows +codepage, appropriate entries to `mm-charset-synonym-alist' are +added by `mm-setup-codepage-ibm'." + :type '(list (set :inline t + (const 437 :tag "US etc.") + (const 860 :tag "Portugal") + (const 861 :tag "Iceland") + (const 862 :tag "Israel") + (const 863 :tag "Canadian French") + (const 865 :tag "Nordic") + (const 852) + (const 850 :tag "Latin 1") + (const 855 :tag "Cyrillic") + (const 866 :tag "Cyrillic - Russian") + (const 857 :tag "Turkish") + (const 864 :tag "Arabic") + (const 869 :tag "Greek") + (const 874 :tag "Thai")) + (repeat :inline t + :tag "Other options" + (integer :tag "Codepage number"))) + :version "22.1" ;; Gnus 5.10.9 + :group 'mime) + +(defun mm-setup-codepage-iso-8859 (&optional list) + "Add appropriate entries to `mm-charset-synonym-alist'. +Unless LIST is given, `mm-codepage-iso-8859-list' is used." + (unless list + (setq list mm-codepage-iso-8859-list)) + (dolist (i list) + (let (cp windows iso) + (if (consp i) + (setq cp (intern (format "cp%d" (car i))) + windows (intern (format "windows-%d" (car i))) + iso (intern (format "iso-8859-%d" (cdr i)))) + (setq cp (intern (format "cp%d" i)) + windows (intern (format "windows-%d" i)))) + (unless (mm-coding-system-p windows) + (if (mm-coding-system-p cp) + (add-to-list 'mm-charset-synonym-alist (cons windows cp)) + (add-to-list 'mm-charset-synonym-alist (cons windows iso))))))) + +(defun mm-setup-codepage-ibm (&optional list) + "Add appropriate entries to `mm-charset-synonym-alist'. +Unless LIST is given, `mm-codepage-ibm-list' is used." + (unless list + (setq list mm-codepage-ibm-list)) + (dolist (number list) + (let ((ibm (intern (format "ibm%d" number))) + (cp (intern (format "cp%d" number)))) + (when (and (not (mm-coding-system-p ibm)) + (mm-coding-system-p cp)) + (add-to-list 'mm-charset-synonym-alist (cons ibm cp)))))) + +;; Initialize: +(mm-setup-codepage-iso-8859) +(mm-setup-codepage-ibm) (defcustom mm-charset-override-alist '((iso-8859-1 . windows-1252) @@ -293,6 +435,7 @@ could use `autoload-coding-system' here." (cons (symbol :tag "charset") (symbol :tag "form")))) :group 'mime) +(put 'mm-charset-eval-alist 'risky-local-variable t) (defvar mm-binary-coding-system (cond @@ -353,6 +496,10 @@ could use `autoload-coding-system' here." (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978) (euc-kr korean-ksc5601) (gb2312 chinese-gb2312) + (gbk chinese-gbk) + (gb18030 gb18030-2-byte + gb18030-4-byte-bmp gb18030-4-byte-smp + gb18030-4-byte-ext-1 gb18030-4-byte-ext-2) (big5 chinese-big5-1 chinese-big5-2) (tibetan tibetan) (thai-tis620 thai-tis620) @@ -421,7 +568,7 @@ with Mule charsets. It is completely useless for Emacs." cs mime mule alist) (while css (setq cs (pop css) - mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode) + mime (or (coding-system-get cs :mime-charset); Emacs 23 (unicode) (coding-system-get cs 'mime-charset))) (when (and mime (not (eq t (setq mule @@ -751,9 +898,10 @@ This affects whether coding conversion should be attempted generally." (autoload 'latin-unity-massage-name "latin-unity") (autoload 'latin-unity-maybe-remap "latin-unity") (autoload 'latin-unity-representations-feasible-region "latin-unity") - (autoload 'latin-unity-representations-present-region "latin-unity") - (defvar latin-unity-coding-systems) - (defvar latin-unity-ucs-list)) + (autoload 'latin-unity-representations-present-region "latin-unity")) + +(defvar latin-unity-coding-systems) +(defvar latin-unity-ucs-list) (defun mm-xemacs-find-mime-charset-1 (begin end) "Determine which MIME charset to use to send region as message. @@ -824,6 +972,8 @@ But this is very much a corner case, so don't worry about it." (when (featurep 'xemacs) `(and (featurep 'mule) (mm-xemacs-find-mime-charset-1 ,begin ,end)))) +(declare-function mm-delete-duplicates "mm-util" (list)) + (defun mm-find-mime-charset-region (b e &optional hack-charsets) "Return the MIME charsets needed to encode the region between B and E. nil means ASCII, a single-element list represents an appropriate MIME @@ -870,6 +1020,8 @@ charset, and a longer list means no appropriate charset." ;; Otherwise, we'll get nil, and the next setq will get invoked. (setq charsets (mm-xemacs-find-mime-charset b e)) + ;; Fixme: won't work for unibyte Emacs 23: + ;; We're not multibyte, or a single coding system won't cover it. (setq charsets (mm-delete-duplicates @@ -880,8 +1032,8 @@ charset, and a longer list means no appropriate charset." (memq 'iso-8859-15 charsets) (memq 'iso-8859-15 hack-charsets) (save-excursion (mm-iso-8859-x-to-15-region b e))) - (mapcar (lambda (x) (setq charsets (delq (car x) charsets))) - mm-iso-8859-15-compatible)) + (dolist (x mm-iso-8859-15-compatible) + (setq charsets (delq (car x) charsets)))) (if (and (memq 'iso-2022-jp-2 charsets) (memq 'iso-2022-jp-2 hack-charsets)) (setq charsets (delq 'iso-2022-jp charsets))) @@ -901,16 +1053,18 @@ charset, and a longer list means no appropriate charset." (defmacro mm-with-unibyte-buffer (&rest forms) "Create a temporary buffer, and evaluate FORMS there like `progn'. Use unibyte mode for this." - `(let (default-enable-multibyte-characters) - (with-temp-buffer ,@forms))) + `(with-temp-buffer + (mm-disable-multibyte) + ,@forms)) (put 'mm-with-unibyte-buffer 'lisp-indent-function 0) (put 'mm-with-unibyte-buffer 'edebug-form-spec '(body)) (defmacro mm-with-multibyte-buffer (&rest forms) "Create a temporary buffer, and evaluate FORMS there like `progn'. Use multibyte mode for this." - `(let ((default-enable-multibyte-characters t)) - (with-temp-buffer ,@forms))) + `(with-temp-buffer + (mm-enable-multibyte) + ,@forms)) (put 'mm-with-multibyte-buffer 'lisp-indent-function 0) (put 'mm-with-multibyte-buffer 'edebug-form-spec '(body)) @@ -963,10 +1117,10 @@ Emacs 23 (unicode)." ;; Remove composition since the base charsets have been included. ;; Remove eight-bit-*, treat them as ascii. (let ((css (find-charset-region b e))) - (mapcar (lambda (cs) (setq css (delq cs css))) - '(composition eight-bit-control eight-bit-graphic - control-1)) - css)) + (dolist (cs + '(composition eight-bit-control eight-bit-graphic control-1) + css) + (setq css (delq cs css))))) (t ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit. (save-excursion @@ -1000,7 +1154,7 @@ Emacs 23 (unicode)." (nreverse out))) (defvar mm-inhibit-file-name-handlers - '(jka-compr-handler image-file-handler) + '(jka-compr-handler image-file-handler epa-file-handler) "A list of handlers doing (un)compression (etc) thingies.") (defun mm-insert-file-contents (filename &optional visit beg end replace @@ -1074,6 +1228,8 @@ If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'." inhibit-file-name-handlers))) (write-region start end filename append visit lockname))) +(autoload 'gmm-write-region "gmm-utils") + ;; It is not a MIME function, but some MIME functions use it. (if (and (fboundp 'make-temp-file) (ignore-errors @@ -1119,15 +1275,11 @@ If SUFFIX is non-nil, add that at the end of the file name." (setq file (concat file suffix))) (if dir-flag (make-directory file) - (if (featurep 'xemacs) - ;; NOTE: This is unsafe if XEmacs users - ;; don't use a secure temp directory. - (if (file-exists-p file) - (signal 'file-already-exists - (list "File exists" file)) - (write-region "" nil file nil 'silent)) - (write-region "" nil file nil 'silent - nil 'excl))) + ;; NOTE: This is unsafe if Emacs 20 + ;; users and XEmacs users don't use + ;; a secure temp directory. + (gmm-write-region "" nil file nil 'silent + nil 'excl)) nil) (file-already-exists t) ;; The XEmacs version of `make-directory' issues @@ -1169,6 +1321,8 @@ If SUFFIX is non-nil, add that at the end of the file name." (if (eq (point) end) 'ascii (mm-guess-charset)) (goto-char point))))) +(declare-function mm-detect-coding-region "mm-util" (start end)) + (if (fboundp 'coding-system-get) (defun mm-detect-mime-charset-region (start end) "Detect MIME charset of the text in the region between START and END." @@ -1311,7 +1465,7 @@ gzip, bzip2, etc. are allowed." (funcall (symbol-value 'set-auto-coding-function) nil (- (point-max) (point-min))) (error nil))))) - ((featurep 'file-coding) ;; XEmacs + ((and (featurep 'xemacs) (featurep 'file-coding)) ;; XEmacs (let ((case-fold-search t) (end (point-at-eol)) codesys start)