X-Git-Url: http://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Fmm-util.el;h=7a944bbc1d8fc1c92b6fc32c8e378eb4faefb89a;hb=6d3039252bb175eba53a2028cbf3c0e90112388d;hp=6c4d2e41b6fd4d1f850a1f6137cfc8facfcda502;hpb=d12343929c3e033a5bed3d2284c20d3a19dfe18a;p=gnus

diff --git a/lisp/mm-util.el b/lisp/mm-util.el
index 6c4d2e41b..7a944bbc1 100644
--- a/lisp/mm-util.el
+++ b/lisp/mm-util.el
@@ -1,7 +1,7 @@
 ;;; mm-util.el --- Utility functions for Mule and low level things
 
 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-;;   2005, 2006 Free Software Foundation, Inc.
+;;   2005, 2006, 2007, 2008 Free Software Foundation, Inc.
 
 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
 ;;	MORIOKA Tomohiko <morioka@jaist.ac.jp>
@@ -9,7 +9,7 @@
 
 ;; GNU Emacs is free software; you can redistribute it and/or modify
 ;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
+;; the Free Software Foundation; either version 3, or (at your option)
 ;; any later version.
 
 ;; GNU Emacs is distributed in the hope that it will be useful,
@@ -26,6 +26,10 @@
 
 ;;; Code:
 
+;; For Emacs < 22.2.
+(eval-and-compile
+  (unless (fboundp 'declare-function) (defmacro declare-function (&rest r))))
+
 (eval-when-compile (require 'cl))
 (require 'mail-prsvr)
 
@@ -36,18 +40,16 @@
 	(require 'timer))
     (require 'timer)))
 
+(defvar mm-mime-mule-charset-alist )
+
 (eval-and-compile
-  (mapcar
+  (mapc
    (lambda (elem)
      (let ((nfunc (intern (format "mm-%s" (car elem)))))
        (if (fboundp (car elem))
 	   (defalias nfunc (car elem))
 	 (defalias nfunc (cdr elem)))))
-   '((decode-coding-string . (lambda (s a) s))
-     (encode-coding-string . (lambda (s a) s))
-     (encode-coding-region . ignore)
-     (coding-system-list . ignore)
-     (decode-coding-region . ignore)
+   '((coding-system-list . ignore)
      (char-int . identity)
      (coding-system-equal . equal)
      (annotationp . ignore)
@@ -100,15 +102,10 @@
      ;; (string-to-multibyte s)   ~= (decode-coding-string s 'binary)
      ;; (string-make-multibyte s) ~= (decode-coding-string s locale-coding-system)
      (string-as-multibyte . identity)
-     (string-to-multibyte
-      . (lambda (string)
-	  "Return a multibyte string with the same individual chars as string."
-	  (mapconcat
-	   (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
-	   string "")))
      (multibyte-string-p . ignore)
      (insert-byte . insert-char)
      (multibyte-char-to-unibyte . identity)
+     (set-buffer-multibyte . ignore)
      (special-display-p
       . (lambda (buffer-name)
 	  "Returns non-nil if a buffer named BUFFER-NAME gets a special frame."
@@ -125,6 +122,47 @@
 			    (string-match (car elem) buffer-name)
 			    (throw 'return (cdr elem))))))))))))
 
+(eval-and-compile
+  (if (featurep 'xemacs)
+      (if (featurep 'file-coding)
+	  ;; Don't modify string if CODING-SYSTEM is nil.
+	  (progn
+	    (defun mm-decode-coding-string (str coding-system)
+	      (if coding-system
+		  (decode-coding-string str coding-system)
+		str))
+	    (defun mm-encode-coding-string (str coding-system)
+	      (if coding-system
+		  (encode-coding-string str coding-system)
+		str))
+	    (defun mm-decode-coding-region (start end coding-system)
+	      (if coding-system
+		  (decode-coding-region start end coding-system)))
+	    (defun mm-encode-coding-region (start end coding-system)
+	      (if coding-system
+		  (encode-coding-region start end coding-system))))
+	(defun mm-decode-coding-string (str coding-system) str)
+	(defun mm-encode-coding-string (str coding-system) str)
+	(defalias 'mm-decode-coding-region 'ignore)
+	(defalias 'mm-encode-coding-region 'ignore))
+    (defalias 'mm-decode-coding-string 'decode-coding-string)
+    (defalias 'mm-encode-coding-string 'encode-coding-string)
+    (defalias 'mm-decode-coding-region 'decode-coding-region)
+    (defalias 'mm-encode-coding-region 'encode-coding-region)))
+
+(defalias 'mm-string-to-multibyte
+  (cond
+   ((featurep 'xemacs)
+    'identity)
+   ((fboundp 'string-to-multibyte)
+    'string-to-multibyte)
+   (t
+    (lambda (string)
+      "Return a multibyte string with the same individual chars as string."
+      (mapconcat
+       (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
+       string "")))))
+
 (eval-and-compile
   (defalias 'mm-char-or-char-int-p
     (cond
@@ -182,7 +220,10 @@ non-nil, an alias is created and added to
 the alias.  Else windows-NUMBER is used."
   (interactive
    (let ((completion-ignore-case t)
-	 (candidates (cp-supported-codepages)))
+	 (candidates (if (fboundp 'cp-supported-codepages)
+			 (cp-supported-codepages)
+		       ;; Removed in Emacs 23 (unicode), sosignal an error:
+		       (error "`codepage-setup' is obsolete in this Emacs version."))))
      (list (completing-read "Setup DOS Codepage: (default 437) " candidates
 			    nil t nil nil "437"))))
   (when alias
@@ -202,49 +243,150 @@ the alias.  Else windows-NUMBER is used."
     ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
     ,@(unless (mm-coding-system-p 'x-ctext)
 	'((x-ctext . ctext)))
-    ;; ISO-8859-15 is very similar to ISO-8859-1.  But it's _different_!
+    ;; ISO-8859-15 is very similar to ISO-8859-1.  But it's _different_ in 8
+    ;; positions!
     ,@(unless (mm-coding-system-p 'iso-8859-15)
 	'((iso-8859-15 . iso-8859-1)))
     ;; BIG-5HKSCS is similar to, but different than, BIG-5.
     ,@(unless (mm-coding-system-p 'big5-hkscs)
 	'((big5-hkscs . big5)))
-    ;; Windows-1252 is actually a superset of iso-8859-1 (West Europe).  See
-    ;; also `gnus-article-dumbquotes-map'.
-    ,@(unless (mm-coding-system-p 'windows-1252)
-	(if (mm-coding-system-p 'cp1252)
-	    '((windows-1252 . cp1252))
-	  '((windows-1252 . iso-8859-1))))
-    ;; Windows-1254 is a superset of iso-8859-9 (Turkish).
-    ,@(unless (mm-coding-system-p 'windows-1254)
-	(if (mm-coding-system-p 'cp1254)
-	    '((windows-1254 . cp1254))
-	  '((windows-1254 . iso-8859-9))))
-    ;; Windows-1255 is a superset of iso-8859-8 (Hebrew).
-    ,@(unless (mm-coding-system-p 'windows-1255)
-	(if (mm-coding-system-p 'cp1255)
-	    '((windows-1255 . cp1255))
-	  '((windows-1255 . iso-8859-8))))
-    ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
-    ;; Outlook users in Czech republic. Use this to allow reading of their
-    ;; e-mails. cp1250 should be defined by M-x codepage-setup.
-    ,@(if (and (not (mm-coding-system-p 'windows-1250))
-	       (mm-coding-system-p 'cp1250))
-	  '((windows-1250 . cp1250)))
     ;; A Microsoft misunderstanding.
-    ,@(if (and (not (mm-coding-system-p 'unicode))
-	       (mm-coding-system-p 'utf-16-le))
-	  '((unicode . utf-16-le)))
+    ,@(when (and (not (mm-coding-system-p 'unicode))
+		 (mm-coding-system-p 'utf-16-le))
+	'((unicode . utf-16-le)))
     ;; A Microsoft misunderstanding.
     ,@(unless (mm-coding-system-p 'ks_c_5601-1987)
 	(if (mm-coding-system-p 'cp949)
 	    '((ks_c_5601-1987 . cp949))
 	  '((ks_c_5601-1987 . euc-kr))))
     ;; Windows-31J is Windows Codepage 932.
-    ,@(if (and (not (mm-coding-system-p 'windows-31j))
-	       (mm-coding-system-p 'cp932))
-	  '((windows-31j . cp932)))
+    ,@(when (and (not (mm-coding-system-p 'windows-31j))
+		 (mm-coding-system-p 'cp932))
+	'((windows-31j . cp932)))
+    ;; Charset name: GBK, Charset aliases: CP936, MS936, windows-936
+    ;; http://www.iana.org/assignments/charset-reg/GBK
+    ;; Emacs 22.1 has cp936, but not gbk, so we alias it:
+    ,@(when (and (not (mm-coding-system-p 'gbk))
+		 (mm-coding-system-p 'cp936))
+	'((gbk . cp936)))
+    ;; ISO8859-1 is a bogus name for ISO-8859-1
+    ,@(when (and (not (mm-coding-system-p 'iso8859-1))
+		 (mm-coding-system-p 'iso-8859-1))
+	'((iso8859-1 . iso-8859-1)))
     )
-  "A mapping from unknown or invalid charset names to the real charset names.")
+  "A mapping from unknown or invalid charset names to the real charset names.
+
+See `mm-codepage-iso-8859-list' and `mm-codepage-ibm-list'.")
+
+(defcustom mm-codepage-iso-8859-list
+  (list 1250 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
+	;; Outlook users in Czech republic.  Use this to allow reading of
+	;; their e-mails.  cp1250 should be defined by M-x codepage-setup
+	;; (Emacs 21).
+	'(1252 . 1) ;; Windows-1252 is a superset of iso-8859-1 (West
+	            ;; Europe).  See also `gnus-article-dumbquotes-map'.
+	'(1254 . 9) ;; Windows-1254 is a superset of iso-8859-9 (Turkish).
+	'(1255 . 8));; Windows-1255 is a superset of iso-8859-8 (Hebrew).
+  "A list of Windows codepage numbers and iso-8859 charset numbers.
+
+If an element is a number corresponding to a supported windows
+codepage, appropriate entries to `mm-charset-synonym-alist' are
+added by `mm-setup-codepage-iso-8859'.  An element may also be a
+cons cell where the car is a codepage number and the cdr is the
+corresponding number of an iso-8859 charset."
+  :type '(list (set :inline t
+		    (const 1250 :tag "Central and East European")
+		    (const (1252 . 1) :tag "West European")
+		    (const (1254 . 9) :tag "Turkish")
+		    (const (1255 . 8) :tag "Hebrew"))
+	       (repeat :inline t
+		       :tag "Other options"
+		       (choice
+			(integer :tag "Windows codepage number")
+			(cons (integer :tag "Windows codepage number")
+			      (integer :tag "iso-8859 charset  number")))))
+  :version "22.1" ;; Gnus 5.10.9
+  :group 'mime)
+
+(defcustom mm-codepage-ibm-list
+  (list 437 ;; (US etc.)
+	860 ;; (Portugal)
+	861 ;; (Iceland)
+	862 ;; (Israel)
+	863 ;; (Canadian French)
+	865 ;; (Nordic)
+	852 ;;
+	850 ;; (Latin 1)
+	855 ;; (Cyrillic)
+	866 ;; (Cyrillic - Russian)
+	857 ;; (Turkish)
+	864 ;; (Arabic)
+	869 ;; (Greek)
+	874);; (Thai)
+  ;; In Emacs 23 (unicode), cp... and ibm... are aliases.
+  ;; Cf. http://thread.gmane.org/v9lkng5nwy.fsf@marauder.physik.uni-ulm.de
+  "List of IBM codepage numbers.
+
+The codepage mappings slighly differ between IBM and other vendors.
+See \"ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/IBM/README.TXT\".
+
+If an element is a number corresponding to a supported windows
+codepage, appropriate entries to `mm-charset-synonym-alist' are
+added by `mm-setup-codepage-ibm'."
+  :type '(list (set :inline t
+		    (const 437 :tag "US etc.")
+		    (const 860 :tag "Portugal")
+		    (const 861 :tag "Iceland")
+		    (const 862 :tag "Israel")
+		    (const 863 :tag "Canadian French")
+		    (const 865 :tag "Nordic")
+		    (const 852)
+		    (const 850 :tag "Latin 1")
+		    (const 855 :tag "Cyrillic")
+		    (const 866 :tag "Cyrillic - Russian")
+		    (const 857 :tag "Turkish")
+		    (const 864 :tag "Arabic")
+		    (const 869 :tag "Greek")
+		    (const 874 :tag "Thai"))
+	       (repeat :inline t
+		       :tag "Other options"
+		       (integer :tag "Codepage number")))
+  :version "22.1" ;; Gnus 5.10.9
+  :group 'mime)
+
+(defun mm-setup-codepage-iso-8859 (&optional list)
+  "Add appropriate entries to `mm-charset-synonym-alist'.
+Unless LIST is given, `mm-codepage-iso-8859-list' is used."
+  (unless list
+    (setq list mm-codepage-iso-8859-list))
+  (dolist (i list)
+    (let (cp windows iso)
+      (if (consp i)
+	  (setq cp (intern (format "cp%d" (car i)))
+		windows (intern (format "windows-%d" (car i)))
+		iso (intern (format "iso-8859-%d" (cdr i))))
+	(setq cp (intern (format "cp%d" i))
+	      windows (intern (format "windows-%d" i))))
+      (unless (mm-coding-system-p windows)
+	(if (mm-coding-system-p cp)
+	    (add-to-list 'mm-charset-synonym-alist (cons windows cp))
+	  (add-to-list 'mm-charset-synonym-alist (cons windows iso)))))))
+
+(defun mm-setup-codepage-ibm (&optional list)
+  "Add appropriate entries to `mm-charset-synonym-alist'.
+Unless LIST is given, `mm-codepage-ibm-list' is used."
+  (unless list
+    (setq list mm-codepage-ibm-list))
+  (dolist (number list)
+    (let ((ibm (intern (format "ibm%d" number)))
+	  (cp  (intern (format "cp%d" number))))
+      (when (and (not (mm-coding-system-p ibm))
+		 (mm-coding-system-p cp))
+	(add-to-list 'mm-charset-synonym-alist (cons ibm cp))))))
+
+;; Initialize:
+(mm-setup-codepage-iso-8859)
+(mm-setup-codepage-ibm)
 
 (defcustom mm-charset-override-alist
   '((iso-8859-1 . windows-1252)
@@ -293,6 +435,7 @@ could use `autoload-coding-system' here."
 		       (cons (symbol :tag "charset")
 			     (symbol :tag "form"))))
   :group 'mime)
+(put 'mm-charset-eval-alist 'risky-local-variable t)
 
 (defvar mm-binary-coding-system
   (cond
@@ -353,6 +496,10 @@ could use `autoload-coding-system' here."
     (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
     (euc-kr korean-ksc5601)
     (gb2312 chinese-gb2312)
+    (gbk chinese-gbk)
+    (gb18030 gb18030-2-byte
+	     gb18030-4-byte-bmp gb18030-4-byte-smp
+	     gb18030-4-byte-ext-1 gb18030-4-byte-ext-2)
     (big5 chinese-big5-1 chinese-big5-2)
     (tibetan tibetan)
     (thai-tis620 thai-tis620)
@@ -421,7 +568,7 @@ with Mule charsets.  It is completely useless for Emacs."
 	  cs mime mule alist)
       (while css
 	(setq cs (pop css)
-	      mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode)
+	      mime (or (coding-system-get cs :mime-charset); Emacs 23 (unicode)
 		       (coding-system-get cs 'mime-charset)))
 	(when (and mime
 		   (not (eq t (setq mule
@@ -751,9 +898,10 @@ This affects whether coding conversion should be attempted generally."
   (autoload 'latin-unity-massage-name "latin-unity")
   (autoload 'latin-unity-maybe-remap "latin-unity")
   (autoload 'latin-unity-representations-feasible-region "latin-unity")
-  (autoload 'latin-unity-representations-present-region "latin-unity")
-  (defvar latin-unity-coding-systems)
-  (defvar latin-unity-ucs-list))
+  (autoload 'latin-unity-representations-present-region "latin-unity"))
+
+(defvar latin-unity-coding-systems)
+(defvar latin-unity-ucs-list)
 
 (defun mm-xemacs-find-mime-charset-1 (begin end)
   "Determine which MIME charset to use to send region as message.
@@ -824,6 +972,8 @@ But this is very much a corner case, so don't worry about it."
   (when (featurep 'xemacs)
     `(and (featurep 'mule) (mm-xemacs-find-mime-charset-1 ,begin ,end))))
 
+(declare-function mm-delete-duplicates "mm-util" (list))
+
 (defun mm-find-mime-charset-region (b e &optional hack-charsets)
   "Return the MIME charsets needed to encode the region between B and E.
 nil means ASCII, a single-element list represents an appropriate MIME
@@ -870,6 +1020,8 @@ charset, and a longer list means no appropriate charset."
 	;; Otherwise, we'll get nil, and the next setq will get invoked.
 	(setq charsets (mm-xemacs-find-mime-charset b e))
 
+	;; Fixme: won't work for unibyte Emacs 23:
+
 	;; We're not multibyte, or a single coding system won't cover it.
 	(setq charsets
 	      (mm-delete-duplicates
@@ -880,8 +1032,8 @@ charset, and a longer list means no appropriate charset."
 	     (memq 'iso-8859-15 charsets)
 	     (memq 'iso-8859-15 hack-charsets)
 	     (save-excursion (mm-iso-8859-x-to-15-region b e)))
-	(mapcar (lambda (x) (setq charsets (delq (car x) charsets)))
-		mm-iso-8859-15-compatible))
+	(dolist (x mm-iso-8859-15-compatible)
+	  (setq charsets (delq (car x) charsets))))
     (if (and (memq 'iso-2022-jp-2 charsets)
 	     (memq 'iso-2022-jp-2 hack-charsets))
 	(setq charsets (delq 'iso-2022-jp charsets)))
@@ -901,16 +1053,18 @@ charset, and a longer list means no appropriate charset."
 (defmacro mm-with-unibyte-buffer (&rest forms)
   "Create a temporary buffer, and evaluate FORMS there like `progn'.
 Use unibyte mode for this."
-  `(let (default-enable-multibyte-characters)
-     (with-temp-buffer ,@forms)))
+  `(with-temp-buffer
+     (mm-disable-multibyte)
+     ,@forms))
 (put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
 (put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
 
 (defmacro mm-with-multibyte-buffer (&rest forms)
   "Create a temporary buffer, and evaluate FORMS there like `progn'.
 Use multibyte mode for this."
-  `(let ((default-enable-multibyte-characters t))
-     (with-temp-buffer ,@forms)))
+  `(with-temp-buffer
+     (mm-enable-multibyte)
+     ,@forms))
 (put 'mm-with-multibyte-buffer 'lisp-indent-function 0)
 (put 'mm-with-multibyte-buffer 'edebug-form-spec '(body))
 
@@ -963,10 +1117,10 @@ Emacs 23 (unicode)."
     ;; Remove composition since the base charsets have been included.
     ;; Remove eight-bit-*, treat them as ascii.
     (let ((css (find-charset-region b e)))
-      (mapcar (lambda (cs) (setq css (delq cs css)))
-	      '(composition eight-bit-control eight-bit-graphic
-			    control-1))
-      css))
+      (dolist (cs
+	       '(composition eight-bit-control eight-bit-graphic control-1)
+	       css)
+	(setq css (delq cs css)))))
    (t
     ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
     (save-excursion
@@ -1000,7 +1154,7 @@ Emacs 23 (unicode)."
     (nreverse out)))
 
 (defvar mm-inhibit-file-name-handlers
-  '(jka-compr-handler image-file-handler)
+  '(jka-compr-handler image-file-handler epa-file-handler)
   "A list of handlers doing (un)compression (etc) thingies.")
 
 (defun mm-insert-file-contents (filename &optional visit beg end replace
@@ -1074,6 +1228,8 @@ If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
 	   inhibit-file-name-handlers)))
     (write-region start end filename append visit lockname)))
 
+(autoload 'gmm-write-region "gmm-utils")
+
 ;; It is not a MIME function, but some MIME functions use it.
 (if (and (fboundp 'make-temp-file)
 	 (ignore-errors
@@ -1119,15 +1275,11 @@ If SUFFIX is non-nil, add that at the end of the file name."
 			     (setq file (concat file suffix)))
 			 (if dir-flag
 			     (make-directory file)
-			   (if (featurep 'xemacs)
-			       ;; NOTE: This is unsafe if XEmacs users
-			       ;; don't use a secure temp directory.
-			       (if (file-exists-p file)
-				   (signal 'file-already-exists
-					   (list "File exists" file))
-				 (write-region "" nil file nil 'silent))
-			     (write-region "" nil file nil 'silent
-					   nil 'excl)))
+			   ;; NOTE: This is unsafe if Emacs 20
+			   ;; users and XEmacs users don't use
+			   ;; a secure temp directory.
+			   (gmm-write-region "" nil file nil 'silent
+					     nil 'excl))
 			 nil)
 		     (file-already-exists t)
 		     ;; The XEmacs version of `make-directory' issues
@@ -1169,6 +1321,8 @@ If SUFFIX is non-nil, add that at the end of the file name."
 	  (if (eq (point) end) 'ascii (mm-guess-charset))
 	(goto-char point)))))
 
+(declare-function mm-detect-coding-region "mm-util" (start end))
+
 (if (fboundp 'coding-system-get)
     (defun mm-detect-mime-charset-region (start end)
       "Detect MIME charset of the text in the region between START and END."
@@ -1311,7 +1465,7 @@ gzip, bzip2, etc. are allowed."
 		    (funcall (symbol-value 'set-auto-coding-function)
 			     nil (- (point-max) (point-min)))
 		  (error nil)))))
-	   ((featurep 'file-coding) ;; XEmacs
+	   ((and (featurep 'xemacs) (featurep 'file-coding)) ;; XEmacs
 	    (let ((case-fold-search t)
 		  (end (point-at-eol))
 		  codesys start)