X-Git-Url: http://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Fmm-util.el;h=c75bf954e8be9a61e94d9fbe3609094fa2657298;hb=26859be9854a8990c748765dc59c95f2ae4a11e7;hp=c21b391c74a440c68e62260796a9586b1a542a1a;hpb=7a04a7d854222863fa4ee19af2262498ae512a89;p=gnus diff --git a/lisp/mm-util.el b/lisp/mm-util.el index c21b391c7..c75bf954e 100644 --- a/lisp/mm-util.el +++ b/lisp/mm-util.el @@ -1,5 +1,6 @@ ;;; mm-util.el --- Utility functions for Mule and low level things -;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. +;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 +;; Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; MORIOKA Tomohiko @@ -43,9 +44,6 @@ (coding-system-equal . equal) (annotationp . ignore) (set-buffer-file-coding-system . ignore) - (make-char - . (lambda (charset int) - (int-to-char int))) (read-charset . (lambda (prompt) "Return a charset." @@ -56,9 +54,10 @@ mm-mime-mule-charset-alist) nil t)))) (subst-char-in-string - . (lambda (from to string) ;; stolen (and renamed) from nnheader.el - "Replace characters in STRING from FROM to TO." - (let ((string (substring string 0)) ;Copy string. + . (lambda (from to string &optional inplace) ;; stolen (and renamed) from nnheader.el + "Replace characters in STRING from FROM to TO. + Unless optional argument INPLACE is non-nil, return a new string." + (let ((string (if inplace string (copy-sequence string))) (len (length string)) (idx 0)) ;; Replace all occurrences of FROM with TO. @@ -70,7 +69,19 @@ (string-as-unibyte . identity) (string-make-unibyte . identity) (string-as-multibyte . identity) - (multibyte-string-p . ignore)))) + (multibyte-string-p . ignore) + ;; It is not a MIME function, but some MIME functions use it. + (make-temp-file . (lambda (prefix &optional dir-flag) + (let ((file (expand-file-name + (make-temp-name prefix) + (if (fboundp 'temp-directory) + (temp-directory) + temporary-file-directory)))) + (if dir-flag + (make-directory file)) + file))) + (insert-byte . insert-char) + (multibyte-char-to-unibyte . identity)))) (eval-and-compile (defalias 'mm-char-or-char-int-p @@ -79,6 +90,14 @@ ((fboundp 'char-valid-p) 'char-valid-p) (t 'identity)))) +;; Fixme: This seems always to be used to read a MIME charset, so it +;; should be re-named and fixed (in Emacs) to offer completion only on +;; proper charset names (base coding systems which have a +;; mime-charset defined). XEmacs doesn't believe in mime-charset; +;; test with +;; `(or (coding-system-get 'iso-8859-1 'mime-charset) +;; (coding-system-get 'iso-8859-1 :mime-charset))' +;; Actually, there should be an `mm-coding-system-mime-charset'. (eval-and-compile (defalias 'mm-read-coding-system (cond @@ -100,25 +119,27 @@ (or mm-coding-system-list (setq mm-coding-system-list (mm-coding-system-list)))) -(defun mm-coding-system-p (sym) - "Return non-nil if SYM is a coding system." - (or (and (fboundp 'coding-system-p) (coding-system-p sym)) - (memq sym (mm-get-coding-system-list)))) +(defun mm-coding-system-p (cs) + "Return non-nil if CS is a symbol naming a coding system. +In XEmacs, also return non-nil if CS is a coding system object." + (if (fboundp 'find-coding-system) + (find-coding-system cs) + (if (fboundp 'coding-system-p) + (coding-system-p cs) + ;; Is this branch ever actually useful? + (memq cs (mm-get-coding-system-list))))) (defvar mm-charset-synonym-alist `( - ;; Perfectly fine? A valid MIME name, anyhow. - ,@(unless (mm-coding-system-p 'big5) - '((big5 . cn-big5))) ;; Not in XEmacs, but it's not a proper MIME charset anyhow. ,@(unless (mm-coding-system-p 'x-ctext) '((x-ctext . ctext))) - ;; Apparently not defined in Emacs 20, but is a valid MIME name. - ,@(unless (mm-coding-system-p 'gb2312) - '((gb2312 . cn-gb-2312))) - ;; ISO-8859-15 is very similar to ISO-8859-1. - ,@(unless (mm-coding-system-p 'iso-8859-15) ; Emacs 21 defines it. + ;; ISO-8859-15 is very similar to ISO-8859-1. But it's _different_! + ,@(unless (mm-coding-system-p 'iso-8859-15) '((iso-8859-15 . iso-8859-1))) + ;; BIG-5HKSCS is similar to, but different than, BIG-5. + ,@(unless (mm-coding-system-p 'big5-hkscs) + '((big5-hkscs . big5))) ;; Windows-1252 is actually a superset of Latin-1. See also ;; `gnus-article-dumbquotes-map'. ,@(unless (mm-coding-system-p 'windows-1252) @@ -131,6 +152,11 @@ ,@(if (and (not (mm-coding-system-p 'windows-1250)) (mm-coding-system-p 'cp1250)) '((windows-1250 . cp1250))) + ;; A Microsoft misunderstanding. + ,@(unless (mm-coding-system-p 'ks_c_5601-1987) + (if (mm-coding-system-p 'cp949) + '((ks_c_5601-1987 . cp949)) + '((ks_c_5601-1987 . euc-kr)))) ) "A mapping from invalid charset names to the real charset names.") @@ -153,6 +179,11 @@ (defvar mm-auto-save-coding-system (cond + ((mm-coding-system-p 'utf-8-emacs) ; Mule 7 + (if (memq system-type '(windows-nt ms-dos ms-windows)) + (if (mm-coding-system-p 'utf-8-emacs-dos) + 'utf-8-emacs-dos mm-binary-coding-system) + 'utf-8-emacs)) ((mm-coding-system-p 'emacs-mule) (if (memq system-type '(windows-nt ms-dos ms-windows)) (if (mm-coding-system-p 'emacs-mule-dos) @@ -231,9 +262,11 @@ 'nconc (mapcar (lambda (cs) - (when (and (coding-system-get cs 'mime-charset) + (when (and (or (coding-system-get cs :mime-charset) ; Emacs 22 + (coding-system-get cs 'mime-charset)) (not (eq t (coding-system-get cs 'safe-charsets)))) - (list (cons (coding-system-get cs 'mime-charset) + (list (cons (or (coding-system-get cs :mime-charset) + (coding-system-get cs 'mime-charset)) (delq 'ascii (coding-system-get cs 'safe-charsets)))))) (sort-coding-systems (coding-system-list 'base-only)))))) @@ -275,15 +308,16 @@ Valid elements include: ;; Japanese users may prefer iso-2022-jp to shift-jis. '(iso-2022-jp iso-2022-jp-2 japanese-shift-jis iso-latin-1 utf-8))))) - "Preferred coding systems for encoding outgoing mails. + "Preferred coding systems for encoding outgoing messages. -More than one suitable coding systems may be found for some texts. By -default, a coding system with the highest priority is used to encode -outgoing mails (see `sort-coding-systems'). If this variable is set, -it overrides the default priority." +More than one suitable coding system may be found for some text. +By default, the coding system with the highest priority is used +to encode outgoing messages (see `sort-coding-systems'). If this +variable is set, it overrides the default priority." :type '(repeat (symbol :tag "Coding system")) :group 'mime) +;; ?? (defvar mm-use-find-coding-systems-region (fboundp 'find-coding-systems-region) "Use `find-coding-systems-region' to find proper coding systems. @@ -297,12 +331,16 @@ mail with multiple parts is preferred to sending a Unicode one.") (defun mm-mule-charset-to-mime-charset (charset) "Return the MIME charset corresponding to the given Mule CHARSET." - (if (fboundp 'find-coding-systems-for-charsets) + (if (and (fboundp 'find-coding-systems-for-charsets) + (fboundp 'sort-coding-systems)) (let (mime) - (dolist (cs (find-coding-systems-for-charsets (list charset))) + (dolist (cs (sort-coding-systems + (copy-sequence + (find-coding-systems-for-charsets (list charset))))) (unless mime (when cs - (setq mime (coding-system-get cs 'mime-charset))))) + (setq mime (or (coding-system-get cs :mime-charset) + (coding-system-get cs 'mime-charset)))))) mime) (let ((alist mm-mime-mule-charset-alist) out) @@ -326,7 +364,8 @@ used as the line break code type of the coding system." ((null charset) charset) ;; Running in a non-MULE environment. - ((null (mm-get-coding-system-list)) + ((or (null (mm-get-coding-system-list)) + (not (fboundp 'coding-system-get))) charset) ;; ascii ((eq charset 'us-ascii) @@ -351,31 +390,24 @@ used as the line break code type of the coding system." ;; Do we need -lbt? (dolist (c (mm-get-coding-system-list)) (if (and (null cs) - (eq charset (coding-system-get c 'mime-charset))) + (eq charset (or (coding-system-get c :mime-charset) + (coding-system-get c 'mime-charset)))) (setq cs c))) cs)))) -(defsubst mm-replace-chars-in-string (string from to) - (mm-subst-char-in-string from to string)) - (eval-and-compile (defvar mm-emacs-mule (and (not (featurep 'xemacs)) (boundp 'default-enable-multibyte-characters) default-enable-multibyte-characters (fboundp 'set-buffer-multibyte)) - "Emacs mule.") - - (defvar mm-mule4-p (and mm-emacs-mule - (fboundp 'charsetp) - (not (charsetp 'eight-bit-control))) - "Mule version 4.") + "True in Emacs with Mule.") (if mm-emacs-mule (defun mm-enable-multibyte () "Set the multibyte flag of the current buffer. Only do this if the default value of `enable-multibyte-characters' is non-nil. This is a no-op in XEmacs." - (set-buffer-multibyte t)) + (set-buffer-multibyte 'to)) (defalias 'mm-enable-multibyte 'ignore)) (if mm-emacs-mule @@ -383,27 +415,14 @@ non-nil. This is a no-op in XEmacs." "Unset the multibyte flag of in the current buffer. This is a no-op in XEmacs." (set-buffer-multibyte nil)) - (defalias 'mm-disable-multibyte 'ignore)) - - (if mm-mule4-p - (defun mm-enable-multibyte-mule4 () - "Enable multibyte in the current buffer. -Only used in Emacs Mule 4." - (set-buffer-multibyte t)) - (defalias 'mm-enable-multibyte-mule4 'ignore)) - - (if mm-mule4-p - (defun mm-disable-multibyte-mule4 () - "Disable multibyte in the current buffer. -Only used in Emacs Mule 4." - (set-buffer-multibyte nil)) - (defalias 'mm-disable-multibyte-mule4 'ignore))) + (defalias 'mm-disable-multibyte 'ignore))) (defun mm-preferred-coding-system (charset) ;; A typo in some Emacs versions. (or (get-charset-property charset 'preferred-coding-system) (get-charset-property charset 'prefered-coding-system))) +;; Mule charsets shouldn't be used. (defsubst mm-guess-charset () "Guess Mule charset from the language environment." (or @@ -434,7 +453,7 @@ If the charset is `composition', return the actual one." (setq charset 'ascii) ;; charset-after is fake in some Emacsen. (setq charset (and (fboundp 'char-charset) (char-charset char))) - (if (eq charset 'composition) + (if (eq charset 'composition) ; Mule 4 (let ((p (or pos (point)))) (cadr (find-charset-region p (1+ p)))) (if (and charset (not (memq charset '(ascii eight-bit-control @@ -450,8 +469,10 @@ If the charset is `composition', return the actual one." ;; This exists in Emacs 20. (or (and (mm-preferred-coding-system charset) - (coding-system-get - (mm-preferred-coding-system charset) 'mime-charset)) + (or (coding-system-get + (mm-preferred-coding-system charset) :mime-charset) + (coding-system-get + (mm-preferred-coding-system charset) 'mime-charset))) (and (eq charset 'ascii) 'us-ascii) (mm-preferred-coding-system charset) @@ -468,13 +489,23 @@ If the charset is `composition', return the actual one." (setq result (cons head result))) (nreverse result))) -(if (and (not (featurep 'xemacs)) - (boundp 'enable-multibyte-characters)) - (defalias 'mm-multibyte-p - (lambda () - "Say whether multibyte is enabled in the current buffer." - enable-multibyte-characters)) - (defalias 'mm-multibyte-p (lambda () (featurep 'mule)))) +;; Fixme: This is used in places when it should be testing the +;; default multibyteness. See mm-default-multibyte-p. +(eval-and-compile + (if (and (not (featurep 'xemacs)) + (boundp 'enable-multibyte-characters)) + (defun mm-multibyte-p () + "Non-nil if multibyte is enabled in the current buffer." + enable-multibyte-characters) + (defun mm-multibyte-p () (featurep 'mule)))) + +(defun mm-default-multibyte-p () + "Return non-nil if the session is multibyte. +This affects whether coding conversion should be attempted generally." + (if (featurep 'mule) + (if (boundp 'default-enable-multibyte-characters) + default-enable-multibyte-characters + t))) (defun mm-iso-8859-x-to-15-region (&optional b e) (if (fboundp 'char-charset) @@ -498,8 +529,14 @@ If the charset is `composition', return the actual one." (not inconvertible)))) (defun mm-sort-coding-systems-predicate (a b) - (> (length (memq a mm-coding-system-priorities)) - (length (memq b mm-coding-system-priorities)))) + (let ((priorities + (mapcar (lambda (cs) + ;; Note: invalid entries are dropped silently + (and (coding-system-p cs) + (coding-system-base cs))) + mm-coding-system-priorities))) + (> (length (memq a priorities)) + (length (memq b priorities))))) (defun mm-find-mime-charset-region (b e &optional hack-charsets) "Return the MIME charsets needed to encode the region between B and E. @@ -515,18 +552,35 @@ charset, and a longer list means no appropriate charset." (when mm-coding-system-priorities (setq systems (sort systems 'mm-sort-coding-systems-predicate))) - ;; Fixme: The `mime-charset' (`x-ctext') of `compound-text' - ;; is not in the IANA list. (setq systems (delq 'compound-text systems)) (unless (equal systems '(undecided)) (while systems - (let ((cs (coding-system-get (pop systems) 'mime-charset))) - (if cs + (let* ((head (pop systems)) + (cs (or (coding-system-get head :mime-charset) + (coding-system-get head 'mime-charset)))) + ;; The mime-charset (`x-ctext') of + ;; `compound-text' is not in the IANA list. We + ;; shouldn't normally use anything here with a + ;; mime-charset having an `x-' prefix. + ;; Fixme: Allow this to be overridden, since + ;; there is existing use of x-ctext. + ;; Also people apparently need the coding system + ;; `iso-2022-jp-3' (which Mule-UCS defines with + ;; mime-charset, though it's not valid). + (if (and cs + (not (string-match "^[Xx]-" (symbol-name cs))) + ;; UTF-16 of any variety is invalid for + ;; text parts and, unfortunately, has + ;; mime-charset defined both in Mule-UCS + ;; and versions of Emacs. (The name + ;; might be `mule-utf-16...' or + ;; `utf-16...'.) + (not (string-match "utf-16" (symbol-name cs)))) (setq systems nil charsets (list cs)))))) charsets)) - ;; Otherwise we're not multibyte, XEmacs or a single coding - ;; system won't cover it. + ;; Otherwise we're not multibyte, we're XEmacs, or a single + ;; coding system won't cover it. (setq charsets (mm-delete-duplicates (mapcar 'mm-mime-charset @@ -552,13 +606,13 @@ Use unibyte mode for this." (put 'mm-with-unibyte-buffer 'edebug-form-spec '(body)) (defmacro mm-with-unibyte-current-buffer (&rest forms) - "Evaluate FORMS with current current buffer temporarily made unibyte. + "Evaluate FORMS with current buffer temporarily made unibyte. Also bind `default-enable-multibyte-characters' to nil. Equivalent to `progn' in XEmacs" (let ((multibyte (make-symbol "multibyte")) (buffer (make-symbol "buffer"))) `(if mm-emacs-mule - (let ((,multibyte enable-multibyte-characters) + (let ((,multibyte enable-multibyte-characters) (,buffer (current-buffer))) (unwind-protect (let (default-enable-multibyte-characters) @@ -571,25 +625,6 @@ Equivalent to `progn' in XEmacs" (put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0) (put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body)) -(defmacro mm-with-unibyte-current-buffer-mule4 (&rest forms) - "Evaluate FORMS there like `progn' in current buffer. -Mule4 only." - (let ((multibyte (make-symbol "multibyte")) - (buffer (make-symbol "buffer"))) - `(if mm-mule4-p - (let ((,multibyte enable-multibyte-characters) - (,buffer (current-buffer))) - (unwind-protect - (let (default-enable-multibyte-characters) - (set-buffer-multibyte nil) - ,@forms) - (set-buffer ,buffer) - (set-buffer-multibyte ,multibyte))) - (let (default-enable-multibyte-characters) - ,@forms)))) -(put 'mm-with-unibyte-current-buffer-mule4 'lisp-indent-function 0) -(put 'mm-with-unibyte-current-buffer-mule4 'edebug-form-spec '(body)) - (defmacro mm-with-unibyte (&rest forms) "Eval the FORMS with the default value of `enable-multibyte-characters' nil, ." `(let (default-enable-multibyte-characters) @@ -631,21 +666,6 @@ Mule4 only." mm-mime-mule-charset-alist))))) (list 'ascii (or charset 'latin-iso8859-1))))))))) -(if (fboundp 'shell-quote-argument) - (defalias 'mm-quote-arg 'shell-quote-argument) - (defun mm-quote-arg (arg) - "Return a version of ARG that is safe to evaluate in a shell." - (let ((pos 0) new-pos accum) - ;; *** bug: we don't handle newline characters properly - (while (setq new-pos (string-match "[]*[;!'`\"$\\& \t{} |()<>]" arg pos)) - (push (substring arg pos new-pos) accum) - (push "\\" accum) - (push (list (aref arg new-pos)) accum) - (setq pos (1+ new-pos))) - (if (= pos 0) - arg - (apply 'concat (nconc (nreverse accum) (list (substring arg pos)))))))) - (defun mm-auto-mode-alist () "Return an `auto-mode-alist' with only the .gz (etc) thingies." (let ((alist auto-mode-alist) @@ -662,11 +682,11 @@ Mule4 only." (defun mm-insert-file-contents (filename &optional visit beg end replace inhibit) - "Like `insert-file-contents', q.v., but only reads in the file. + "Like `insert-file-contents', but only reads in the file. A buffer may be modified in several ways after reading into the buffer due to advanced Emacs features, such as file-name-handlers, format decoding, -find-file-hooks, etc. -If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers. +`find-file-hooks', etc. +If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'. This function ensures that none of these modifications will take place." (let ((format-alist nil) (auto-mode-alist (if inhibit nil (mm-auto-mode-alist))) @@ -704,7 +724,8 @@ If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'." (append mm-inhibit-file-name-handlers inhibit-file-name-handlers) inhibit-file-name-handlers))) - (append-to-file start end filename))) + (write-region start end filename t 'no-message) + (message "Appended to %s" filename))) (defun mm-write-region (start end filename &optional append visit lockname coding-system inhibit) @@ -734,6 +755,7 @@ If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'." (push dir result)) (push path result)))) +;; Fixme: This doesn't look useful where it's used. (if (fboundp 'detect-coding-region) (defun mm-detect-coding-region (start end) "Like `detect-coding-region' except returning the best one." @@ -759,44 +781,6 @@ If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'." (let ((cs (mm-detect-coding-region start end))) cs))) -(defun mm-guess-mime-charset () - "Guess the default MIME charset from the language environment." - (let ((language-info - (and (boundp 'current-language-environment) - (assoc current-language-environment - language-info-alist))) - item) - (cond - ((null language-info) - 'iso-8859-1) - ((setq item - (cadr - (or (assq 'coding-priority language-info) - (assq 'coding-system language-info)))) - (if (fboundp 'coding-system-get) - (or (coding-system-get item 'mime-charset) - item) - item)) - ((setq item (car (last (assq 'charset language-info)))) - (if (eq item 'ascii) - 'iso-8859-1 - (mm-mime-charset item))) - (t - 'iso-8859-1)))) - -;; It is not a MIME function, but some MIME functions use it. -(defalias 'mm-make-temp-file - (if (fboundp 'make-temp-file) - 'make-temp-file - (lambda (prefix &optional dir-flag) - (let ((file (expand-file-name - (make-temp-name prefix) - (if (fboundp 'temp-directory) - (temp-directory) - temporary-file-directory)))) - (if dir-flag - (make-directory file)) - file)))) (provide 'mm-util)