X-Git-Url: https://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Fmm-util.el;h=435c3bba00fd251e0b5fc2c73d16092d532cdeac;hp=fdd1d567bfd9181293dac15b1044eb46c34df1c6;hb=034742263e7ca5aed088d9b9dcfe4c456d6a26ea;hpb=bdfb666064f11032744c4ea8e183274fa138bd50 diff --git a/lisp/mm-util.el b/lisp/mm-util.el index fdd1d567b..435c3bba0 100644 --- a/lisp/mm-util.el +++ b/lisp/mm-util.el @@ -1,31 +1,32 @@ ;;; mm-util.el --- Utility functions for Mule and low level things -;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, -;; 2005, 2006, 2007 Free Software Foundation, Inc. +;; Copyright (C) 1998-2011 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; MORIOKA Tomohiko ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 3, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: ;;; Code: +;; For Emacs <22.2 and XEmacs. +(eval-and-compile + (unless (fboundp 'declare-function) (defmacro declare-function (&rest r)))) + (eval-when-compile (require 'cl)) (require 'mail-prsvr) @@ -36,6 +37,16 @@ (require 'timer)) (require 'timer))) +(defvar mm-mime-mule-charset-alist ) +;; Note this is not presently used on Emacs >= 23, which is good, +;; since it means standalone message-mode (which requires mml and +;; hence mml-util) does not load gnus-util. +(autoload 'gnus-completing-read "gnus-util") + +;; Emulate functions that are not available in every (X)Emacs version. +;; The name of a function is prefixed with mm-, like `mm-char-int' for +;; `char-int' that is a native XEmacs function, not available in Emacs. +;; Gnus programs all should use mm- functions, not the original ones. (eval-and-compile (mapc (lambda (elem) @@ -43,39 +54,51 @@ (if (fboundp (car elem)) (defalias nfunc (car elem)) (defalias nfunc (cdr elem))))) - '((coding-system-list . ignore) + `(;; `coding-system-list' is not available in XEmacs 21.4 built + ;; without the `file-coding' feature. + (coding-system-list . ignore) + ;; `char-int' is an XEmacs function, not available in Emacs. (char-int . identity) + ;; `coding-system-equal' is an Emacs function, not available in XEmacs. (coding-system-equal . equal) + ;; `annotationp' is an XEmacs function, not available in Emacs. (annotationp . ignore) + ;; `set-buffer-file-coding-system' is not available in XEmacs 21.4 + ;; built without the `file-coding' feature. (set-buffer-file-coding-system . ignore) + ;; `read-charset' is an Emacs function, not available in XEmacs. (read-charset - . (lambda (prompt) - "Return a charset." - (intern - (completing-read - prompt - (mapcar (lambda (e) (list (symbol-name (car e)))) - mm-mime-mule-charset-alist) - nil t)))) + . ,(lambda (prompt) + "Return a charset." + (intern + (gnus-completing-read + prompt + (mapcar (lambda (e) (symbol-name (car e))) + mm-mime-mule-charset-alist) + t)))) + ;; `subst-char-in-string' is not available in XEmacs 21.4. (subst-char-in-string - . (lambda (from to string &optional inplace) - ;; stolen (and renamed) from nnheader.el - "Replace characters in STRING from FROM to TO. + . ,(lambda (from to string &optional inplace) + ;; stolen (and renamed) from nnheader.el + "Replace characters in STRING from FROM to TO. Unless optional argument INPLACE is non-nil, return a new string." - (let ((string (if inplace string (copy-sequence string))) - (len (length string)) - (idx 0)) - ;; Replace all occurrences of FROM with TO. - (while (< idx len) - (when (= (aref string idx) from) - (aset string idx to)) - (setq idx (1+ idx))) - string))) + (let ((string (if inplace string (copy-sequence string))) + (len (length string)) + (idx 0)) + ;; Replace all occurrences of FROM with TO. + (while (< idx len) + (when (= (aref string idx) from) + (aset string idx to)) + (setq idx (1+ idx))) + string))) + ;; `replace-in-string' is an XEmacs function, not available in Emacs. (replace-in-string - . (lambda (string regexp rep &optional literal) - "See `replace-regexp-in-string', only the order of args differs." - (replace-regexp-in-string regexp rep string nil literal))) + . ,(lambda (string regexp rep &optional literal) + "See `replace-regexp-in-string', only the order of args differs." + (replace-regexp-in-string regexp rep string nil literal))) + ;; `string-as-unibyte' is an Emacs function, not available in XEmacs. (string-as-unibyte . identity) + ;; `string-make-unibyte' is an Emacs function, not available in XEmacs. (string-make-unibyte . identity) ;; string-as-multibyte often doesn't really do what you think it does. ;; Example: @@ -95,30 +118,69 @@ ;; (string-as-multibyte s) ~= (decode-coding-string s 'emacs-mule) ;; (string-to-multibyte s) ~= (decode-coding-string s 'binary) ;; (string-make-multibyte s) ~= (decode-coding-string s locale-coding-system) + ;; `string-as-multibyte' is an Emacs function, not available in XEmacs. (string-as-multibyte . identity) + ;; `multibyte-string-p' is an Emacs function, not available in XEmacs. (multibyte-string-p . ignore) + ;; `insert-byte' is available only in Emacs 23.1 or greater. (insert-byte . insert-char) + ;; `multibyte-char-to-unibyte' is an Emacs function, not available + ;; in XEmacs. (multibyte-char-to-unibyte . identity) + ;; `set-buffer-multibyte' is an Emacs function, not available in XEmacs. + (set-buffer-multibyte . ignore) + ;; `special-display-p' is an Emacs function, not available in XEmacs. (special-display-p - . (lambda (buffer-name) - "Returns non-nil if a buffer named BUFFER-NAME gets a special frame." - (and special-display-function - (or (and (member buffer-name special-display-buffer-names) t) - (cdr (assoc buffer-name special-display-buffer-names)) - (catch 'return - (dolist (elem special-display-regexps) - (and (stringp elem) - (string-match elem buffer-name) - (throw 'return t)) - (and (consp elem) - (stringp (car elem)) - (string-match (car elem) buffer-name) - (throw 'return (cdr elem)))))))))))) - + . ,(lambda (buffer-name) + "Returns non-nil if a buffer named BUFFER-NAME gets a special frame." + (and special-display-function + (or (and (member buffer-name special-display-buffer-names) t) + (cdr (assoc buffer-name special-display-buffer-names)) + (catch 'return + (dolist (elem special-display-regexps) + (and (stringp elem) + (string-match elem buffer-name) + (throw 'return t)) + (and (consp elem) + (stringp (car elem)) + (string-match (car elem) buffer-name) + (throw 'return (cdr elem))))))))) + ;; `substring-no-properties' is available only in Emacs 22.1 or greater. + (substring-no-properties + . ,(lambda (string &optional from to) + "Return a substring of STRING, without text properties. +It starts at index FROM and ending before TO. +TO may be nil or omitted; then the substring runs to the end of STRING. +If FROM is nil or omitted, the substring starts at the beginning of STRING. +If FROM or TO is negative, it counts from the end. + +With one argument, just copy STRING without its properties." + (setq string (substring string (or from 0) to)) + (set-text-properties 0 (length string) nil string) + string)) + ;; `line-number-at-pos' is available only in Emacs 22.1 or greater + ;; and XEmacs 21.5. + (line-number-at-pos + . ,(lambda (&optional pos) + "Return (narrowed) buffer line number at position POS. +If POS is nil, use current buffer location. +Counting starts at (point-min), so the value refers +to the contents of the accessible portion of the buffer." + (let ((opoint (or pos (point))) start) + (save-excursion + (goto-char (point-min)) + (setq start (point)) + (goto-char opoint) + (forward-line 0) + (1+ (count-lines start (point)))))))))) + +;; `decode-coding-string', `encode-coding-string', `decode-coding-region' +;; and `encode-coding-region' are available in Emacs and XEmacs built with +;; the `file-coding' feature, but the XEmacs versions treat nil, that is +;; given as the `coding-system' argument, as the `binary' coding system. (eval-and-compile (if (featurep 'xemacs) (if (featurep 'file-coding) - ;; Don't modify string if CODING-SYSTEM is nil. (progn (defun mm-decode-coding-string (str coding-system) (if coding-system @@ -143,19 +205,12 @@ (defalias 'mm-decode-coding-region 'decode-coding-region) (defalias 'mm-encode-coding-region 'encode-coding-region))) -(defalias 'mm-string-to-multibyte - (cond - ((featurep 'xemacs) - 'identity) - ((fboundp 'string-to-multibyte) - 'string-to-multibyte) - (t - (lambda (string) - "Return a multibyte string with the same individual chars as string." - (mapconcat - (lambda (ch) (mm-string-as-multibyte (char-to-string ch))) - string ""))))) +;; `string-to-multibyte' is available only in Emacs. +(defalias 'mm-string-to-multibyte (if (featurep 'xemacs) + 'identity + 'string-to-multibyte)) +;; `char-or-char-int-p' is an XEmacs function, not available in Emacs. (eval-and-compile (defalias 'mm-char-or-char-int-p (cond @@ -163,6 +218,45 @@ ((fboundp 'char-valid-p) 'char-valid-p) (t 'identity)))) +;; `ucs-to-char' is a function that Mule-UCS provides. +(eval-and-compile + (if (featurep 'xemacs) + (cond ((and (fboundp 'unicode-to-char) ;; XEmacs 21.5. + (subrp (symbol-function 'unicode-to-char))) + (if (featurep 'mule) + (defalias 'mm-ucs-to-char 'unicode-to-char) + (defun mm-ucs-to-char (codepoint) + "Convert Unicode codepoint to character." + (or (unicode-to-char codepoint) ?#)))) + ((featurep 'mule) + (defun mm-ucs-to-char (codepoint) + "Convert Unicode codepoint to character." + (if (fboundp 'ucs-to-char) ;; Mule-UCS is loaded. + (progn + (defalias 'mm-ucs-to-char + (lambda (codepoint) + "Convert Unicode codepoint to character." + (condition-case nil + (or (ucs-to-char codepoint) ?#) + (error ?#)))) + (mm-ucs-to-char codepoint)) + (condition-case nil + (or (int-to-char codepoint) ?#) + (error ?#))))) + (t + (defun mm-ucs-to-char (codepoint) + "Convert Unicode codepoint to character." + (condition-case nil + (or (int-to-char codepoint) ?#) + (error ?#))))) + (if (let ((char (make-char 'japanese-jisx0208 36 34))) + (eq char (decode-char 'ucs char))) + ;; Emacs 23. + (defalias 'mm-ucs-to-char 'identity) + (defun mm-ucs-to-char (codepoint) + "Convert Unicode codepoint to character." + (or (decode-char 'ucs codepoint) ?#))))) + ;; Fixme: This seems always to be used to read a MIME charset, so it ;; should be re-named and fixed (in Emacs) to offer completion only on ;; proper charset names (base coding systems which have a @@ -173,18 +267,19 @@ ;; Actually, there should be an `mm-coding-system-mime-charset'. (eval-and-compile (defalias 'mm-read-coding-system - (cond - ((fboundp 'read-coding-system) - (if (and (featurep 'xemacs) - (<= (string-to-number emacs-version) 21.1)) - (lambda (prompt &optional default-coding-system) - (read-coding-system prompt)) - 'read-coding-system)) - (t (lambda (prompt &optional default-coding-system) - "Prompt the user for a coding system." - (completing-read - prompt (mapcar (lambda (s) (list (symbol-name (car s)))) - mm-mime-mule-charset-alist))))))) + (if (featurep 'emacs) 'read-coding-system + (cond + ((fboundp 'read-coding-system) + (if (and (featurep 'xemacs) + (<= (string-to-number emacs-version) 21.1)) + (lambda (prompt &optional default-coding-system) + (read-coding-system prompt)) + 'read-coding-system)) + (t (lambda (prompt &optional default-coding-system) + "Prompt the user for a coding system." + (gnus-completing-read + prompt (mapcar (lambda (s) (symbol-name (car s))) + mm-mime-mule-charset-alist)))))))) (defvar mm-coding-system-list nil) (defun mm-get-coding-system-list () @@ -213,16 +308,21 @@ non-nil, an alias is created and added to the alias. Else windows-NUMBER is used." (interactive (let ((completion-ignore-case t) - (candidates (cp-supported-codepages))) - (list (completing-read "Setup DOS Codepage: (default 437) " candidates - nil t nil nil "437")))) + (candidates (if (fboundp 'cp-supported-codepages) + (cp-supported-codepages) + ;; Removed in Emacs 23 (unicode), so signal an error: + (error "`codepage-setup' not present in this Emacs version")))) + (list (gnus-completing-read "Setup DOS Codepage" candidates + t nil nil "437")))) (when alias (setq alias (if (stringp alias) (intern alias) (intern (format "windows-%s" number))))) (let* ((cp (intern (format "cp%s" number)))) (unless (mm-coding-system-p cp) - (codepage-setup number)) + (if (fboundp 'codepage-setup) ; silence compiler + (codepage-setup number) + (error "`codepage-setup' not present in this Emacs version"))) (when (and alias ;; Don't add alias if setup of cp failed. (mm-coding-system-p cp)) @@ -259,10 +359,18 @@ the alias. Else windows-NUMBER is used." ,@(when (and (not (mm-coding-system-p 'gbk)) (mm-coding-system-p 'cp936)) '((gbk . cp936))) + ;; UTF8 is a bogus name for UTF-8 + ,@(when (and (not (mm-coding-system-p 'utf8)) + (mm-coding-system-p 'utf-8)) + '((utf8 . utf-8))) ;; ISO8859-1 is a bogus name for ISO-8859-1 ,@(when (and (not (mm-coding-system-p 'iso8859-1)) (mm-coding-system-p 'iso-8859-1)) '((iso8859-1 . iso-8859-1))) + ;; ISO_8859-1 is a bogus name for ISO-8859-1 + ,@(when (and (not (mm-coding-system-p 'iso_8859-1)) + (mm-coding-system-p 'iso-8859-1)) + '((iso_8859-1 . iso-8859-1))) ) "A mapping from unknown or invalid charset names to the real charset names. @@ -271,8 +379,7 @@ See `mm-codepage-iso-8859-list' and `mm-codepage-ibm-list'.") (defcustom mm-codepage-iso-8859-list (list 1250 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft ;; Outlook users in Czech republic. Use this to allow reading of - ;; their e-mails. cp1250 should be defined by M-x codepage-setup - ;; (Emacs 21). + ;; their e-mails. '(1252 . 1) ;; Windows-1252 is a superset of iso-8859-1 (West ;; Europe). See also `gnus-article-dumbquotes-map'. '(1254 . 9) ;; Windows-1254 is a superset of iso-8859-9 (Turkish). @@ -378,32 +485,12 @@ Unless LIST is given, `mm-codepage-ibm-list' is used." (mm-setup-codepage-iso-8859) (mm-setup-codepage-ibm) -(defcustom mm-charset-override-alist - '((iso-8859-1 . windows-1252) - (iso-8859-8 . windows-1255) - (iso-8859-9 . windows-1254)) - "A mapping from undesired charset names to their replacement. - -You may add pairs like (iso-8859-1 . windows-1252) here, -i.e. treat iso-8859-1 as windows-1252. windows-1252 is a -superset of iso-8859-1." - :type '(list (set :inline t - (const (iso-8859-1 . windows-1252)) - (const (iso-8859-8 . windows-1255)) - (const (iso-8859-9 . windows-1254)) - (const (undecided . windows-1252))) - (repeat :inline t - :tag "Other options" - (cons (symbol :tag "From charset") - (symbol :tag "To charset")))) - :version "22.1" ;; Gnus 5.10.9 - :group 'mime) - +;; Note: this has to be defined before `mm-charset-to-coding-system'. (defcustom mm-charset-eval-alist (if (featurep 'xemacs) nil ;; I don't know what would be useful for XEmacs. - '(;; Emacs 21 offers 1250 1251 1253 1257. Emacs 22 provides autoloads for - ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing). + '(;; Emacs 22 provides autoloads for 1250-1258 + ;; (i.e. `mm-codepage-setup' does nothing). (windows-1250 . (mm-codepage-setup 1250 t)) (windows-1251 . (mm-codepage-setup 1251 t)) (windows-1253 . (mm-codepage-setup 1253 t)) @@ -427,6 +514,159 @@ could use `autoload-coding-system' here." :group 'mime) (put 'mm-charset-eval-alist 'risky-local-variable t) +(defvar mm-charset-override-alist) + +;; Note: this function has to be defined before `mm-charset-override-alist' +;; since it will use this function in order to determine its default value +;; when loading mm-util.elc. +(defun mm-charset-to-coding-system (charset &optional lbt + allow-override silent) + "Return coding-system corresponding to CHARSET. +CHARSET is a symbol naming a MIME charset. +If optional argument LBT (`unix', `dos' or `mac') is specified, it is +used as the line break code type of the coding system. + +If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to +map undesired charset names to their replacement. This should +only be used for decoding, not for encoding. + +A non-nil value of SILENT means don't issue a warning even if CHARSET +is not available." + ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'. + (when (stringp charset) + (setq charset (intern (downcase charset)))) + (when lbt + (setq charset (intern (format "%s-%s" charset lbt)))) + (cond + ((null charset) + charset) + ;; Running in a non-MULE environment. + ((or (null (mm-get-coding-system-list)) + (not (fboundp 'coding-system-get))) + charset) + ;; Check override list quite early. Should only used for decoding, not for + ;; encoding! + ((and allow-override + (let ((cs (cdr (assq charset mm-charset-override-alist)))) + (and cs (mm-coding-system-p cs) cs)))) + ;; ascii + ((eq charset 'us-ascii) + 'ascii) + ;; Check to see whether we can handle this charset. (This depends + ;; on there being some coding system matching each `mime-charset' + ;; property defined, as there should be.) + ((and (mm-coding-system-p charset) +;;; Doing this would potentially weed out incorrect charsets. +;;; charset +;;; (eq charset (coding-system-get charset 'mime-charset)) + ) + charset) + ;; Use coding system Emacs knows. + ((and (fboundp 'coding-system-from-name) + (coding-system-from-name charset))) + ;; Eval expressions from `mm-charset-eval-alist' + ((let* ((el (assq charset mm-charset-eval-alist)) + (cs (car el)) + (form (cdr el))) + (and cs + form + (prog2 + ;; Avoid errors... + (condition-case nil (eval form) (error nil)) + ;; (message "Failed to eval `%s'" form)) + (mm-coding-system-p cs) + (message "Added charset `%s' via `mm-charset-eval-alist'" cs)) + cs))) + ;; Translate invalid charsets. + ((let ((cs (cdr (assq charset mm-charset-synonym-alist)))) + (and cs + (mm-coding-system-p cs) + ;; (message + ;; "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'" + ;; cs charset) + cs))) + ;; Last resort: search the coding system list for entries which + ;; have the right mime-charset in case the canonical name isn't + ;; defined (though it should be). + ((let (cs) + ;; mm-get-coding-system-list returns a list of cs without lbt. + ;; Do we need -lbt? + (dolist (c (mm-get-coding-system-list)) + (if (and (null cs) + (eq charset (or (coding-system-get c :mime-charset) + (coding-system-get c 'mime-charset)))) + (setq cs c))) + (unless (or silent cs) + ;; Warn the user about unknown charset: + (if (fboundp 'gnus-message) + (gnus-message 7 "Unknown charset: %s" charset) + (message "Unknown charset: %s" charset))) + cs)))) + +;; Note: `mm-charset-to-coding-system' has to be defined before this. +(defcustom mm-charset-override-alist + ;; Note: pairs that cannot be used in the Emacs version currently running + ;; will be removed. + '((gb2312 . gbk) + (iso-8859-1 . windows-1252) + (iso-8859-8 . windows-1255) + (iso-8859-9 . windows-1254)) + "A mapping from undesired charset names to their replacement. + +You may add pairs like (iso-8859-1 . windows-1252) here, +i.e. treat iso-8859-1 as windows-1252. windows-1252 is a +superset of iso-8859-1." + :type + '(list + :convert-widget + (lambda (widget) + (let ((defaults + (delq nil + (mapcar (lambda (pair) + (if (mm-charset-to-coding-system (cdr pair) + nil nil t) + pair)) + '((gb2312 . gbk) + (iso-8859-1 . windows-1252) + (iso-8859-8 . windows-1255) + (iso-8859-9 . windows-1254) + (undecided . windows-1252))))) + (val (copy-sequence (default-value 'mm-charset-override-alist))) + pair rest) + (while val + (push (if (and (prog1 + (setq pair (assq (caar val) defaults)) + (setq defaults (delq pair defaults))) + (equal (car val) pair)) + `(const ,pair) + `(cons :format "%v" + (const :format "(%v" ,(caar val)) + (symbol :size 3 :format " . %v)\n" ,(cdar val)))) + rest) + (setq val (cdr val))) + (while defaults + (push `(const ,(pop defaults)) rest)) + (widget-convert + 'list + `(set :inline t :format "%v" ,@(nreverse rest)) + `(repeat :inline t :tag "Other options" + (cons :format "%v" + (symbol :size 3 :format "(%v") + (symbol :size 3 :format " . %v)\n"))))))) + ;; Remove pairs that cannot be used in the Emacs version currently + ;; running. Note that this section will be evaluated when loading + ;; mm-util.elc. + :set (lambda (symbol value) + (custom-set-default + symbol (delq nil + (mapcar (lambda (pair) + (if (mm-charset-to-coding-system (cdr pair) + nil nil t) + pair)) + value)))) + :version "22.1" ;; Gnus 5.10.9 + :group 'mime) + (defvar mm-binary-coding-system (cond ((mm-coding-system-p 'binary) 'binary) @@ -435,7 +675,7 @@ could use `autoload-coding-system' here." "100% binary coding system.") (defvar mm-text-coding-system - (or (if (memq system-type '(windows-nt ms-dos ms-windows)) + (or (if (memq system-type '(windows-nt ms-dos)) (and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos) (and (mm-coding-system-p 'raw-text) 'raw-text)) mm-binary-coding-system) @@ -447,12 +687,12 @@ could use `autoload-coding-system' here." (defvar mm-auto-save-coding-system (cond ((mm-coding-system-p 'utf-8-emacs) ; Mule 7 - (if (memq system-type '(windows-nt ms-dos ms-windows)) + (if (memq system-type '(windows-nt ms-dos)) (if (mm-coding-system-p 'utf-8-emacs-dos) 'utf-8-emacs-dos mm-binary-coding-system) 'utf-8-emacs)) ((mm-coding-system-p 'emacs-mule) - (if (memq system-type '(windows-nt ms-dos ms-windows)) + (if (memq system-type '(windows-nt ms-dos)) (if (mm-coding-system-p 'emacs-mule-dos) 'emacs-mule-dos mm-binary-coding-system) 'emacs-mule)) @@ -486,6 +726,10 @@ could use `autoload-coding-system' here." (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978) (euc-kr korean-ksc5601) (gb2312 chinese-gb2312) + (gbk chinese-gbk) + (gb18030 gb18030-2-byte + gb18030-4-byte-bmp gb18030-4-byte-smp + gb18030-4-byte-ext-1 gb18030-4-byte-ext-2) (big5 chinese-big5-1 chinese-big5-2) (tibetan tibetan) (thai-tis620 thai-tis620) @@ -554,7 +798,7 @@ with Mule charsets. It is completely useless for Emacs." cs mime mule alist) (while css (setq cs (pop css) - mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode) + mime (or (coding-system-get cs :mime-charset); Emacs 23 (unicode) (coding-system-get cs 'mime-charset))) (when (and mime (not (eq t (setq mule @@ -594,13 +838,16 @@ Valid elements include: "A table of the difference character between ISO-8859-X and ISO-8859-15.") (defcustom mm-coding-system-priorities - (if (boundp 'current-language-environment) - (let ((lang (symbol-value 'current-language-environment))) - (cond ((string= lang "Japanese") - ;; Japanese users prefer iso-2022-jp to euc-japan or - ;; shift_jis, however iso-8859-1 should be used when - ;; there are only ASCII text and Latin-1 characters. - '(iso-8859-1 iso-2022-jp iso-2022-jp-2 shift_jis utf-8))))) + (let ((lang (if (boundp 'current-language-environment) + (symbol-value 'current-language-environment)))) + (cond (;; XEmacs without Mule but with `file-coding'. + (not lang) nil) + ;; In XEmacs 21.5 it may be the one like "Japanese (UTF-8)". + ((string-match "\\`Japanese" lang) + ;; Japanese users prefer iso-2022-jp to euc-japan or + ;; shift_jis, however iso-8859-1 should be used when + ;; there are only ASCII text and Latin-1 characters. + '(iso-8859-1 iso-2022-jp iso-2022-jp-2 shift_jis utf-8)))) "Preferred coding systems for encoding outgoing messages. More than one suitable coding system may be found for some text. @@ -619,6 +866,21 @@ variable is set, it overrides the default priority." Setting it to nil is useful on Emacsen supporting Unicode if sending mail with multiple parts is preferred to sending a Unicode one.") +(defvar mm-extra-numeric-entities + (mapcar + (lambda (item) + (cons (car item) (mm-ucs-to-char (cdr item)))) + '((#x80 . #x20AC) (#x82 . #x201A) (#x83 . #x0192) (#x84 . #x201E) + (#x85 . #x2026) (#x86 . #x2020) (#x87 . #x2021) (#x88 . #x02C6) + (#x89 . #x2030) (#x8A . #x0160) (#x8B . #x2039) (#x8C . #x0152) + (#x8E . #x017D) (#x91 . #x2018) (#x92 . #x2019) (#x93 . #x201C) + (#x94 . #x201D) (#x95 . #x2022) (#x96 . #x2013) (#x97 . #x2014) + (#x98 . #x02DC) (#x99 . #x2122) (#x9A . #x0161) (#x9B . #x203A) + (#x9C . #x0153) (#x9E . #x017E) (#x9F . #x0178))) + "*Alist of extra numeric entities and characters other than ISO 10646. +This table is used for decoding extra numeric entities to characters, +like \"€\" to the euro sign, mainly in html messages.") + ;;; Internal variables: ;;; Functions: @@ -649,105 +911,21 @@ mail with multiple parts is preferred to sending a Unicode one.") (pop alist)) out))) -(defun mm-charset-to-coding-system (charset &optional lbt - allow-override) - "Return coding-system corresponding to CHARSET. -CHARSET is a symbol naming a MIME charset. -If optional argument LBT (`unix', `dos' or `mac') is specified, it is -used as the line break code type of the coding system. - -If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to -map undesired charset names to their replacement. This should -only be used for decoding, not for encoding." - ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'. - (when (stringp charset) - (setq charset (intern (downcase charset)))) - (when lbt - (setq charset (intern (format "%s-%s" charset lbt)))) - (cond - ((null charset) - charset) - ;; Running in a non-MULE environment. - ((or (null (mm-get-coding-system-list)) - (not (fboundp 'coding-system-get))) - charset) - ;; Check override list quite early. Should only used for decoding, not for - ;; encoding! - ((and allow-override - (let ((cs (cdr (assq charset mm-charset-override-alist)))) - (and cs (mm-coding-system-p cs) cs)))) - ;; ascii - ((eq charset 'us-ascii) - 'ascii) - ;; Check to see whether we can handle this charset. (This depends - ;; on there being some coding system matching each `mime-charset' - ;; property defined, as there should be.) - ((and (mm-coding-system-p charset) -;;; Doing this would potentially weed out incorrect charsets. -;;; charset -;;; (eq charset (coding-system-get charset 'mime-charset)) - ) - charset) - ;; Eval expressions from `mm-charset-eval-alist' - ((let* ((el (assq charset mm-charset-eval-alist)) - (cs (car el)) - (form (cdr el))) - (and cs - form - (prog2 - ;; Avoid errors... - (condition-case nil (eval form) (error nil)) - ;; (message "Failed to eval `%s'" form)) - (mm-coding-system-p cs) - (message "Added charset `%s' via `mm-charset-eval-alist'" cs)) - cs))) - ;; Translate invalid charsets. - ((let ((cs (cdr (assq charset mm-charset-synonym-alist)))) - (and cs - (mm-coding-system-p cs) - ;; (message - ;; "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'" - ;; cs charset) - cs))) - ;; Last resort: search the coding system list for entries which - ;; have the right mime-charset in case the canonical name isn't - ;; defined (though it should be). - ((let (cs) - ;; mm-get-coding-system-list returns a list of cs without lbt. - ;; Do we need -lbt? - (dolist (c (mm-get-coding-system-list)) - (if (and (null cs) - (eq charset (or (coding-system-get c :mime-charset) - (coding-system-get c 'mime-charset)))) - (setq cs c))) - (unless cs - ;; Warn the user about unknown charset: - (if (fboundp 'gnus-message) - (gnus-message 7 "Unknown charset: %s" charset) - (message "Unknown charset: %s" charset))) - cs)))) - (eval-and-compile - (defvar mm-emacs-mule (and (not (featurep 'xemacs)) - (boundp 'default-enable-multibyte-characters) - default-enable-multibyte-characters - (fboundp 'set-buffer-multibyte)) - "True in Emacs with Mule.") - - (if mm-emacs-mule - (defun mm-enable-multibyte () - "Set the multibyte flag of the current buffer. + (if (featurep 'xemacs) + (defalias 'mm-enable-multibyte 'ignore) + (defun mm-enable-multibyte () + "Set the multibyte flag of the current buffer. Only do this if the default value of `enable-multibyte-characters' is non-nil. This is a no-op in XEmacs." - (set-buffer-multibyte 'to)) - (defalias 'mm-enable-multibyte 'ignore)) + (set-buffer-multibyte 'to))) - (if mm-emacs-mule - (defun mm-disable-multibyte () - "Unset the multibyte flag of in the current buffer. + (if (featurep 'xemacs) + (defalias 'mm-disable-multibyte 'ignore) + (defun mm-disable-multibyte () + "Unset the multibyte flag of in the current buffer. This is a no-op in XEmacs." - (set-buffer-multibyte nil)) - (defalias 'mm-disable-multibyte 'ignore))) + (set-buffer-multibyte nil)))) (defun mm-preferred-coding-system (charset) ;; A typo in some Emacs versions. @@ -798,7 +976,6 @@ If the charset is `composition', return the actual one." (if (eq charset 'unknown) (error "The message contains non-printable characters, please use attachment")) (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property)) - ;; This exists in Emacs 20. (or (and (mm-preferred-coding-system charset) (or (coding-system-get @@ -812,6 +989,7 @@ If the charset is `composition', return the actual one." ;; This is for XEmacs. (mm-mule-charset-to-mime-charset charset))) +;; `delete-dups' is not available in XEmacs 21.4. (if (fboundp 'delete-dups) (defalias 'mm-delete-duplicates 'delete-dups) (defun mm-delete-duplicates (list) @@ -842,8 +1020,8 @@ This is a compatibility function for Emacsen without `delete-dups'." "Return non-nil if the session is multibyte. This affects whether coding conversion should be attempted generally." (if (featurep 'mule) - (if (boundp 'default-enable-multibyte-characters) - default-enable-multibyte-characters + (if (boundp 'enable-multibyte-characters) + (default-value 'enable-multibyte-characters) t))) (defun mm-iso-8859-x-to-15-region (&optional b e) @@ -884,9 +1062,10 @@ This affects whether coding conversion should be attempted generally." (autoload 'latin-unity-massage-name "latin-unity") (autoload 'latin-unity-maybe-remap "latin-unity") (autoload 'latin-unity-representations-feasible-region "latin-unity") - (autoload 'latin-unity-representations-present-region "latin-unity") - (defvar latin-unity-coding-systems) - (defvar latin-unity-ucs-list)) + (autoload 'latin-unity-representations-present-region "latin-unity")) + +(defvar latin-unity-coding-systems) +(defvar latin-unity-ucs-list) (defun mm-xemacs-find-mime-charset-1 (begin end) "Determine which MIME charset to use to send region as message. @@ -957,6 +1136,8 @@ But this is very much a corner case, so don't worry about it." (when (featurep 'xemacs) `(and (featurep 'mule) (mm-xemacs-find-mime-charset-1 ,begin ,end)))) +(declare-function mm-delete-duplicates "mm-util" (list)) + (defun mm-find-mime-charset-region (b e &optional hack-charsets) "Return the MIME charsets needed to encode the region between B and E. nil means ASCII, a single-element list represents an appropriate MIME @@ -1003,6 +1184,8 @@ charset, and a longer list means no appropriate charset." ;; Otherwise, we'll get nil, and the next setq will get invoked. (setq charsets (mm-xemacs-find-mime-charset b e)) + ;; Fixme: won't work for unibyte Emacs 23: + ;; We're not multibyte, or a single coding system won't cover it. (setq charsets (mm-delete-duplicates @@ -1034,60 +1217,43 @@ charset, and a longer list means no appropriate charset." (defmacro mm-with-unibyte-buffer (&rest forms) "Create a temporary buffer, and evaluate FORMS there like `progn'. Use unibyte mode for this." - `(let (default-enable-multibyte-characters) - (with-temp-buffer ,@forms))) + `(with-temp-buffer + (mm-disable-multibyte) + ,@forms)) (put 'mm-with-unibyte-buffer 'lisp-indent-function 0) (put 'mm-with-unibyte-buffer 'edebug-form-spec '(body)) (defmacro mm-with-multibyte-buffer (&rest forms) "Create a temporary buffer, and evaluate FORMS there like `progn'. Use multibyte mode for this." - `(let ((default-enable-multibyte-characters t)) - (with-temp-buffer ,@forms))) + `(with-temp-buffer + (mm-enable-multibyte) + ,@forms)) (put 'mm-with-multibyte-buffer 'lisp-indent-function 0) (put 'mm-with-multibyte-buffer 'edebug-form-spec '(body)) (defmacro mm-with-unibyte-current-buffer (&rest forms) "Evaluate FORMS with current buffer temporarily made unibyte. -Also bind `default-enable-multibyte-characters' to nil. -Equivalent to `progn' in XEmacs - -NOTE: Use this macro with caution in multibyte buffers (it is not -worth using this macro in unibyte buffers of course). Use of -`(set-buffer-multibyte t)', which is run finally, is generally -harmful since it is likely to modify existing data in the buffer. -For instance, it converts \"\\300\\255\" into \"\\255\" in -Emacs 23 (unicode)." - (let ((multibyte (make-symbol "multibyte")) - (buffer (make-symbol "buffer"))) - `(if mm-emacs-mule - (let ((,multibyte enable-multibyte-characters) - (,buffer (current-buffer))) - (unwind-protect - (let (default-enable-multibyte-characters) - (set-buffer-multibyte nil) - ,@forms) - (set-buffer ,buffer) - (set-buffer-multibyte ,multibyte))) - (let (default-enable-multibyte-characters) - ,@forms)))) +Equivalent to `progn' in XEmacs. + +Note: We recommend not using this macro any more; there should be +better ways to do a similar thing. The previous version of this macro +bound the default value of `enable-multibyte-characters' to nil while +evaluating FORMS but it is no longer done. So, some programs assuming +it if any may malfunction." + (if (featurep 'xemacs) + `(progn ,@forms) + (let ((multibyte (make-symbol "multibyte"))) + `(let ((,multibyte enable-multibyte-characters)) + (when ,multibyte + (set-buffer-multibyte nil)) + (prog1 + (progn ,@forms) + (when ,multibyte + (set-buffer-multibyte t))))))) (put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0) (put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body)) -(defmacro mm-with-unibyte (&rest forms) - "Eval the FORMS with the default value of `enable-multibyte-characters' nil." - `(let (default-enable-multibyte-characters) - ,@forms)) -(put 'mm-with-unibyte 'lisp-indent-function 0) -(put 'mm-with-unibyte 'edebug-form-spec '(body)) - -(defmacro mm-with-multibyte (&rest forms) - "Eval the FORMS with the default value of `enable-multibyte-characters' t." - `(let ((default-enable-multibyte-characters t)) - ,@forms)) -(put 'mm-with-multibyte 'lisp-indent-function 0) -(put 'mm-with-multibyte 'edebug-form-spec '(body)) - (defun mm-find-charset-region (b e) "Return a list of Emacs charsets in the region B to E." (cond @@ -1144,24 +1310,24 @@ to advanced Emacs features, such as file-name-handlers, format decoding, `find-file-hooks', etc. If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'. This function ensures that none of these modifications will take place." - (let* ((format-alist nil) - (auto-mode-alist (if inhibit nil (mm-auto-mode-alist))) - (default-major-mode 'fundamental-mode) - (enable-local-variables nil) - (after-insert-file-functions nil) - (enable-local-eval nil) - (inhibit-file-name-operation (if inhibit - 'insert-file-contents - inhibit-file-name-operation)) - (inhibit-file-name-handlers - (if inhibit - (append mm-inhibit-file-name-handlers - inhibit-file-name-handlers) - inhibit-file-name-handlers)) - (ffh (if (boundp 'find-file-hook) - 'find-file-hook - 'find-file-hooks)) - (val (symbol-value ffh))) + (letf* ((format-alist nil) + (auto-mode-alist (if inhibit nil (mm-auto-mode-alist))) + ((default-value 'major-mode) 'fundamental-mode) + (enable-local-variables nil) + (after-insert-file-functions nil) + (enable-local-eval nil) + (inhibit-file-name-operation (if inhibit + 'insert-file-contents + inhibit-file-name-operation)) + (inhibit-file-name-handlers + (if inhibit + (append mm-inhibit-file-name-handlers + inhibit-file-name-handlers) + inhibit-file-name-handlers)) + (ffh (if (boundp 'find-file-hook) + 'find-file-hook + 'find-file-hooks)) + (val (symbol-value ffh))) (set ffh nil) (unwind-protect (insert-file-contents filename visit beg end replace) @@ -1207,6 +1373,8 @@ If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'." inhibit-file-name-handlers))) (write-region start end filename append visit lockname))) +(autoload 'gmm-write-region "gmm-utils") + ;; It is not a MIME function, but some MIME functions use it. (if (and (fboundp 'make-temp-file) (ignore-errors @@ -1271,16 +1439,23 @@ If SUFFIX is non-nil, add that at the end of the file name." ;; Reset the umask. (set-default-file-modes umask))))) +(defvar mm-image-load-path-cache nil) + (defun mm-image-load-path (&optional package) - (let (dir result) - (dolist (path load-path (nreverse result)) - (when (and path - (file-directory-p - (setq dir (concat (file-name-directory - (directory-file-name path)) - "etc/images/" (or package "gnus/"))))) - (push dir result)) - (push path result)))) + (if (and mm-image-load-path-cache + (equal load-path (car mm-image-load-path-cache))) + (cdr mm-image-load-path-cache) + (let (dir result) + (dolist (path load-path) + (when (and path + (file-directory-p + (setq dir (concat (file-name-directory + (directory-file-name path)) + "etc/images/" (or package "gnus/"))))) + (push dir result))) + (setq result (nreverse result) + mm-image-load-path-cache (cons load-path result)) + result))) ;; Fixme: This doesn't look useful where it's used. (if (fboundp 'detect-coding-region) @@ -1298,6 +1473,8 @@ If SUFFIX is non-nil, add that at the end of the file name." (if (eq (point) end) 'ascii (mm-guess-charset)) (goto-char point))))) +(declare-function mm-detect-coding-region "mm-util" (start end)) + (if (fboundp 'coding-system-get) (defun mm-detect-mime-charset-region (start end) "Detect MIME charset of the text in the region between START and END." @@ -1372,14 +1549,13 @@ decompressed data. The buffer's multibyteness must be turned off." prog t (list t err-file) nil args) jka-compr-acceptable-retval-list) (erase-buffer) - (insert (mapconcat - 'identity - (delete "" (split-string - (prog2 - (insert-file-contents err-file) - (buffer-string) - (erase-buffer)))) - " ") + (insert (mapconcat 'identity + (split-string + (prog2 + (insert-file-contents err-file) + (buffer-string) + (erase-buffer)) t) + " ") "\n") (setq err-msg (format "Error while executing \"%s %s < %s\"" @@ -1389,7 +1565,7 @@ decompressed data. The buffer's multibyteness must be turned off." (error (setq err-msg (error-message-string err))))) (when (file-exists-p err-file) - (ignore-errors (jka-compr-delete-temp-file err-file))) + (ignore-errors (delete-file err-file))) (when inplace (unless err-msg (delete-region (point-min) (point-max)) @@ -1422,12 +1598,12 @@ gzip, bzip2, etc. are allowed." filename)) (mm-decompress-buffer filename nil t)))) (when decomp - (set-buffer (let (default-enable-multibyte-characters) - (generate-new-buffer " *temp*"))) + (set-buffer (generate-new-buffer " *temp*")) + (mm-disable-multibyte) (insert decomp) (setq filename (file-name-sans-extension filename))) (goto-char (point-min)) - (prog1 + (unwind-protect (cond ((boundp 'set-auto-coding-function) ;; Emacs (if filename @@ -1440,7 +1616,7 @@ gzip, bzip2, etc. are allowed." (funcall (symbol-value 'set-auto-coding-function) nil (- (point-max) (point-min))) (error nil))))) - ((featurep 'file-coding) ;; XEmacs + ((and (featurep 'xemacs) (featurep 'file-coding)) ;; XEmacs (let ((case-fold-search t) (end (point-at-eol)) codesys start) @@ -1493,5 +1669,4 @@ gzip, bzip2, etc. are allowed." (provide 'mm-util) -;; arch-tag: 94dc5388-825d-4fd1-bfa5-2100aa351238 ;;; mm-util.el ends here