1 ;;; rfc2047.el --- Functions for encoding and decoding rfc2047 messages
2 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; This file is part of GNU Emacs.
8 ;; GNU Emacs is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; GNU Emacs is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GNU Emacs; see the file COPYING. If not, write to the
20 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 ;; Boston, MA 02111-1307, USA.
25 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part
26 ;; Three: Message Header Extensions for Non-ASCII Text".
32 (defvar message-posting-charset))
39 ;; Fixme: Avoid this (for gnus-point-at-...) mm dependence on gnus.
41 (autoload 'mm-body-7-or-8 "mm-bodies")
43 (defvar rfc2047-header-encoding-alist
44 '(("Newsgroups" . nil)
47 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" .
50 "*Header/encoding method alist.
51 The list is traversed sequentially. The keys can either be
56 1) nil, in which case no encoding is done;
57 2) `mime', in which case the header will be encoded according to RFC2047;
58 3) a charset, in which case it will be encoded as that charset;
59 4) `default', in which case the field will be encoded as the rest
61 5) a string, like `mime', expect for using it as word-chars.")
63 (defvar rfc2047-charset-encoding-alist
86 "Alist of MIME charsets to RFC2047 encodings.
87 Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding,
88 quoted-printable and base64 respectively.")
90 (defvar rfc2047-encoding-function-alist
91 '((Q . rfc2047-q-encode-region)
92 (B . rfc2047-b-encode-region)
94 "Alist of RFC2047 encodings to encoding functions.")
96 (defvar rfc2047-q-encoding-alist
97 '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):"
99 ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
100 ;; Avoid using 8bit characters.
101 ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
102 ("." . "\010\012\014\040-\074\076\100-\136\140-\177"))
103 "Alist of header regexps and valid Q characters.")
106 ;;; Functions for encoding RFC2047 messages
109 (defun rfc2047-narrow-to-field ()
110 "Narrow the buffer to the header on the current line."
116 (if (re-search-forward "^[^ \n\t]" nil t)
121 (goto-char (point-min)))
123 (defun rfc2047-field-value ()
124 "Return the value of the field at point."
127 (rfc2047-narrow-to-field)
128 (re-search-forward ":[ \t\n]*" nil t)
129 (buffer-substring (point) (point-max)))))
131 (defun rfc2047-encode-message-header ()
132 "Encode the message header according to `rfc2047-header-encoding-alist'.
133 Should be called narrowed to the head of the message."
136 (goto-char (point-min))
137 (let (alist elem method)
140 (rfc2047-narrow-to-field)
141 (if (not (rfc2047-encodable-p))
143 (if (and (eq (mm-body-7-or-8) '8bit)
146 (car message-posting-charset)))
147 ;; 8 bit must be decoded.
148 ;; Is message-posting-charset a coding system?
149 (mm-encode-coding-region
150 (point-min) (point-max)
151 (car message-posting-charset))
153 ;; No encoding necessary, but folding is nice
156 (goto-char (point-min))
157 (skip-chars-forward "^:")
158 (when (looking-at ": ")
162 ;; We found something that may perhaps be encoded.
164 alist rfc2047-header-encoding-alist)
165 (while (setq elem (pop alist))
166 (when (or (and (stringp (car elem))
167 (looking-at (car elem)))
173 (rfc2047-encode-region (point-min) (point-max) method))
175 (rfc2047-encode-region (point-min) (point-max)))
176 ((eq method 'default)
177 (if (and (featurep 'mule)
178 (if (boundp 'default-enable-multibyte-characters)
179 default-enable-multibyte-characters)
181 (mm-encode-coding-region (point-min) (point-max)
182 mail-parse-charset)))
183 ;; We get this when CC'ing messsages to newsgroups with
184 ;; 8-bit names. The group name mail copy just get
185 ;; unconditionally encoded. Previously, it would ask
186 ;; whether to encode, which was quite confusing for the
187 ;; user. If the new behaviour is wrong, tell me. I have
188 ;; left the old code commented out below.
189 ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07.
192 (mm-find-charset-region (point-min) (point-max)))
193 (rfc2047-encode-region (point-min) (point-max))))
195 ;;; (and (delq 'ascii
196 ;;; (mm-find-charset-region (point-min)
198 ;;; (if (or (message-options-get
199 ;;; 'rfc2047-encode-message-header-encode-any)
200 ;;; (message-options-set
201 ;;; 'rfc2047-encode-message-header-encode-any
203 ;;; "Some texts are not encoded. Encode anyway?")))
204 ;;; (rfc2047-encode-region (point-min) (point-max))
205 ;;; (error "Cannot send unencoded text"))))
206 ((mm-coding-system-p method)
207 (if (and (featurep 'mule)
208 (if (boundp 'default-enable-multibyte-characters)
209 default-enable-multibyte-characters))
210 (mm-encode-coding-region (point-min) (point-max) method)))
213 (goto-char (point-max)))))))
215 ;; Fixme: This, and the require below may not be the Right Thing, but
216 ;; should be safe just before release. -- fx 2001-02-08
217 (eval-when-compile (defvar message-posting-charset))
219 (defun rfc2047-encodable-p ()
220 "Return non-nil if any characters in current buffer need encoding in headers.
221 The buffer may be narrowed."
222 (require 'message) ; for message-posting-charset
226 (mm-find-charset-region (point-min) (point-max))))
227 (cs (list 'us-ascii (car message-posting-charset)))
230 (unless (memq (pop charsets) cs)
234 (defun rfc2047-dissect-region (b e &optional word-chars)
235 "Dissect the region between B and E into words."
237 ;; Anything except most CTLs, WSP
238 (setq word-chars "\010\012\014\041-\177"))
239 (let (mail-parse-mule-charset
243 (narrow-to-region b e)
244 (goto-char (point-min))
245 (skip-chars-forward "\000-\177")
248 (skip-chars-backward word-chars b)
249 (unless (eq b (point))
250 (push (cons (buffer-substring b (point)) nil) words))
253 (setq current (mm-charset-after))
255 (skip-chars-forward word-chars)
256 (while (and (not (eobp))