1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages
2 ;; Copyright (C) 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; This file is part of GNU Emacs.
8 ;; GNU Emacs is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; GNU Emacs is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GNU Emacs; see the file COPYING. If not, write to the
20 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 ;; Boston, MA 02111-1307, USA.
25 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part
26 ;; Three: Message Header Extensions for Non-ASCII Text".
32 (defvar message-posting-charset))
36 ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus.
39 (autoload 'mm-body-7-or-8 "mm-bodies")
;; Table pairing header-name patterns with an encoding method.  In the
;; entries below, "Newsgroups" is paired with nil and the address-style
;; headers are paired with `address-mime'; the docstring lists what each
;; method means.
41 (defvar rfc2047-header-encoding-alist
42 '(("Newsgroups" . nil)
;; Address headers, each optionally prefixed with "Resent-".
45 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\
46 \\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime)
48 "*Header/encoding method alist.
49 The list is traversed sequentially. The keys can either be
54 1) nil, in which case no encoding is done;
55 2) `mime', in which case the header will be encoded according to RFC2047;
56 3) `address-mime', like `mime', but takes account of the rules for address
57 fields (where quoted strings and comments must be treated separately);
58 4) a charset, in which case it will be encoded as that charset;
59 5) `default', in which case the field will be encoded as the rest
;; Per the docstring: selects, for each MIME charset, which RFC 2047
;; encoding to use -- `Q', `B', or nil for none.
62 (defvar rfc2047-charset-encoding-alist
86 "Alist of MIME charsets to RFC2047 encodings.
87 Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding,
88 quoted-printable and base64 respectively.")
;; Dispatch table keyed by encoding symbol: `Q' is paired with
;; `rfc2047-q-encode-region' and `B' with `rfc2047-b-encode-region'.
90 (defvar rfc2047-encoding-function-alist
91 '((Q . rfc2047-q-encode-region)
92 (B . rfc2047-b-encode-region)
94 "Alist of RFC2047 encodings to encoding functions.")
97 ;;; Functions for encoding RFC2047 messages
100 (defun rfc2047-narrow-to-field ()
101 "Narrow the buffer to the header on the current line."
;; A line that begins with something other than space, tab or newline
;; is not a continuation line, so a match here locates the start of
;; whatever follows the current field.
107 (if (re-search-forward "^[^ \n\t]" nil t)
;; Leave point at the start of the accessible region.
110 (goto-char (point-min)))
112 (defun rfc2047-field-value ()
113 "Return the value of the field at point."
;; Restrict the buffer to the current field first.
116 (rfc2047-narrow-to-field)
;; Move past the first colon found and any whitespace (including
;; newlines) that follows it.
117 (re-search-forward ":[ \t\n]*" nil t)
;; Everything from point to the end of the accessible region is returned.
118 (buffer-substring (point) (point-max)))))
;; Defaults to `address-mime'.  `rfc2047-encode-region' reads it, and
;; per the docstring callers are expected to let-bind it around the call
;; rather than set it globally.
120 (defvar rfc2047-encoding-type 'address-mime
121 "The type of encoding done by `rfc2047-encode-region'.
122 This should be dynamically bound around calls to
123 `rfc2047-encode-region' to either `mime' or `address-mime'. See
124 `rfc2047-header-encoding-alist', for definitions.")
126 (defun rfc2047-encode-message-header ()
127 "Encode the message header according to `rfc2047-header-encoding-alist'.
128 Should be called narrowed to the head of the message."
131 (goto-char (point-min))
132 (let (alist elem method)
;; Work on one header field at a time.
135 (rfc2047-narrow-to-field)
;; First branch: nothing in this field needs RFC 2047 encoding.
136 (if (not (rfc2047-encodable-p))
138 (if (and (eq (mm-body-7-or-8) '8bit)
141 (car message-posting-charset)))
142 ;; 8 bit must be decoded.
;; NOTE(review): the call below *encodes* the field using the coding
;; system for (car message-posting-charset); "decoded" above may be
;; a slip -- confirm.
143 (mm-encode-coding-region
144 (point-min) (point-max)
145 (mm-charset-to-coding-system
146 (car message-posting-charset))))
147 ;; No encoding necessary, but folding is nice
150 (goto-char (point-min))
;; Move up to the colon that ends the field name.
151 (skip-chars-forward "^:")
152 (when (looking-at ": ")
156 ;; We found something that may perhaps be encoded.
158 alist rfc2047-header-encoding-alist)
;; Walk the alist; string keys are regexps tried at point with
;; `looking-at'.
159 (while (setq elem (pop alist))
160 (when (or (and (stringp (car elem))
161 (looking-at (car elem)))
165 (goto-char (point-min))
;; Skip the field name, the colon and following spaces so that only
;; the field body is handed to the encoders below.
166 (re-search-forward "^[^:]+: *" nil t)
168 ((eq method 'address-mime)
169 (rfc2047-encode-region (point) (point-max)))
;; Bind `rfc2047-encoding-type' to `mime' for this call only.
171 (let ((rfc2047-encoding-type 'mime))
172 (rfc2047-encode-region (point) (point-max))))
173 ((eq method 'default)
174 (if (and (featurep 'mule)
175 (if (boundp 'default-enable-multibyte-characters)
176 default-enable-multibyte-characters)
178 (mm-encode-coding-region (point) (point-max)
179 mail-parse-charset)))
180 ;; We get this when CC'ing messages to newsgroups with
181 ;; 8-bit names. The group name mail copy just got
182 ;; unconditionally encoded. Previously, it would ask
183 ;; whether to encode, which was quite confusing for the
184 ;; user. If the new behaviour is wrong, tell me. I have
185 ;; left the old code commented out below.
186 ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07.
187 ;; Modified by Dave Love, with the commented-out code changed
188 ;; in accordance with changes elsewhere.
190 (rfc2047-encode-region (point) (point-max)))
192 ;;; (if (or (message-options-get
193 ;;; 'rfc2047-encode-message-header-encode-any)
194 ;;; (message-options-set
195 ;;; 'rfc2047-encode-message-header-encode-any
197 ;;; "Some texts are not encoded. Encode anyway?")))
198 ;;; (rfc2047-encode-region (point-min) (point-max))
199 ;;; (error "Cannot send unencoded text")))
;; METHOD names a coding system: encode the field body with it, but
;; only when multibyte support is enabled.
200 ((mm-coding-system-p method)
201 (if (and (featurep 'mule)
202 (if (boundp 'default-enable-multibyte-characters)
203 default-enable-multibyte-characters))
204 (mm-encode-coding-region (point) (point-max) method)))
;; Move to the end of the accessible region.
207 (goto-char (point-max)))))))
209 ;; Fixme: This, and the require below may not be the Right Thing, but
210 ;; should be safe just before release. -- fx 2001-02-08
211 (eval-when-compile (defvar message-posting-charset))
213 (defun rfc2047-encodable-p ()
214 "Return non-nil if any characters in current buffer need encoding in headers.
215 The buffer may be narrowed."
216 (require 'message) ; for message-posting-charset
;; Ask mm for the MIME charset(s) of the accessible portion of the buffer.
218 (mm-find-mime-charset-region (point-min) (point-max))))
;; This test is false when the charsets found are exactly the posting
;; charset and nothing else.
220 (not (equal charsets (list (car message-posting-charset)))))))
222 ;; Use this syntax table when parsing into regions that may need
223 ;; encoding. Double quotes are string delimiters, backslash is
224 ;; character quoting, and all other RFC 2822 special characters are
225 ;; treated as punctuation so we can use forward-sexp/forward-word to
226 ;; skip to the end of regions appropriately. Nb. ietf-drums does
227 ;; things differently.
228 (defconst rfc2047-syntax-table
229 ;; (make-char-table 'syntax-table '(2)) only works in Emacs.
230 (let ((table (make-syntax-table)))
231 ;; The following is done to work for setting all elements of the table
232 ;; in Emacs 21 and 22 and XEmacs; it appears to be the cleanest way.
233 ;; Play safe and don't assume the form of the word syntax entry --
;; Either branch copies the syntax entry that the standard syntax table
;; has for ?a; the function names are only used when `fboundp' says
;; they exist.
235 (if (fboundp 'set-char-table-range) ; Emacs
236 (funcall (intern "set-char-table-range")
237 table t (aref (standard-syntax-table) ?a))
238 (if (fboundp 'put-char-table)
239 (if (fboundp 'get-char-table) ; warning avoidance
240 (put-char-table t (get-char-table ?a (standard-syntax-table))
;; Backslash gets escape syntax and double quote gets string-quote
;; syntax.
242 (modify-syntax-entry ?\\ "\\" table)
243 (modify-syntax-entry ?\" "\"" table)
;; The remaining special characters are all given punctuation
;; syntax (descriptor ".").
244 (modify-syntax-entry ?\( "." table)
245 (modify-syntax-entry ?\) "." table)
246 (modify-syntax-entry ?\< "." table)
247 (modify-syntax-entry ?\> "." table)
248 (modify-syntax-entry ?\[ "." table)
249 (modify-syntax-entry ?\] "." table)
250 (modify-syntax-entry ?: "." table)
251 (modify-syntax-entry ?\; "." table)
252 (modify-syntax-entry ?, "." table)
253 (modify-syntax-entry ?@ "." table)
;; Two strategies, chosen by `rfc2047-encoding-type':
;;  - `mime': the text between the leading and trailing ASCII parts is
;;    handed to `rfc2047-encode' as a single unit;
;;  - otherwise: the region is scanned token by token under
;;    `rfc2047-syntax-table', with quoted strings, delimiters and
;;    ordinary tokens handled separately.
;; The result is then passed to `rfc2047-fold-region'.
256 (defun rfc2047-encode-region (b e)
257 "Encode words in region B to E that need encoding.
258 By default, the region is treated as containing RFC2822 addresses.
259 Dynamically bind `rfc2047-encoding-type' to change that."
261 (narrow-to-region b e)
262 (if (eq 'mime rfc2047-encoding-type)
263 ;; Simple case. Treat as single word after any initial ASCII
264 ;; part and before any trailing ASCII part. The leading ASCII
265 ;; is relevant for instance in Subject headers with `Re:' for
266 ;; interoperability with non-MIME clients, and we might as
267 ;; well avoid the tail too.
269 (goto-char (point-min))
270 ;; Does it need encoding?
;; "\000-\177" is the ASCII range (octal 0-177), so this stops at the
;; first non-ASCII character, if there is one.
271 (skip-chars-forward "\000-\177")
273 (skip-chars-backward "^ \n") ; beginning of space-delimited word
274 (rfc2047-encode (point) (progn
276 (skip-chars-backward "\000-\177")
277 (skip-chars-forward "^ \n")
278 ;; end of space-delimited word
280 ;; `address-mime' case -- take care of quoted words, comments.
281 (with-syntax-table rfc2047-syntax-table
282 (let ((start) ; start of current token
283 end ; end of current token
284 ;; Whether there's an encoded word before the current
285 ;; token, either immediately or separated by space.
287 (goto-char (point-min))
288 (condition-case nil ; in case of unbalanced quotes
289 ;; Look for rfc2822-style: sequences of atoms, quoted
290 ;; strings, specials, whitespace. (Specials mustn't be
;; Skip whitespace; when any was skipped, the next token starts at
;; the new position.
295 (unless (= 0 (skip-chars-forward " \t\n"))
296 (setq start (point)))
298 ((not (char-after))) ; eob
300 ((eq ?\" (char-syntax (char-after)))
304 ;; Does it need encoding?
306 (skip-chars-forward "\000-\177" end)
308 (setq last-encoded nil)
309 ;; It needs encoding. Strip the quotes first,
310 ;; since encoded words can't occur in quotes.
312 (delete-backward-char 1)
316 ;; There was a preceding quoted word. We need
317 ;; to include any separating whitespace in this
318 ;; word to avoid it getting lost.
319 (skip-chars-backward " \t")
320 ;; A space is needed between the encoded words.
324 ;; Adjust the end position for the deleted quotes.
325 (rfc2047-encode start (- end 2))
326 (setq last-encoded t))) ; record that it was encoded
327 ((eq ?. (char-syntax (char-after)))
328 ;; Skip other delimiters, but record that they've
329 ;; potentially separated quoted words.
331 (setq last-encoded nil))
332 (t ; normal token/whitespace sequence
335 (skip-chars-backward " \t")
337 ;; Deal with encoding and leading space as for
340 (skip-chars-forward "\000-\177" end)
342 (setq last-encoded nil)
345 (skip-chars-backward " \t")
349 (rfc2047-encode start end)
350 (setq last-encoded t)))))
352 (error "Invalid data for rfc2047 encoding: %s"
353 (buffer-substring b e)))))))
354 (rfc2047-fold-region b (point))))
356 (defun rfc2047-encode-string (string)
357 "Encode words in STRING.
358 By default, the string is treated as containing addresses (see
359 `rfc2047-encoding-type')."
;; Encode everything in the accessible portion of the current buffer.
;; NOTE(review): presumably STRING has been inserted into a scratch
;; buffer before this call -- confirm against the full definition.
362 (rfc2047-encode-region (point-min) (point-max))
365 (defun rfc2047-encode (b e)
366 "Encode the word(s) in the region B to E.
367 By default, the region is treated as containing addresses (see
368 `rfc2047-encoding-type')."
369 (let* ((mime-charset (mm-find-mime-charset-region b e))
370 (cs (if (> (length mime-charset) 1)
371 ;; Fixme: Instead of this, try to break region into
372 ;; parts that can be encoded separately.
373 (error "Can't rfc2047-encode `%s'"
374 (buffer-substring b e))
375 (setq mime-charset (car mime-charset))
376 (mm-charset-to-coding-system mime-charset)))
377 ;; Fixme: Better, calculate the number of non-ASCII