-;;; rfc2047.el --- Functions for encoding and decoding rfc2047 messages
-;; Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages
+;; Copyright (C) 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
;;; Code:
-(eval-when-compile (require 'cl))
+(eval-when-compile
+ (require 'cl)
+ (defvar message-posting-charset)
+ (unless (fboundp 'with-syntax-table) ; not in Emacs 20
+ (defmacro with-syntax-table (table &rest body)
+ "Evaluate BODY with syntax table of current buffer set to TABLE.
+The syntax table of the current buffer is saved, BODY is evaluated, and the
+saved table is restored, even in case of an abnormal exit.
+Value is what BODY returns."
+ (let ((old-table (make-symbol "table"))
+ (old-buffer (make-symbol "buffer")))
+ `(let ((,old-table (syntax-table))
+ (,old-buffer (current-buffer)))
+ (unwind-protect
+ (progn
+ (set-syntax-table ,table)
+ ,@body)
+ (save-current-buffer
+ (set-buffer ,old-buffer)
+ (set-syntax-table ,old-table))))))))
(require 'qp)
(require 'mm-util)
-(require 'ietf-drums)
+;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus.
(require 'mail-prsvr)
(require 'base64)
-;; Fixme: Avoid this (for gnus-point-at-...) mm dependence on gnus.
-(require 'gnus-util)
(autoload 'mm-body-7-or-8 "mm-bodies")
+(eval-and-compile
+ ;; Avoid gnus-util for mm- code.
+ (defalias 'rfc2047-point-at-bol
+ (if (fboundp 'point-at-bol)
+ 'point-at-bol
+ 'line-beginning-position))
+
+ (defalias 'rfc2047-point-at-eol
+ (if (fboundp 'point-at-eol)
+ 'point-at-eol
+ 'line-end-position)))
+
(defvar rfc2047-header-encoding-alist
'(("Newsgroups" . nil)
+ ("Followup-To" . nil)
("Message-ID" . nil)
- ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" .
- "-A-Za-z0-9!*+/=_")
+ ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\
+\\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime)
(t . mime))
"*Header/encoding method alist.
The list is traversed sequentially. The keys can either be
1) nil, in which case no encoding is done;
2) `mime', in which case the header will be encoded according to RFC2047;
-3) a charset, in which case it will be encoded as that charset;
-4) `default', in which case the field will be encoded as the rest
- of the article.
-5) a string, like `mime', expect for using it as word-chars.")
+3) `address-mime', like `mime', but takes account of the rules for address
+ fields (where quoted strings and comments must be treated separately);
+4) a charset, in which case it will be encoded as that charset;
+5) `default', in which case the field will be encoded as the rest
+ of the article.")
(defvar rfc2047-charset-encoding-alist
'((us-ascii . nil)
(iso-8859-4 . Q)
(iso-8859-5 . B)
(koi8-r . B)
- (iso-8859-7 . Q)
- (iso-8859-8 . Q)
+ (iso-8859-7 . B)
+ (iso-8859-8 . B)
(iso-8859-9 . Q)
(iso-8859-14 . Q)
(iso-8859-15 . Q)
(cn-gb-2312 . B)
(euc-kr . B)
(iso-2022-jp-2 . B)
- (iso-2022-int-1 . B))
+ (iso-2022-int-1 . B)
+ (viscii . Q))
"Alist of MIME charsets to RFC2047 encodings.
-Valid encodings are nil, `Q' and `B'.")
+Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding,
+quoted-printable and base64 respectively.")
(defvar rfc2047-encoding-function-alist
'((Q . rfc2047-q-encode-region)
(nil . ignore))
"Alist of RFC2047 encodings to encoding functions.")
-(defvar rfc2047-q-encoding-alist
- '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):"
- . "-A-Za-z0-9!*+/" )
- ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
- ;; Avoid using 8bit characters.
- ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
- ("." . "\010\012\014\040-\074\076\100-\136\140-\177"))
- "Alist of header regexps and valid Q characters.")
-
;;;
;;; Functions for encoding RFC2047 messages
;;;
(progn
(forward-line 1)
(if (re-search-forward "^[^ \n\t]" nil t)
- (progn
- (beginning-of-line)
- (point))
+ (rfc2047-point-at-bol)
(point-max))))
(goto-char (point-min)))
+(defun rfc2047-field-value ()
+ "Return the value of the field at point."
+ (save-excursion
+ (save-restriction
+ (rfc2047-narrow-to-field)
+ (re-search-forward ":[ \t\n]*" nil t)
+ (buffer-substring (point) (point-max)))))
+
+(defvar rfc2047-encoding-type 'address-mime
+ "The type of encoding done by `rfc2047-encode-region'.
+This should be dynamically bound around calls to
+`rfc2047-encode-region' to either `mime' or `address-mime'. See
+`rfc2047-header-encoding-alist', for definitions.")
+
(defun rfc2047-encode-message-header ()
"Encode the message header according to `rfc2047-header-encoding-alist'.
Should be called narrowed to the head of the message."
(mm-coding-system-p
(car message-posting-charset)))
;; 8 bit must be decoded.
- ;; Is message-posting-charset a coding system?
(mm-encode-coding-region
(point-min) (point-max)
- (car message-posting-charset))
- nil)
+ (mm-charset-to-coding-system
+ (car message-posting-charset))))
;; No encoding necessary, but folding is nice
(rfc2047-fold-region
(save-excursion
(eq (car elem) t))
(setq alist nil
method (cdr elem))))
+ (goto-char (point-min))
+ (re-search-forward "^[^:]+: *" nil t)
(cond
- ((stringp method)
- (rfc2047-encode-region (point-min) (point-max) method))
+ ((eq method 'address-mime)
+ (rfc2047-encode-region (point) (point-max)))
((eq method 'mime)
- (rfc2047-encode-region (point-min) (point-max)))
+ (let ((rfc2047-encoding-type 'mime))
+ (rfc2047-encode-region (point) (point-max))))
((eq method 'default)
(if (and (featurep 'mule)
(if (boundp 'default-enable-multibyte-characters)
default-enable-multibyte-characters)
mail-parse-charset)
- (mm-encode-coding-region (point-min) (point-max)
+ (mm-encode-coding-region (point) (point-max)
mail-parse-charset)))
;; We get this when CC'ing messsages to newsgroups with
- ;; 8-bit names. The group name mail copy just get
+ ;; 8-bit names. The group name mail copy just got
;; unconditionally encoded. Previously, it would ask
;; whether to encode, which was quite confusing for the
;; user. If the new behaviour is wrong, tell me. I have
;; left the old code commented out below.
;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07.
+ ;; Modified by Dave Love, with the commented-out code changed
+ ;; in accordance with changes elsewhere.
((null method)
- (when (delq 'ascii
- (mm-find-charset-region (point-min) (point-max)))
- (rfc2047-encode-region (point-min) (point-max))))
-;;; ((null method)
-;;; (and (delq 'ascii
-;;; (mm-find-charset-region (point-min)
-;;; (point-max)))
-;;; (if (or (message-options-get
-;;; 'rfc2047-encode-message-header-encode-any)
-;;; (message-options-set
-;;; 'rfc2047-encode-message-header-encode-any
-;;; (y-or-n-p
-;;; "Some texts are not encoded. Encode anyway?")))
-;;; (rfc2047-encode-region (point-min) (point-max))
-;;; (error "Cannot send unencoded text"))))
+ (rfc2047-encode-region (point) (point-max)))
+;;; ((null method)
+;;; (if (or (message-options-get
+;;; 'rfc2047-encode-message-header-encode-any)
+;;; (message-options-set
+;;; 'rfc2047-encode-message-header-encode-any
+;;; (y-or-n-p
+;;; "Some texts are not encoded. Encode anyway?")))
+;;; (rfc2047-encode-region (point-min) (point-max))
+;;; (error "Cannot send unencoded text")))
((mm-coding-system-p method)
(if (and (featurep 'mule)
(if (boundp 'default-enable-multibyte-characters)
default-enable-multibyte-characters))
- (mm-encode-coding-region (point-min) (point-max) method)))
+ (mm-encode-coding-region (point) (point-max) method)))
;; Hm.
(t)))
(goto-char (point-max)))))))
The buffer may be narrowed."
(require 'message) ; for message-posting-charset
(let ((charsets
- (mapcar
- 'mm-mime-charset
- (mm-find-charset-region (point-min) (point-max))))
- (cs (list 'us-ascii (car message-posting-charset)))
- found)
- (while charsets
- (unless (memq (pop charsets) cs)
- (setq found t)))
- found))
-
-(defun rfc2047-dissect-region (b e &optional word-chars)
- "Dissect the region between B and E into words."
- (unless word-chars
- ;; Anything except most CTLs, WSP
- (setq word-chars "\010\012\014\041-\177"))
- (let (mail-parse-mule-charset
- words point current
- result word)
- (save-restriction
- (narrow-to-region b e)
- (goto-char (point-min))
- (skip-chars-forward "\000-\177")
- (while (not (eobp))
- (setq point (point))
- (skip-chars-backward word-chars b)
- (unless (eq b (point))
- (push (cons (buffer-substring b (point)) nil) words))
- (setq b (point))
- (goto-char point)
- (setq current (mm-charset-after))
- (forward-char 1)
- (skip-chars-forward word-chars)
- (while (and (not (eobp))
- (eq (mm-charset-after) current))
- (forward-char 1)
- (skip-chars-forward word-chars))
- (unless (eq b (point))
- (push (cons (buffer-substring b (point)) current) words))
- (setq b (point))
- (skip-chars-forward "\000-\177"))
- (unless (eq b (point))
- (push (cons (buffer-substring b (point)) nil) words)))
- ;; merge adjacent words
- (setq word (pop words))
- (while word
- (if (and (cdr word)
- (caar words)
- (not (cdar words))
- (not (string-match "[^ \t]" (caar words))))
- (if (eq (cdr (nth 1 words)) (cdr word))
- (progn
- (setq word (cons (concat
- (car (nth 1 words)) (caar words)
- (car word))
- (cdr word)))
- (pop words)
- (pop words))
- (push (cons (concat (caar words) (car word)) (cdr word))
- result)
- (pop words)
- (setq word (pop words)))
- (push word result)
- (setq word (pop words))))
- result))
-
-(defun rfc2047-encode-region (b e &optional word-chars)
- "Encode all encodable words in region B to E."
- (let ((words (rfc2047-dissect-region b e word-chars)) word)
- (save-restriction
- (narrow-to-region b e)
- (delete-region (point-min) (point-max))
- (while (setq word (pop words))
- (if (not (cdr word))
- (insert (car word))
- (rfc2047-fold-region (gnus-point-at-bol) (point))
- (goto-char (point-max))
- (if (> (- (point) (save-restriction
- (widen)
- (gnus-point-at-bol))) 76)
- (insert "\n "))
- ;; Insert blank between encoded words
- (if (eq (char-before) ?=) (insert " "))
- (rfc2047-encode (point)
- (progn (insert (car word)) (point))
- (cdr word))))
- (rfc2047-fold-region (point-min) (point-max)))))
-
-(defun rfc2047-encode-string (string &optional word-chars)
- "Encode words in STRING."
+ (mm-find-mime-charset-region (point-min) (point-max))))
+ (and charsets
+ (not (equal charsets (list (car message-posting-charset)))))))
+
+;; Use this syntax table when parsing into regions that may need
+;; encoding. Double quotes are string delimiters, backslash is
+;; character quoting, and all other RFC 2822 special characters are
+;; treated as punctuation so we can use forward-sexp/forward-word to
+;; skip to the end of regions appropriately. Nb. ietf-drums does
+;; things differently.
+(defconst rfc2047-syntax-table
+ ;; (make-char-table 'syntax-table '(2)) only works in Emacs.
+ (let ((table (make-syntax-table)))
+ ;; The following is done to work for setting all elements of the table
+ ;; in Emacs 21 and 22 and XEmacs; it appears to be the cleanest way.
+ ;; Play safe and don't assume the form of the word syntax entry --
+ ;; copy it from ?a.
+ (if (fboundp 'set-char-table-range) ; Emacs
+ (funcall (intern "set-char-table-range")
+ table t (aref (standard-syntax-table) ?a))
+ (if (fboundp 'put-char-table)
+ (if (fboundp 'get-char-table) ; warning avoidance
+ (put-char-table t (get-char-table ?a (standard-syntax-table))
+ table))))
+ (modify-syntax-entry ?\\ "\\" table)
+ (modify-syntax-entry ?\" "\"" table)
+ (modify-syntax-entry ?\( "." table)
+ (modify-syntax-entry ?\) "." table)
+ (modify-syntax-entry ?\< "." table)
+ (modify-syntax-entry ?\> "." table)
+ (modify-syntax-entry ?\[ "." table)
+ (modify-syntax-entry ?\] "." table)
+ (modify-syntax-entry ?: "." table)
+ (modify-syntax-entry ?\; "." table)
+ (modify-syntax-entry ?, "." table)
+ (modify-syntax-entry ?@ "." table)
+ table))
+
+(defun rfc2047-encode-region (b e)
+ "Encode words in region B to E that need encoding.
+By default, the region is treated as containing RFC2822 addresses.
+Dynamically bind `rfc2047-encoding-type' to change that."
+ (save-restriction
+ (narrow-to-region b e)
+ (if (eq 'mime rfc2047-encoding-type)
+ ;; Simple case. Treat as single word after any initial ASCII
+ ;; part and before any tailing ASCII part. The leading ASCII
+ ;; is relevant for instance in Subject headers with `Re:' for
+ ;; interoperability with non-MIME clients, and we might as
+ ;; well avoid the tail too.
+ (progn
+ (goto-char (point-min))
+ ;; Does it need encoding?
+ (skip-chars-forward "\000-\177")
+ (unless (eobp)
+ (skip-chars-backward "^ \n") ; beginning of space-delimited word
+ (rfc2047-encode (point) (progn
+ (goto-char e)
+ (skip-chars-backward "\000-\177")
+ (skip-chars-forward "^ \n")
+ ;; end of space-delimited word
+ (point)))))
+ ;; `address-mime' case -- take care of quoted words, comments.
+ (with-syntax-table rfc2047-syntax-table
+ (let ((start) ; start of current token
+ end ; end of current token
+ ;; Whether there's an encoded word before the current
+ ;; token, either immediately or separated by space.
+ last-encoded)
+ (goto-char (point-min))
+ (condition-case nil ; in case of unbalanced quotes
+ ;; Look for rfc2822-style: sequences of atoms, quoted
+ ;; strings, specials, whitespace. (Specials mustn't be
+ ;; encoded.)
+ (while (not (eobp))
+ (setq start (point))
+ ;; Skip whitespace.
+ (unless (= 0 (skip-chars-forward " \t\n"))
+ (setq start (point)))
+ (cond
+ ((not (char-after))) ; eob
+ ;; else token start
+ ((eq ?\" (char-syntax (char-after)))
+ ;; Quoted word.
+ (forward-sexp)
+ (setq end (point))
+ ;; Does it need encoding?
+ (goto-char start)
+ (skip-chars-forward "\000-\177" end)
+ (if (= end (point))
+ (setq last-encoded nil)
+ ;; It needs encoding. Strip the quotes first,
+ ;; since encoded words can't occur in quotes.
+ (goto-char end)
+ (delete-backward-char 1)
+ (goto-char start)
+ (delete-char 1)
+ (when last-encoded
+ ;; There was a preceding quoted word. We need
+ ;; to include any separating whitespace in this
+ ;; word to avoid it getting lost.
+ (skip-chars-backward " \t")
+ ;; A space is needed between the encoded words.
+ (insert ? )
+ (setq start (point)
+ end (1+ end)))
+ ;; Adjust the end position for the deleted quotes.
+ (rfc2047-encode start (- end 2))
+