1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 ;; 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part
26 ;; Three: Message Header Extensions for Non-ASCII Text".
32 (defvar message-posting-charset)
37 ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus.
39 (require 'rfc2045) ;; rfc2045-encode-string
40 (autoload 'mm-body-7-or-8 "mm-bodies")
42 (defvar rfc2047-header-encoding-alist
43 '(("Newsgroups" . nil)
46 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|\\(In-\\)?Reply-To\\|Sender\
47 \\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime)
49 "*Header/encoding method alist.
50 The list is traversed sequentially. The keys can either be
55 1) nil, in which case no encoding is done;
56 2) `mime', in which case the header will be encoded according to RFC2047;
57 3) `address-mime', like `mime', but takes account of the rules for address
58 fields (where quoted strings and comments must be treated separately);
59 4) a charset, in which case it will be encoded as that charset;
60 5) `default', in which case the field will be encoded as the rest
63 (defvar rfc2047-charset-encoding-alist
89 "Alist of MIME charsets to RFC2047 encodings.
90 Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding,
91 quoted-printable and base64 respectively.")
93 (defvar rfc2047-encode-function-alist
94 '((Q . rfc2047-q-encode-string)
95 (B . rfc2047-b-encode-string)
97 "Alist of RFC2047 encodings to encoding functions.")
99 (defvar rfc2047-encode-encoded-words t
100 "Whether encoded words should be encoded again.")
102 (defvar rfc2047-allow-irregular-q-encoded-words t
103 "*Whether to decode irregular Q-encoded words.")
105 (eval-and-compile ;; Necessary to hard code them in `rfc2047-decode-region'.
106 (defconst rfc2047-encoded-word-regexp
107 "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\
108 \\(B\\?[+/0-9A-Za-z]*=*\
111 "Regexp that matches encoded word."
112 ;; The patterns for the B encoding and the Q encoding, i.e. the ones
113 ;; beginning with "B" and "Q" respectively, are restricted into only
114 ;; the characters that those encodings may generally use.
116 (defconst rfc2047-encoded-word-regexp-loose
117 "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\
118 \\(B\\?[+/0-9A-Za-z]*=*\
119 \\|Q\\?\\(?:\\?+[ -<>@-~]\\)?\\(?:[ ->@-~]+\\?+[ -<>@-~]\\)*[ ->@-~]*\\?*\
121 "Regexp that matches encoded word allowing loose Q encoding."
122 ;; The pattern for the Q encoding, i.e. the one beginning with "Q",
124 ;; "Q\\?\\(\\?+[^\n=?]\\)?\\([^\n?]+\\?+[^\n=?]\\)*[^\n?]*\\?*"
125 ;; <--------1-------><----------2,3----------><--4--><-5->
127 ;; 1. After "Q?", allow "?"s that follow a character other than "=".
128 ;; 2. Allow "=" after "Q?"; it isn't regarded as the terminator.
129 ;; 3. In the middle of an encoded word, allow "?"s that follow a
130 ;; character other than "=".
131 ;; 4. Allow any characters other than "?" in the middle of an
133 ;; 5. At the end, allow "?"s.
137 ;;; Functions for encoding RFC2047 messages
140 (defun rfc2047-qp-or-base64 ()
141 "Return the type with which to encode the buffer.
142 This is either `base64' or `quoted-printable'."
144 (let ((limit (min (point-max) (+ 2000 (point-min))))
146 (goto-char (point-min))
147 (skip-chars-forward "\x20-\x7f\r\n\t" limit)
148 (while (< (point) limit)
151 (skip-chars-forward "\x20-\x7f\r\n\t" limit))
152 (if (or (< (* 6 n8bit) (- limit (point-min)))
153 ;; Don't base64, say, a short line with a single
154 ;; non-ASCII char when splitting parts by charset.
159 (defun rfc2047-narrow-to-field ()
160 "Narrow the buffer to the header on the current line."
166 (if (re-search-forward "^[^ \n\t]" nil t)
169 (goto-char (point-min)))
171 (defun rfc2047-field-value ()
172 "Return the value of the field at point."
175 (rfc2047-narrow-to-field)
176 (re-search-forward ":[ \t\n]*" nil t)
177 (buffer-substring-no-properties (point) (point-max)))))
179 (defun rfc2047-quote-special-characters-in-quoted-strings (&optional
181 "Quote special characters with `\\'s in quoted strings.
182 Quoting will not be done in a quoted string if it contains characters
183 matching ENCODABLE-REGEXP or it is within parentheses."
184 (goto-char (point-min))
185 (let ((tspecials (concat "[" ietf-drums-tspecials "]"))
188 (with-syntax-table (standard-syntax-table)
192 (eq (char-before) ?\)))
194 (goto-char (point-max)))
196 (narrow-to-region start (point))
198 (while (search-forward "\"" nil t)
199 (setq beg (match-beginning 0))
200 (unless (eq (char-before beg) ?\\)
206 (setq end (1- (point)))
208 (if (and encodable-regexp
209 (re-search-forward encodable-regexp end t))
212 (narrow-to-region beg end)
213 (while (re-search-forward tspecials nil 'move)
214 (if (eq (char-before) ?\\)
215 (if (looking-at tspecials) ;; Already quoted.
218 (goto-char (match-beginning 0))
224 (goto-char (point-max)))
226 (setq start (point))))))
228 (defvar rfc2047-encoding-type 'address-mime
229 "The type of encoding done by `rfc2047-encode-region'.
230 This should be dynamically bound around calls to
231 `rfc2047-encode-region' to either `mime' or `address-mime'. See
232 `rfc2047-header-encoding-alist', for definitions.")
234 (defun rfc2047-encode-message-header ()
235 "Encode the message header according to `rfc2047-header-encoding-alist'.
236 Should be called narrowed to the head of the message."
239 (goto-char (point-min))
240 (let (alist elem method)
243 (rfc2047-narrow-to-field)
245 alist rfc2047-header-encoding-alist)
246 (while (setq elem (pop alist))
247 (when (or (and (stringp (car elem))
248 (looking-at (car elem)))
252 (if (not (rfc2047-encodable-p))
254 (when (eq method 'address-mime)
255 (rfc2047-quote-special-characters-in-quoted-strings))
256 (if (and (eq (mm-body-7-or-8) '8bit)
259 (car message-posting-charset)))
260 ;; 8 bit must be decoded.
261 (mm-encode-coding-region
262 (point-min) (point-max)
263 (mm-charset-to-coding-system
264 (car message-posting-charset))))
265 ;; No encoding necessary, but folding is nice
269 (goto-char (point-min))
270 (skip-chars-forward "^:")
271 (when (looking-at ": ")
275 ;; We found something that may perhaps be encoded.
276 (re-search-forward "^[^:]+: *" nil t)
278 ((eq method 'address-mime)
279 (rfc2047-encode-region (point) (point-max)))
281 (let ((rfc2047-encoding-type 'mime))
282 (rfc2047-encode-region (point) (point-max))))
283 ((eq method 'default)
284 (if (and (featurep 'mule)
285 (if (boundp 'default-enable-multibyte-characters)
286 default-enable-multibyte-characters)
288 (mm-encode-coding-region (point) (point-max)
289 mail-parse-charset)))
290 ;; We get this when CC'ing messsages to newsgroups with
291 ;; 8-bit names. The group name mail copy just got
292 ;; unconditionally encoded. Previously, it would ask
293 ;; whether to encode, which was quite confusing for the
294 ;; user. If the new behavior is wrong, tell me. I have
295 ;; left the old code commented out below.
296 ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07.
297 ;; Modified by Dave Love, with the commented-out code changed
298 ;; in accordance with changes elsewhere.
300 (rfc2047-encode-region (point) (point-max)))
302 ;;; (if (or (message-options-get
303 ;;; 'rfc2047-encode-message-header-encode-any)
304 ;;; (message-options-set
305 ;;; 'rfc2047-encode-message-header-encode-any
307 ;;; "Some texts are not encoded. Encode anyway?")))
308 ;;; (rfc2047-encode-region (point-min) (point-max))
309 ;;; (error "Cannot send unencoded text")))
310 ((mm-coding-system-p method)
311 (if (or (and (featurep 'mule)
312 (if (boundp 'default-enable-multibyte-characters)
313 default-enable-multibyte-characters))
314 (featurep 'file-coding))
315 (mm-encode-coding-region (point) (point-max) method)))
318 (goto-char (point-max)))))))
320 ;; Fixme: This, and the require below may not be the Right Thing, but
321 ;; should be safe just before release. -- fx 2001-02-08
323 (defun rfc2047-encodable-p ()
324 "Return non-nil if any characters in current buffer need encoding in headers.
325 The buffer may be narrowed."
326 (require 'message) ; for message-posting-charset
328 (mm-find-mime-charset-region (point-min) (point-max))))
329 (goto-char (point-min))
330 (or (and rfc2047-encode-encoded-words
332 (re-search-forward rfc2047-encoded-word-regexp nil t)
333 (goto-char (point-min))))
335 (not (equal charsets (list (car message-posting-charset))))))))
337 ;; Use this syntax table when parsing into regions that may need
338 ;; encoding. Double quotes are string delimiters, backslash is
339 ;; character quoting, and all other RFC 2822 special characters are
340 ;; treated as punctuation so we can use forward-sexp/forward-word to
341 ;; skip to the end of regions appropriately. Nb. ietf-drums does
342 ;; things differently.
343 (defconst rfc2047-syntax-table
344 ;; (make-char-table 'syntax-table '(2)) only works in Emacs.
345 (let ((table (make-syntax-table)))
346 ;; The following is done to work for setting all elements of the table
347 ;; in Emacs 21-23 and XEmacs; it appears to be the cleanest way.
348 ;; Play safe and don't assume the form of the word syntax entry --
350 (if (fboundp 'set-char-table-range) ; Emacs
351 (funcall (intern "set-char-table-range")
352 table t (aref (standard-syntax-table) ?a))
353 (if (fboundp 'put-char-table)
354 (if (fboundp 'get-char-table) ; warning avoidance
355 (put-char-table t (get-char-table ?a (standard-syntax-table))
357 (modify-syntax-entry ?\\ "\\" table)
358 (modify-syntax-entry ?\" "\"" table)
359 (modify-syntax-entry ?\( "(" table)
360 (modify-syntax-entry ?\) ")" table)
361 (modify-syntax-entry ?\< "." table)
362 (modify-syntax-entry ?\> "." table)
363 (modify-syntax-entry ?\[ "." table)
364 (modify-syntax-entry ?\] "." table)
365 (modify-syntax-entry ?: "." table)
366 (modify-syntax-entry ?\; "." table)
367 (modify-syntax-entry ?, "." table)
368 (modify-syntax-entry ?@ "." table)
371 (defun rfc2047-encode-region (b e)
372 "Encode words in region B to E that need encoding.
373 By default, the region is treated as containing RFC2822 addresses.
374 Dynamically bind `rfc2047-encoding-type' to change that."
376 (narrow-to-region b e)
377 (let ((encodable-regexp (if rfc2047-encode-encoded-words
378 "[^\000-\177]+\\|=\\?"
380 start ; start of current token
382 ;; Whether there's an encoded word before the current token,
383 ;; either immediately or separated by space.
385 (orig-text (buffer-substring-no-properties b e)))
386 (if (eq 'mime rfc2047-encoding-type)
387 ;; Simple case. Continuous words in which all those contain
388 ;; non-ASCII characters are encoded collectively. Encoding
389 ;; ASCII words, including `Re:' used in Subject headers, is
390 ;; avoided for interoperability with non-MIME clients and
391 ;; for making it easy to find keywords.
393 (goto-char (point-min))
394 (while (progn (skip-chars-forward " \t\n")
397 (while (and (looking-at "[ \t\n]*\\([^ \t\n]+\\)")
399 (setq end (match-end 0))
400 (re-search-forward encodable-regexp end t)))
402 (if (> (point) start)
403 (rfc2047-encode start (point))
405 ;; `address-mime' case -- take care of quoted words, comments.
406 (rfc2047-quote-special-characters-in-quoted-strings encodable-regexp)
407 (with-syntax-table rfc2047-syntax-table
408 (goto-char (point-min))
409 (condition-case err ; in case of unbalanced quotes
410 ;; Look for rfc2822-style: sequences of atoms, quoted
411 ;; strings, specials, whitespace. (Specials mustn't be
415 (skip-chars-forward " \t\n")
418 ((not (char-after))) ; eob
420 ((eq ?\" (setq csyntax (char-syntax (char-after))))
424 ;; Does it need encoding?
426 (if (re-search-forward encodable-regexp end 'move)
427 ;; It needs encoding. Strip the quotes first,
428 ;; since encoded words can't occur in quotes.
431 (delete-backward-char 1)
435 ;; There was a preceding quoted word. We need
436 ;; to include any separating whitespace in this
437 ;; word to avoid it getting lost.
438 (skip-chars-backward " \t")
439 ;; A space is needed between the encoded words.
443 ;; Adjust the end position for the deleted quotes.
444 (rfc2047-encode start (- end 2))
445 (setq last-encoded t)) ; record that it was encoded
446 (setq last-encoded nil)))
448 ;; Skip other delimiters, but record that they've
449 ;; potentially separated quoted words.
451 (setq last-encoded nil))
453 (error "Unbalanced parentheses"))
455 ;; Look for the end of parentheses.
457 ;; Encode text as an unstructured field.
458 (let ((rfc2047-encoding-type 'mime))
459 (rfc2047-encode-region (1+ start) (1- (point))))
460 (skip-chars-forward ")"))
461 (t ; normal token/whitespace sequence
463 ;; Skip one ASCII word, or encode continuous words
464 ;; in which all those contain non-ASCII characters.
466 (while (not (or end (eobp)))
467 (when (looking-at "[\000-\177]+")
471 (while (and (or (re-search-forward
472 "[ \t\n]\\|\\Sw" end 'move)
474 (eq ?\\ (char-syntax (char-before))))
475 ;; Skip backslash-quoted characters.
478 (setq end (match-beginning 0))
479 (if rfc2047-encode-encoded-words
482 (when (search-forward "=?" end 'move)
483 (goto-char (match-beginning 0))
486 ;; Where the value nil of `end' means there may be
487 ;; text to have to be encoded following the point.
488 ;; Otherwise, the point reached to the end of ASCII
489 ;; words separated by whitespace or a special char.
491 (when (looking-at encodable-regexp)
492 (goto-char (setq begin (match-end 0)))
493 (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)")
494 (setq end (match-end 0))
496 (while (re-search-forward
497 encodable-regexp end t))
500 (or (not (re-search-forward "\\Sw" end t))
502 (goto-char (match-beginning 0))
505 (when (looking-at "[^ \t\n]+")
506 (setq end (match-end 0))
507 (if (re-search-forward "\\Sw+" end t)
508 ;; There are special characters better
509 ;; to be encoded so that MTAs may parse
511 (cond ((= end (point)))
512 ((looking-at (concat "\\sw*\\("
517 (goto-char (1- (match-end 0)))
518 (unless (= (point) (match-beginning 0))
519 ;; Separate encodable text and
523 (skip-chars-forward " \t\n")
524 (if (and (looking-at "[^ \t\n]+")
525 (string-match encodable-regexp
528 (goto-char end)))))))
529 (skip-chars-backward " \t\n")
532 (if (re-search-forward encodable-regexp end 'move)
534 (unless (memq (char-before start) '(nil ?\t ? ))
537 (skip-chars-backward "^ \t\n")
538 (and (looking-at "\\Sw+")
539 (= (match-end 0) start)))
540 ;; Also encode bogus delimiters.
542 ;; Separate encodable text and delimiter.
545 (setq start (1+ start)
547 (rfc2047-encode start end)
548 (setq last-encoded t))
549 (setq last-encoded nil)))))
551 (if (or debug-on-quit debug-on-error)
552 (signal (car err) (cdr err))
553 (error "Invalid data for rfc2047 encoding: %s"
554 (mm-replace-in-string orig-text "[ \t\n]+" " "))))))))
555 (rfc2047-fold-region b (point))
556 (goto-char (point-max))))
558 (defun rfc2047-encode-string (string)
559 "Encode words in STRING.
560 By default, the string is treated as containing addresses (see
561 `rfc2047-encoding-type')."
562 (mm-with-multibyte-buffer
564 (rfc2047-encode-region (point-min) (point-max))
568 ;; 2. Syntax of encoded-words
570 ;; While there is no limit to the length of a multiple-line header
571 ;; field, each line of a header field that contains one or more
572 ;; 'encoded-word's is limited to 76 characters.
574 ;; In `rfc2047-encode-parameter' it is bound to nil, so don't defconst it.
575 (defvar rfc2047-encode-max-chars 76
576 "Maximum characters of each header line that contain encoded-words.
577 According to RFC 2047, it is 76. If it is nil, encoded-words
578 will not be folded. Too small value may cause an error. You
579 should not change this value.")
581 (defun rfc2047-encode-1 (column string cs encoder start crest tail
583 "Subroutine used by `rfc2047-encode'."
584 (cond ((string-equal string "")
586 ((not rfc2047-encode-max-chars)
588 (funcall encoder (if cs
589 (mm-encode-coding-string string cs)
592 ((>= column rfc2047-encode-max-chars)
594 (cond ((string-match "\n[ \t]+\\'" eword)
595 ;; Reomove a superfluous empty line.
596 (setq eword (substring eword 0 (match-beginning 0))))
597 ((string-match "(+\\'" eword)
598 ;; Break the line before the open parenthesis.
599 (setq crest (concat crest (match-string 0 eword))
600 eword (substring eword 0 (match-beginning 0))))))
601 (rfc2047-encode-1 (length crest) string cs encoder start " " tail
602 (concat eword "\n" crest)))
605 (limit (1- (length string)))
610 (setq next (concat start
613 (mm-encode-coding-string
614 (substring string 0 (1+ index))
616 (substring string 0 (1+ index))))
618 len (+ column (length next)))
619 (if (> len rfc2047-encode-max-chars)
622 (if (or (< index limit)
623 (<= (+ len (or (string-match "\n" tail)
625 rfc2047-encode-max-chars))
628 (if (string-match "\\`)+" tail)
629 ;; Break the line after the close parenthesis.
630 (setq tail (concat (substring tail 0 (match-end 0))
632 (substring tail (match-end 0)))
638 (concat eword next tail)
641 (string-match "(+\\'" eword))
642 (setq crest (concat crest (match-string 0 eword))
643 eword (substring eword 0 (match-beginning 0)))
644 (setq eword (concat eword next)))
646 eword (concat eword next)))
647 (when (string-match "\n[ \t]+\\'" eword)
648 ;; Reomove a superfluous empty line.
649 (setq eword (substring eword 0 (match-beginning 0))))
650 (rfc2047-encode-1 (length crest) (substring string index)
651 cs encoder start " " tail
652 (concat eword "\n" crest)))))))
654 (defun rfc2047-encode (b e)
655 "Encode the word(s) in the region B to E.
656 Point moves to the end of the region."
657 (let ((mime-charset (or (mm-find-mime-charset-region b e) (list 'us-ascii)))
658 cs encoding tail crest eword)
659 (cond ((> (length mime-charset) 1)
660 (error "Can't rfc2047-encode `%s'"
661 (buffer-substring-no-properties b e)))
662 ((= (length mime-charset) 1)
663 (setq mime-charset (car mime-charset)
664 cs (mm-charset-to-coding-system mime-charset))
665 (unless (and (mm-multibyte-p)
666 (mm-coding-system-p cs))
669 (narrow-to-region b e)
671 (or (cdr (assq mime-charset
672 rfc2047-charset-encoding-alist))
673 ;; For the charsets that don't have a preferred
674 ;; encoding, choose the one that's shorter.
675 (if (eq (rfc2047-qp-or-base64) 'base64)
680 (skip-chars-forward "^ \t\n")
681 ;; `tail' may contain a close parenthesis.
682 (setq tail (buffer-substring-no-properties e (point)))
684 (setq b (point-marker)
685 e (set-marker (make-marker) e))
686 (rfc2047-fold-region (point-at-bol) b)
688 (skip-chars-backward "^ \t\n")
689 (unless (= 0 (skip-chars-backward " \t"))
690 ;; `crest' may contain whitespace and an open parenthesis.
691 (setq crest (buffer-substring-no-properties (point) b)))
692 (setq eword (rfc2047-encode-1
694 (mm-replace-in-string
695 (buffer-substring-no-properties b e)
696 "\n\\([ \t]?\\)" "\\1")
698 (or (cdr (assq encoding
699 rfc2047-encode-function-alist))
701 (concat "=?" (downcase (symbol-name mime-charset))
702 "?" (upcase (symbol-name encoding)) "?")
705 (delete-region (if (eq (aref eword 0) ?\n)
707 ;; The line was folded before encoding.
712 ;; `eword' contains `crest' and `tail'.
716 (unless (or (/= 0 (length tail))
718 (looking-at "[ \t\n)]"))
723 (defun rfc2047-fold-field ()
724 "Fold the current header field."
727 (rfc2047-narrow-to-field)
728 (rfc2047-fold-region (point-min) (point-max)))))
730 (defun rfc2047-fold-region (b e)
731 "Fold long lines in region B to E."
733 (narrow-to-region b e)
734 (goto-char (point-min))
738 (bol (save-restriction
742 (when (and (or break qword-break)
743 (> (- (point) bol) 76))
744 (goto-char (or break qword-break))
747 (skip-chars-backward " \t")
748 (if (looking-at "[ \t]")
751 (setq bol (1- (point)))
752 ;; Don't break before the first non-LWSP characters.
753 (skip-chars-forward " \t")
757 ((eq (char-after) ?\n)
762 (skip-chars-forward " \t")
763 (unless (or (eobp) (eq (char-after) ?\n))
765 ((eq (char-after) ?\r)
767 ((memq (char-after) '(? ?\t))
768 (skip-chars-forward " \t")
769 (unless first ;; Don't break just after the header name.
770 (setq break (point))))
772 (if (not (looking-at "=\\?[^=]"))
773 (if (eq (char-after) ?=)
775 (skip-chars-forward "^ \t\n\r="))
776 ;; Don't break at the start of the field.
777 (unless (= (point) b)
778 (setq qword-break (point)))
779 (skip-chars-forward "^ \t\n\r")))
781 (skip-chars-forward "^ \t\n\r")))
783 (when (and (or break qword-break)
784 (> (- (point) bol) 76))
785 (goto-char (or break qword-break))
788 (if (or (> 0 (skip-chars-backward " \t"))
789 (looking-at "[ \t]"))
792 (setq bol (1- (point)))
793 ;; Don't break before the first non-LWSP characters.
794 (skip-chars-forward " \t")
796 (forward-char 1))))))
798 (defun rfc2047-unfold-field ()
799 "Fold the current line."
802 (rfc2047-narrow-to-field)
803 (rfc2047-unfold-region (point-min) (point-max)))))
805 (defun rfc2047-unfold-region (b e)
806 "Unfold lines in region B to E."
808 (narrow-to-region b e)
809 (goto-char (point-min))
810 (let ((bol (save-restriction
813 (eol (point-at-eol)))
816 (if (and (looking-at "[ \t]")
817 (< (- (point-at-eol) bol) 76))
818 (delete-region eol (progn
820 (skip-chars-forward "\r\n")
822 (setq bol (point-at-bol)))
823 (setq eol (point-at-eol))
826 (defun rfc2047-b-encode-string (string)
827 "Base64-encode the header contained in STRING."
828 (base64-encode-string string t))
830 (defun rfc2047-q-encode-string (string)
831 "Quoted-printable-encode the header in STRING."
832 (mm-with-unibyte-buffer
834 (quoted-printable-encode-region
835 (point-min) (point-max) nil
836 ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
837 ;; Avoid using 8bit characters.
838 ;; This list excludes `especials' (see the RFC2047 syntax),
839 ;; meaning that some characters in non-structured fields will
840 ;; get encoded when they con't need to be. The following is
841 ;; what it used to be.
842 ;;; ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
843 ;;; "\010\012\014\040-\074\076\100-\136\140-\177")
844 "-\b\n\f !#-'*+0-9A-Z\\^`-~\d")
845 (subst-char-in-region (point-min) (point-max) ? ?_)
848 (defun rfc2047-encode-parameter (param value)
849 "Return and PARAM=VALUE string encoded in the RFC2047-like style.
850 This is a replacement for the `rfc2231-encode-string' function.
852 When attaching files as MIME parts, we should use the RFC2231 encoding
853 to specify the file names containing non-ASCII characters. However,
854 many mail softwares don't support it in practice and recipients won't
855 be able to extract files with correct names. Instead, the RFC2047-like
856 encoding is acceptable generally. This function provides the very
857 RFC2047-like encoding, resigning to such a regrettable trend. To use
858 it, put the following line in your ~/.gnus.el file:
860 \(defalias 'mail-header-encode-parameter 'rfc2047-encode-parameter)
862 (let ((rfc2047-encoding-type 'mime)
863 (rfc2047-encode-max-chars nil))
864 (rfc2045-encode-string param (rfc2047-encode-string value))))
867 ;;; Functions for decoding RFC2047 messages
870 (defvar rfc2047-quote-decoded-words-containing-tspecials nil
871 "If non-nil, quote decoded words containing special characters.")
873 (defvar rfc2047-allow-incomplete-encoded-text t
874 "*Non-nil means allow incomplete encoded-text in successive encoded-words.
875 Dividing of encoded-text in the place other than character boundaries
876 violates RFC2047 section 5, while we have a capability to decode it.
877 If it is non-nil, the decoder will decode B- or Q-encoding in each
878 encoded-word, concatenate them, and decode it by charset. Otherwise,
879 the decoder will fully decode each encoded-word before concatenating
882 (defun rfc2047-strip-backslashes-in-quoted-strings ()
883 "Strip backslashes in quoted strings. `\\\"' remains."
884 (goto-char (point-min))
886 (with-syntax-table (standard-syntax-table)
887 (while (search-forward "\"" nil t)
888 (unless (eq (char-before) ?\\)
889 (setq beg (match-end 0))
890 (goto-char (match-beginning 0))
895 (narrow-to-region beg (1- (point)))
897 (while (search-forward "\\" nil 'move)
898 (unless (memq (char-after) '(?\"))
899 (delete-backward-char 1))
903 (goto-char beg))))))))
905 (defun rfc2047-charset-to-coding-system (charset &optional allow-override)