X-Git-Url: https://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Frfc2047.el;h=6647d10b0b2b8aab1c8eba9ef7730cffd022136c;hp=f1cb1f69e568f765eeea51bb02c43fddc25f31e0;hb=HEAD;hpb=77d0c673aad416091abd5f9bc295917437898da5 diff --git a/lisp/rfc2047.el b/lisp/rfc2047.el index f1cb1f69e..6647d10b0 100644 --- a/lisp/rfc2047.el +++ b/lisp/rfc2047.el @@ -1,6 +1,6 @@ ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages -;; Copyright (C) 1998-2011 Free Software Foundation, Inc. +;; Copyright (C) 1998-2016 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; MORIOKA Tomohiko @@ -102,7 +102,7 @@ quoted-printable and base64 respectively.") (eval-and-compile ;; Necessary to hard code them in `rfc2047-decode-region'. (defconst rfc2047-encoded-word-regexp - "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\ + "=\\?\\([^][\000-\040()<>@,;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\ \\(B\\?[+/0-9A-Za-z]*=*\ \\|Q\\?[ ->@-~]*\ \\)\\?=" @@ -112,7 +112,7 @@ quoted-printable and base64 respectively.") ;; the characters that those encodings may generally use. ) (defconst rfc2047-encoded-word-regexp-loose - "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\ + "=\\?\\([^][\000-\040()<>@,;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\ \\(B\\?[+/0-9A-Za-z]*=*\ \\|Q\\?\\(?:\\?+[ -<>@-~]\\)?\\(?:[ ->@-~]+\\?+[ -<>@-~]\\)*[ ->@-~]*\\?*\ \\)\\?=" @@ -235,85 +235,96 @@ Should be called narrowed to the head of the message." (interactive "*") (save-excursion (goto-char (point-min)) - (let (alist elem method) + (let (alist elem method charsets) (while (not (eobp)) (save-restriction (rfc2047-narrow-to-field) (setq method nil - alist rfc2047-header-encoding-alist) - (while (setq elem (pop alist)) - (when (or (and (stringp (car elem)) - (looking-at (car elem))) - (eq (car elem) t)) - (setq alist nil - method (cdr elem)))) - (if (not (rfc2047-encodable-p)) - (prog2 - (when (eq method 'address-mime) - (rfc2047-quote-special-characters-in-quoted-strings)) - (if (and (eq (mm-body-7-or-8) '8bit) - (mm-multibyte-p) - (mm-coding-system-p - (car message-posting-charset))) - ;; 8 bit must be decoded. - (mm-encode-coding-region - (point-min) (point-max) - (mm-charset-to-coding-system - (car message-posting-charset)))) - ;; No encoding necessary, but folding is nice - (when nil - (rfc2047-fold-region - (save-excursion - (goto-char (point-min)) - (skip-chars-forward "^:") - (when (looking-at ": ") - (forward-char 2)) - (point)) - (point-max)))) - ;; We found something that may perhaps be encoded. - (re-search-forward "^[^:]+: *" nil t) - (cond - ((eq method 'address-mime) - (rfc2047-encode-region (point) (point-max))) - ((eq method 'mime) - (let ((rfc2047-encoding-type 'mime)) - (rfc2047-encode-region (point) (point-max)))) - ((eq method 'default) - (if (and (featurep 'mule) - (if (boundp 'enable-multibyte-characters) - (default-value 'enable-multibyte-characters)) - mail-parse-charset) - (mm-encode-coding-region (point) (point-max) - mail-parse-charset))) - ;; We get this when CC'ing messages to newsgroups with - ;; 8-bit names. The group name mail copy just got - ;; unconditionally encoded. Previously, it would ask - ;; whether to encode, which was quite confusing for the - ;; user. If the new behavior is wrong, tell me. I have - ;; left the old code commented out below. - ;; -- Per Abrahamsen Date: 2001-10-07. - ;; Modified by Dave Love, with the commented-out code changed - ;; in accordance with changes elsewhere. - ((null method) - (rfc2047-encode-region (point) (point-max))) -;;; ((null method) -;;; (if (or (message-options-get -;;; 'rfc2047-encode-message-header-encode-any) -;;; (message-options-set -;;; 'rfc2047-encode-message-header-encode-any -;;; (y-or-n-p -;;; "Some texts are not encoded. Encode anyway?"))) -;;; (rfc2047-encode-region (point-min) (point-max)) -;;; (error "Cannot send unencoded text"))) - ((mm-coding-system-p method) - (if (or (and (featurep 'mule) - (if (boundp 'enable-multibyte-characters) - (default-value 'enable-multibyte-characters))) - (featurep 'file-coding)) - (mm-encode-coding-region (point) (point-max) method))) - ;; Hm. - (t))) - (goto-char (point-max))))))) + alist rfc2047-header-encoding-alist + charsets (mm-find-mime-charset-region (point-min) (point-max))) + ;; M$ Outlook boycotts decoding of a header if it consists + ;; of two or more encoded words and those charsets differ; + ;; it seems to decode all words in a header from a charset + ;; found first in the header. So, we unify the charsets into + ;; a single one used for encoding the whole text in a header. + (let ((mm-coding-system-priorities + (if (= (length charsets) 1) + (cons (mm-charset-to-coding-system (car charsets)) + mm-coding-system-priorities) + mm-coding-system-priorities))) + (while (setq elem (pop alist)) + (when (or (and (stringp (car elem)) + (looking-at (car elem))) + (eq (car elem) t)) + (setq alist nil + method (cdr elem)))) + (if (not (rfc2047-encodable-p)) + (prog2 + (when (eq method 'address-mime) + (rfc2047-quote-special-characters-in-quoted-strings)) + (if (and (eq (mm-body-7-or-8) '8bit) + (mm-multibyte-p) + (mm-coding-system-p + (car message-posting-charset))) + ;; 8 bit must be decoded. + (mm-encode-coding-region + (point-min) (point-max) + (mm-charset-to-coding-system + (car message-posting-charset)))) + ;; No encoding necessary, but folding is nice + (when nil + (rfc2047-fold-region + (save-excursion + (goto-char (point-min)) + (skip-chars-forward "^:") + (when (looking-at ": ") + (forward-char 2)) + (point)) + (point-max)))) + ;; We found something that may perhaps be encoded. + (re-search-forward "^[^:]+: *" nil t) + (cond + ((eq method 'address-mime) + (rfc2047-encode-region (point) (point-max))) + ((eq method 'mime) + (let ((rfc2047-encoding-type 'mime)) + (rfc2047-encode-region (point) (point-max)))) + ((eq method 'default) + (if (and (featurep 'mule) + (if (boundp 'enable-multibyte-characters) + (default-value 'enable-multibyte-characters)) + mail-parse-charset) + (mm-encode-coding-region (point) (point-max) + mail-parse-charset))) + ;; We get this when CC'ing messages to newsgroups with + ;; 8-bit names. The group name mail copy just got + ;; unconditionally encoded. Previously, it would ask + ;; whether to encode, which was quite confusing for the + ;; user. If the new behavior is wrong, tell me. I have + ;; left the old code commented out below. + ;; -- Per Abrahamsen Date: 2001-10-07. + ;; Modified by Dave Love, with the commented-out code changed + ;; in accordance with changes elsewhere. + ((null method) + (rfc2047-encode-region (point) (point-max))) +;;; ((null method) +;;; (if (or (message-options-get +;;; 'rfc2047-encode-message-header-encode-any) +;;; (message-options-set +;;; 'rfc2047-encode-message-header-encode-any +;;; (y-or-n-p +;;; "Some texts are not encoded. Encode anyway?"))) +;;; (rfc2047-encode-region (point-min) (point-max)) +;;; (error "Cannot send unencoded text"))) + ((mm-coding-system-p method) + (if (or (and (featurep 'mule) + (if (boundp 'enable-multibyte-characters) + (default-value 'enable-multibyte-characters))) + (featurep 'file-coding)) + (mm-encode-coding-region (point) (point-max) method))) + ;; Hm. + (t))) + (goto-char (point-max)))))))) ;; Fixme: This, and the require below may not be the Right Thing, but ;; should be safe just before release. -- fx 2001-02-08 @@ -362,7 +373,7 @@ The buffer may be narrowed." (modify-syntax-entry ?@ "." table) table)) -(defun rfc2047-encode-region (b e) +(defun rfc2047-encode-region (b e &optional dont-fold) "Encode words in region B to E that need encoding. By default, the region is treated as containing RFC2822 addresses. Dynamically bind `rfc2047-encoding-type' to change that." @@ -546,16 +557,17 @@ Dynamically bind `rfc2047-encoding-type' to change that." (signal (car err) (cdr err)) (error "Invalid data for rfc2047 encoding: %s" (mm-replace-in-string orig-text "[ \t\n]+" " ")))))))) - (rfc2047-fold-region b (point)) + (unless dont-fold + (rfc2047-fold-region b (point))) (goto-char (point-max)))) -(defun rfc2047-encode-string (string) +(defun rfc2047-encode-string (string &optional dont-fold) "Encode words in STRING. By default, the string is treated as containing addresses (see `rfc2047-encoding-type')." (mm-with-multibyte-buffer (insert string) - (rfc2047-encode-region (point-min) (point-max)) + (rfc2047-encode-region (point-min) (point-max) dont-fold) (buffer-string))) ;; From RFC 2047: @@ -850,7 +862,7 @@ This is a substitution for the `rfc2231-encode-string' function, that is the standard but many mailers don't support it." (let ((rfc2047-encoding-type 'mime) (rfc2047-encode-max-chars nil)) - (rfc2045-encode-string param (rfc2047-encode-string value)))) + (rfc2045-encode-string param (rfc2047-encode-string value t)))) ;;; ;;; Functions for decoding RFC2047 messages @@ -1124,7 +1136,7 @@ other than `\"' and `\\' in quoted strings." ;; `decode-coding-string' in Emacs offers a third optional ;; arg NOCOPY to avoid consing a new string if the decoding ;; is "trivial". Unfortunately it currently doesn't - ;; consider anything else than a `nil' coding system + ;; consider anything else than a nil coding system ;; trivial. ;; `rfc2047-decode-string' is called multiple times for each ;; article during summary buffer generation, and we really