X-Git-Url: http://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Frfc2047.el;h=ebd8a4a41bcb5a350be5f0480d3e8c04fb1123df;hb=0ed5b1813c561b0290b3ebbe2e0eb9bba6156076;hp=32842f56bcfcbd4142069ef1345e6326cfdf7795;hpb=cf333e351fcdfe217c2dbf71aa61c17e1453f276;p=gnus diff --git a/lisp/rfc2047.el b/lisp/rfc2047.el index 32842f56b..ebd8a4a41 100644 --- a/lisp/rfc2047.el +++ b/lisp/rfc2047.el @@ -97,6 +97,25 @@ quoted-printable and base64 respectively.") ;;; Functions for encoding RFC2047 messages ;;; +(defun rfc2047-qp-or-base64 () + "Return the type with which to encode the buffer. +This is either `base64' or `quoted-printable'." + (save-excursion + (let ((limit (min (point-max) (+ 2000 (point-min)))) + (n8bit 0)) + (goto-char (point-min)) + (skip-chars-forward "\x20-\x7f\r\n\t" limit) + (while (< (point) limit) + (incf n8bit) + (forward-char 1) + (skip-chars-forward "\x20-\x7f\r\n\t" limit)) + (if (or (< (* 6 n8bit) (- limit (point-min))) + ;; Don't base64, say, a short line with a single + ;; non-ASCII char when splitting parts by charset. + (= n8bit 1)) + 'quoted-printable + 'base64)))) + (defun rfc2047-narrow-to-field () "Narrow the buffer to the header on the current line." (beginning-of-line) @@ -382,7 +401,7 @@ By default, the region is treated as containing addresses (see ;; encoding, choose the one that's shorter. (save-restriction (narrow-to-region b e) - (if (eq (mm-qp-or-base64) 'base64) + (if (eq (rfc2047-qp-or-base64) 'base64) 'B 'Q)))) (start (concat @@ -588,8 +607,8 @@ By default, the region is treated as containing addresses (see (eval-and-compile (defconst rfc2047-encoded-word-regexp - "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\ -\\?\\([!->@-~ +]*\\)\\?=")) + "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)(?:\\*[^?]+\\)?\ +\\?\\(B\\|Q\\)\\?\\([!->@-~ +]*\\)\\?=")) ;; Fixme: This should decode in place, not cons intermediate strings. ;; Also check whether it needs to worry about delimiting fields like @@ -668,7 +687,20 @@ By default, the region is treated as containing addresses (see mail-parse-charset (not (eq mail-parse-charset 'us-ascii)) (not (eq mail-parse-charset 'gnus-decoded))) - (mm-decode-coding-string string mail-parse-charset) + ;; `decode-coding-string' in Emacs offers a third optional + ;; arg NOCOPY to avoid consing a new string if the decoding + ;; is "trivial". Unfortunately it currently doesn't + ;; consider anything else than a `nil' coding system + ;; trivial. + ;; `rfc2047-decode-string' is called multiple times for each + ;; article during summary buffer generation, and we really + ;; want to avoid unnecessary consing. So we bypass + ;; `decode-coding-string' if the string is purely ASCII. + (if (and (fboundp 'detect-coding-string) + ;; string is purely ASCII + (eq (detect-coding-string string t) 'undecided)) + string + (mm-decode-coding-string string mail-parse-charset)) (mm-string-as-multibyte string))))) (defun rfc2047-parse-and-decode (word) @@ -681,7 +713,7 @@ decodable." (condition-case nil (rfc2047-decode (match-string 1 word) - (upcase (match-string 2 word)) + (string-to-char (match-string 2 word)) (match-string 3 word)) (error word)) word))) ; un-decodable @@ -691,15 +723,19 @@ decodable." ;; Be more liberal to accept buggy base64 strings. If ;; base64-decode-string accepts buggy strings, this function could ;; be aliased to identity. - (case (mod (length string) 4) - (0 string) - (1 string) ;; Error, don't pad it. - (2 (concat string "==")) - (3 (concat string "=")))) + (if (= 0 (mod (length string) 4)) + string + (when (string-match "=+$" string) + (setq string (substring string 0 (match-beginning 0)))) + (case (mod (length string) 4) + (0 string) + (1 string) ;; Error, don't pad it. + (2 (concat string "==")) + (3 (concat string "="))))) (defun rfc2047-decode (charset encoding string) "Decode STRING from the given MIME CHARSET in the given ENCODING. -Valid ENCODINGs are \"B\" and \"Q\". +Valid ENCODINGs are the characters \"B\" and \"Q\". If your Emacs implementation can't decode CHARSET, return nil." (if (stringp charset) (setq charset (intern (downcase charset)))) @@ -719,13 +755,13 @@ If your Emacs implementation can't decode CHARSET, return nil." (setq cs mail-parse-charset)) (mm-decode-coding-string (cond - ((equal "B" encoding) + ((char-equal ?B encoding) (base64-decode-string (rfc2047-pad-base64 string))) - ((equal "Q" encoding) + ((char-equal ?Q encoding) (quoted-printable-decode-string (mm-subst-char-in-string ?_ ? string t))) - (t (error "Invalid encoding: %s" encoding))) + (t (error "Invalid encoding: %c" encoding))) cs)))) (provide 'rfc2047)