X-Git-Url: https://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Frfc2047.el;h=fdce08db1d77f19ecd97918b672a67c1577c487a;hb=b3b2cc0f4f2109fd7eeba0582be0d336c3a190c2;hp=63a67367a2f3f3aa6f85793293f9e87e0a711e6e;hpb=557d01dbb1e8b032553215cdd6770b0a762d186e;p=gnus diff --git a/lisp/rfc2047.el b/lisp/rfc2047.el index 63a67367a..fdce08db1 100644 --- a/lisp/rfc2047.el +++ b/lisp/rfc2047.el @@ -1,5 +1,5 @@ ;;; rfc2047.el --- Functions for encoding and decoding rfc2047 messages -;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. +;; Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; MORIOKA Tomohiko @@ -22,25 +22,33 @@ ;;; Commentary: +;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part +;; Three: Message Header Extensions for Non-ASCII Text". + ;;; Code: -(eval-and-compile - (eval - '(unless (fboundp 'base64-decode-string) - (require 'base64)))) +(eval-when-compile + (require 'cl) + (defvar message-posting-charset)) (require 'qp) (require 'mm-util) (require 'ietf-drums) (require 'mail-prsvr) +(require 'base64) +;; Fixme: Avoid this (for gnus-point-at-...) mm dependence on gnus. +(require 'gnus-util) +(autoload 'mm-body-7-or-8 "mm-bodies") (defvar rfc2047-header-encoding-alist '(("Newsgroups" . nil) ("Message-ID" . nil) + ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" . + "-A-Za-z0-9!*+/=_") (t . mime)) "*Header/encoding method alist. The list is traversed sequentially. The keys can either be -header regexps or `t'. +header regexps or t. The values can be: @@ -48,7 +56,8 @@ The values can be: 2) `mime', in which case the header will be encoded according to RFC2047; 3) a charset, in which case it will be encoded as that charset; 4) `default', in which case the field will be encoded as the rest - of the article.") + of the article. +5) a string, like `mime', expect for using it as word-chars.") (defvar rfc2047-charset-encoding-alist '((us-ascii . nil) @@ -58,21 +67,24 @@ The values can be: (iso-8859-4 . Q) (iso-8859-5 . B) (koi8-r . B) - (iso-8859-7 . Q) - (iso-8859-8 . Q) + (iso-8859-7 . B) + (iso-8859-8 . B) (iso-8859-9 . Q) (iso-8859-14 . Q) (iso-8859-15 . Q) (iso-2022-jp . B) (iso-2022-kr . B) (gb2312 . B) + (big5 . B) + (cn-big5 . B) (cn-gb . B) (cn-gb-2312 . B) (euc-kr . B) (iso-2022-jp-2 . B) (iso-2022-int-1 . B)) "Alist of MIME charsets to RFC2047 encodings. -Valid encodings are nil, `Q' and `B'.") +Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding, +quoted-printable and base64 respectively.") (defvar rfc2047-encoding-function-alist '((Q . rfc2047-q-encode-region) @@ -81,9 +93,10 @@ Valid encodings are nil, `Q' and `B'.") "Alist of RFC2047 encodings to encoding functions.") (defvar rfc2047-q-encoding-alist - '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/") + '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):" + . "-A-Za-z0-9!*+/" ) ;; = (\075), _ (\137), ? (\077) are used in the encoded word. - ;; Avoid using 8bit characters. Some versions of Emacs has bug! + ;; Avoid using 8bit characters. ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" ("." . "\010\012\014\040-\074\076\100-\136\140-\177")) "Alist of header regexps and valid Q characters.") @@ -106,6 +119,14 @@ Valid encodings are nil, `Q' and `B'.") (point-max)))) (goto-char (point-min))) +(defun rfc2047-field-value () + "Return the value of the field at point." + (save-excursion + (save-restriction + (rfc2047-narrow-to-field) + (re-search-forward ":[ \t\n]*" nil t) + (buffer-substring (point) (point-max))))) + (defun rfc2047-encode-message-header () "Encode the message header according to `rfc2047-header-encoding-alist'. Should be called narrowed to the head of the message." @@ -117,15 +138,26 @@ Should be called narrowed to the head of the message." (save-restriction (rfc2047-narrow-to-field) (if (not (rfc2047-encodable-p)) - (if (and (eq (mm-body-7-or-8) '8bit) - (mm-multibyte-p) - (mm-coding-system-p - (car message-posting-charset))) - ;; 8 bit must be decoded. - ;; Is message-posting-charset a coding system? - (mm-encode-coding-region - (point-min) (point-max) - (car message-posting-charset))) + (prog1 + (if (and (eq (mm-body-7-or-8) '8bit) + (mm-multibyte-p) + (mm-coding-system-p + (car message-posting-charset))) + ;; 8 bit must be decoded. + ;; Is message-posting-charset a coding system? + (mm-encode-coding-region + (point-min) (point-max) + (car message-posting-charset)) + nil) + ;; No encoding necessary, but folding is nice + (rfc2047-fold-region + (save-excursion + (goto-char (point-min)) + (skip-chars-forward "^:") + (when (looking-at ": ") + (forward-char 2)) + (point)) + (point-max))) ;; We found something that may perhaps be encoded. (setq method nil alist rfc2047-header-encoding-alist) @@ -136,22 +168,57 @@ Should be called narrowed to the head of the message." (setq alist nil method (cdr elem)))) (cond + ((stringp method) + (rfc2047-encode-region (point-min) (point-max) method)) ((eq method 'mime) (rfc2047-encode-region (point-min) (point-max))) ((eq method 'default) (if (and (featurep 'mule) + (if (boundp 'default-enable-multibyte-characters) + default-enable-multibyte-characters) mail-parse-charset) - (mm-encode-coding-region (point-min) (point-max) + (mm-encode-coding-region (point-min) (point-max) mail-parse-charset))) + ;; We get this when CC'ing messsages to newsgroups with + ;; 8-bit names. The group name mail copy just get + ;; unconditionally encoded. Previously, it would ask + ;; whether to encode, which was quite confusing for the + ;; user. If the new behaviour is wrong, tell me. I have + ;; left the old code commented out below. + ;; -- Per Abrahamsen Date: 2001-10-07. + ((null method) + (when (delq 'ascii + (mm-find-charset-region (point-min) (point-max))) + (rfc2047-encode-region (point-min) (point-max)))) +;;; ((null method) +;;; (and (delq 'ascii +;;; (mm-find-charset-region (point-min) +;;; (point-max))) +;;; (if (or (message-options-get +;;; 'rfc2047-encode-message-header-encode-any) +;;; (message-options-set +;;; 'rfc2047-encode-message-header-encode-any +;;; (y-or-n-p +;;; "Some texts are not encoded. Encode anyway?"))) +;;; (rfc2047-encode-region (point-min) (point-max)) +;;; (error "Cannot send unencoded text")))) ((mm-coding-system-p method) - (if (featurep 'mule) + (if (and (featurep 'mule) + (if (boundp 'default-enable-multibyte-characters) + default-enable-multibyte-characters)) (mm-encode-coding-region (point-min) (point-max) method))) ;; Hm. (t))) (goto-char (point-max))))))) -(defun rfc2047-encodable-p (&optional header) - "Say whether the current (narrowed) buffer contains characters that need encoding in headers." +;; Fixme: This, and the require below may not be the Right Thing, but +;; should be safe just before release. -- fx 2001-02-08 +(eval-when-compile (defvar message-posting-charset)) + +(defun rfc2047-encodable-p () + "Return non-nil if any characters in current buffer need encoding in headers. +The buffer may be narrowed." + (require 'message) ; for message-posting-charset (let ((charsets (mapcar 'mm-mime-charset @@ -163,12 +230,13 @@ Should be called narrowed to the head of the message." (setq found t))) found)) -(defun rfc2047-dissect-region (b e) +(defun rfc2047-dissect-region (b e &optional word-chars) "Dissect the region between B and E into words." - (let ((word-chars "-A-Za-z0-9!*+/") - ;; Not using ietf-drums-specials-token makes life simple. - mail-parse-mule-charset - words point current + (unless word-chars + ;; Anything except most CTLs, WSP + (setq word-chars "\010\012\014\041-\177")) + (let (mail-parse-mule-charset + words point current result word) (save-restriction (narrow-to-region b e) @@ -178,7 +246,7 @@ Should be called narrowed to the head of the message." (setq point (point)) (skip-chars-backward word-chars b) (unless (eq b (point)) - (push (cons (buffer-substring b (point)) nil) words)) + (push (cons (buffer-substring b (point)) nil) words)) (setq b (point)) (goto-char point) (setq current (mm-charset-after)) @@ -189,7 +257,7 @@ Should be called narrowed to the head of the message." (forward-char 1) (skip-chars-forward word-chars)) (unless (eq b (point)) - (push (cons (buffer-substring b (point)) current) words)) + (push (cons (buffer-substring b (point)) current) words)) (setq b (point)) (skip-chars-forward "\000-\177")) (unless (eq b (point)) @@ -197,14 +265,14 @@ Should be called narrowed to the head of the message." ;; merge adjacent words (setq word (pop words)) (while word - (if (and (cdr word) + (if (and (cdr word) (caar words) (not (cdar words)) (not (string-match "[^ \t]" (caar words)))) (if (eq (cdr (nth 1 words)) (cdr word)) (progn - (setq word (cons (concat - (car (nth 1 words)) (caar words) + (setq word (cons (concat + (car (nth 1 words)) (caar words) (car word)) (cdr word))) (pop words) @@ -217,9 +285,9 @@ Should be called narrowed to the head of the message." (setq word (pop words)))) result)) -(defun rfc2047-encode-region (b e) - "Encode all encodable words in REGION." - (let ((words (rfc2047-dissect-region b e)) word) +(defun rfc2047-encode-region (b e &optional word-chars) + "Encode all encodable words in region B to E." + (let ((words (rfc2047-dissect-region b e word-chars)) word) (save-restriction (narrow-to-region b e) (delete-region (point-min) (point-max)) @@ -233,22 +301,23 @@ Should be called narrowed to the head of the message." (gnus-point-at-bol))) 76) (insert "\n ")) ;; Insert blank between encoded words - (if (eq (char-before) ?=) (insert " ")) - (rfc2047-encode (point) + (if (eq (char-before) ?=) (insert " ")) + (rfc2047-encode (point) (progn (insert (car word)) (point)) (cdr word)))) (rfc2047-fold-region (point-min) (point-max))))) -(defun rfc2047-encode-string (string) +(defun rfc2047-encode-string (string &optional word-chars) "Encode words in STRING." (with-temp-buffer (insert string) - (rfc2047-encode-region (point-min) (point-max)) + (rfc2047-encode-region (point-min) (point-max) word-chars) (buffer-string))) (defun rfc2047-encode (b e charset) - "Encode the word in the region with CHARSET." + "Encode the word in the region B to E with CHARSET." (let* ((mime-charset (mm-mime-charset charset)) + (cs (mm-charset-to-coding-system mime-charset)) (encoding (or (cdr (assq mime-charset rfc2047-charset-encoding-alist)) 'B)) @@ -266,8 +335,8 @@ Should be called narrowed to the head of the message." (unless (eobp) (insert "\n")))) (if (and (mm-multibyte-p) - (mm-coding-system-p mime-charset)) - (mm-encode-coding-region (point-min) (point-max) mime-charset)) + (mm-coding-system-p cs)) + (mm-encode-coding-region (point-min) (point-max) cs)) (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) (point-min) (point-max)) (goto-char (point-min)) @@ -280,29 +349,44 @@ Should be called narrowed to the head of the message." (insert "?=") (forward-line 1))))) +(defun rfc2047-fold-field () + "Fold the current line." + (save-excursion + (save-restriction + (rfc2047-narrow-to-field) + (rfc2047-fold-region (point-min) (point-max))))) + (defun rfc2047-fold-region (b e) - "Fold the long lines in the region." + "Fold long lines in region B to E." (save-restriction (narrow-to-region b e) (goto-char (point-min)) (let ((break nil) + (qword-break nil) + (first t) (bol (save-restriction (widen) (gnus-point-at-bol)))) (while (not (eobp)) - (when (and break (> (- (point) bol) 76)) - (goto-char break) - (setq break nil) - (insert "\n ") + (when (and (or break qword-break) + (> (- (point) bol) 76)) + (goto-char (or break qword-break)) + (setq break nil + qword-break nil) + (if (looking-at "[ \t]") + (insert "\n") + (insert "\n ")) (setq bol (1- (point))) ;; Don't break before the first non-LWSP characters. (skip-chars-forward " \t") - (forward-char 1)) + (unless (eobp) + (forward-char 1))) (cond ((eq (char-after) ?\n) (forward-char 1) (setq bol (point) - break nil) + break nil + qword-break nil) (skip-chars-forward " \t") (unless (or (eobp) (eq (char-after) ?\n)) (forward-char 1))) @@ -310,51 +394,63 @@ Should be called narrowed to the head of the message." (forward-char 1)) ((memq (char-after) '(? ?\t)) (skip-chars-forward " \t") - (setq break (1- (point)))) + (if first + ;; Don't break just after the header name. + (setq first nil) + (setq break (1- (point))))) ((not break) - (if (not (looking-at "=\\?")) + (if (not (looking-at "=\\?[^=]")) (if (eq (char-after) ?=) (forward-char 1) (skip-chars-forward "^ \t\n\r=")) - (setq break (point)) + (setq qword-break (point)) (skip-chars-forward "^ \t\n\r"))) (t (skip-chars-forward "^ \t\n\r")))) - (when (and break (> (- (point) bol) 76)) - (goto-char break) - (setq break nil) - (insert "\n ") + (when (and (or break qword-break) + (> (- (point) bol) 76)) + (goto-char (or break qword-break)) + (setq break nil + qword-break nil) + (if (looking-at "[ \t]") + (insert "\n") + (insert "\n ")) (setq bol (1- (point))) ;; Don't break before the first non-LWSP characters. (skip-chars-forward " \t") - (forward-char 1))))) + (unless (eobp) + (forward-char 1)))))) + +(defun rfc2047-unfold-field () + "Fold the current line." + (save-excursion + (save-restriction + (rfc2047-narrow-to-field) + (rfc2047-unfold-region (point-min) (point-max))))) (defun rfc2047-unfold-region (b e) - "Fold the long lines in the region." + "Unfold lines in region B to E." (save-restriction (narrow-to-region b e) (goto-char (point-min)) (let ((bol (save-restriction (widen) (gnus-point-at-bol))) - (eol (gnus-point-at-eol)) - leading) + (eol (gnus-point-at-eol))) (forward-line 1) (while (not (eobp)) - (looking-at "[ \t]*") - (setq leading (- (match-end 0) (match-beginning 0))) - (if (< (- (gnus-point-at-eol) bol leading) 76) - (progn - (goto-char eol) - (delete-region eol (progn - (skip-chars-forward "[ \t\n\r]+") - (1- (point))))) + (if (and (looking-at "[ \t]") + (< (- (gnus-point-at-eol) bol) 76)) + (delete-region eol (progn + (goto-char eol) + (skip-chars-forward "\r\n") + (point))) (setq bol (gnus-point-at-bol))) (setq eol (gnus-point-at-eol)) (forward-line 1))))) (defun rfc2047-b-encode-region (b e) - "Encode the header contained in REGION with the B encoding." + "Base64-encode the header contained in region B to E." (save-restriction (narrow-to-region (goto-char b) e) (while (not (eobp)) @@ -364,33 +460,41 @@ Should be called narrowed to the head of the message." (forward-line)))) (defun rfc2047-q-encode-region (b e) - "Encode the header contained in REGION with the Q encoding." + "Quoted-printable-encode the header in region B to E." (save-excursion (save-restriction (narrow-to-region (goto-char b) e) - (let ((alist rfc2047-q-encoding-alist)) + (let ((alist rfc2047-q-encoding-alist) + (bol (save-restriction + (widen) + (gnus-point-at-bol)))) (while alist (when (looking-at (caar alist)) - (quoted-printable-encode-region b e nil (cdar alist)) + (mm-with-unibyte-current-buffer-mule4 + (quoted-printable-encode-region + (point-min) (point-max) nil (cdar alist))) (subst-char-in-region (point-min) (point-max) ? ?_) (setq alist nil)) (pop alist)) - (goto-char (1+ (point-min))) - (while (and (not (bobp)) (not (eobp))) - (goto-char (min (point-max) (save-restriction - (widen) - ;; THe QP encapsulation is about 20. - (+ 56 (gnus-point-at-bol))))) - (search-backward "=" (- (point) 2) t) - (unless (or (bobp) (eobp)) - (insert "\n"))))))) + ;; The size of QP encapsulation is about 20, so set limit to + ;; 56=76-20. + (unless (< (- (point-max) (point-min)) 56) + ;; Don't break if it could fit in one line. + ;; Let rfc2047-encode-region break it later. + (goto-char (1+ (point-min))) + (while (and (not (bobp)) (not (eobp))) + (goto-char (min (point-max) (+ 56 bol))) + (search-backward "=" (- (point) 2) t) + (unless (or (bobp) (eobp)) + (insert "\n") + (setq bol (point))))))))) ;;; ;;; Functions for decoding RFC2047 messages ;;; (defvar rfc2047-encoded-word-regexp - "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?=") + "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]*\\)\\?=") (defun rfc2047-decode-region (start end) "Decode MIME-encoded words in region between START and END." @@ -416,8 +520,17 @@ Should be called narrowed to the head of the message." (prog1 (match-string 0) (delete-region (match-beginning 0) (match-end 0))))) + ;; Remove newlines between decoded words. Though such things + ;; must not be essentially there. + (save-restriction + (narrow-to-region e (point)) + (goto-char e) + (while (re-search-forward "[\n\r]+" nil t) + (replace-match " ")) + (goto-char (point-max))) (when (and (mm-multibyte-p) mail-parse-charset + (not (eq mail-parse-charset 'us-ascii)) (not (eq mail-parse-charset 'gnus-decoded))) (mm-decode-coding-region b e mail-parse-charset)) (setq b (point))) @@ -425,19 +538,25 @@ Should be called narrowed to the head of the message." mail-parse-charset (not (eq mail-parse-charset 'us-ascii)) (not (eq mail-parse-charset 'gnus-decoded))) - (mm-decode-coding-region b (point-max) mail-parse-charset)) - (rfc2047-unfold-region (point-min) (point-max)))))) + (mm-decode-coding-region b (point-max) mail-parse-charset)))))) (defun rfc2047-decode-string (string) "Decode the quoted-printable-encoded STRING and return the results." (let ((m (mm-multibyte-p))) - (with-temp-buffer - (when m - (mm-enable-multibyte)) - (insert string) - (inline - (rfc2047-decode-region (point-min) (point-max))) - (buffer-string)))) + (if (string-match "=\\?" string) + (with-temp-buffer + (when m + (mm-enable-multibyte)) + (insert string) + (inline + (rfc2047-decode-region (point-min) (point-max))) + (buffer-string)) + (if (and m + mail-parse-charset + (not (eq mail-parse-charset 'us-ascii)) + (not (eq mail-parse-charset 'gnus-decoded))) + (mm-decode-coding-string string mail-parse-charset) + string)))) (defun rfc2047-parse-and-decode (word) "Decode WORD and return it if it is an encoded word. @@ -453,19 +572,30 @@ Return WORD if not." (error word)) word))) +(defun rfc2047-pad-base64 (string) + "Pad STRING to quartets." + ;; Be more liberal to accept buggy base64 strings. If + ;; base64-decode-string accepts buggy strings, this function could + ;; be aliased to identity. + (case (mod (length string) 4) + (0 string) + (1 string) ;; Error, don't pad it. + (2 (concat string "==")) + (3 (concat string "=")))) + (defun rfc2047-decode (charset encoding string) - "Decode STRING that uses CHARSET with ENCODING. + "Decode STRING from the given MIME CHARSET in the given ENCODING. Valid ENCODINGs are \"B\" and \"Q\". -If your Emacs implementation can't decode CHARSET, it returns nil." +If your Emacs implementation can't decode CHARSET, return nil." (if (stringp charset) (setq charset (intern (downcase charset)))) - (if (or (not charset) + (if (or (not charset) (eq 'gnus-all mail-parse-ignored-charsets) (memq 'gnus-all mail-parse-ignored-charsets) (memq charset mail-parse-ignored-charsets)) (setq charset mail-parse-charset)) (let ((cs (mm-charset-to-coding-system charset))) - (if (and (not cs) charset + (if (and (not cs) charset (listp mail-parse-ignored-charsets) (memq 'gnus-unknown mail-parse-ignored-charsets)) (setq cs (mm-charset-to-coding-system mail-parse-charset))) @@ -473,12 +603,13 @@ If your Emacs implementation can't decode CHARSET, it returns nil." (when (and (eq cs 'ascii) mail-parse-charset) (setq cs mail-parse-charset)) - (mm-with-unibyte-current-buffer + (mm-with-unibyte-current-buffer-mule4 ;; In Emacs Mule 4, decoding UTF-8 should be in unibyte mode. (mm-decode-coding-string (cond ((equal "B" encoding) - (base64-decode-string string)) + (base64-decode-string + (rfc2047-pad-base64 string))) ((equal "Q" encoding) (quoted-printable-decode-string (mm-replace-chars-in-string string ?_ ? )))