X-Git-Url: https://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Frfc2047.el;h=e663384101abaeafaff7c07284bf9e6c0642543e;hb=4fc7ec6583b0050c03754944e2dcf9e47968d665;hp=6423dac12f65ce47483b3b15f2943ff7bb26cf7b;hpb=3ed6dd43f86e57d9111ef8d21a2663a810b1d889;p=gnus diff --git a/lisp/rfc2047.el b/lisp/rfc2047.el index 6423dac12..e66338410 100644 --- a/lisp/rfc2047.el +++ b/lisp/rfc2047.el @@ -1,5 +1,5 @@ ;;; rfc2047.el --- Functions for encoding and decoding rfc2047 messages -;; Copyright (C) 1998 Free Software Foundation, Inc. +;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; MORIOKA Tomohiko @@ -24,6 +24,7 @@ ;;; Code: +(eval-when-compile (require 'cl)) (eval-and-compile (eval '(unless (fboundp 'base64-decode-string) @@ -32,9 +33,7 @@ (require 'qp) (require 'mm-util) (require 'ietf-drums) - -(defvar rfc2047-default-charset 'iso-8859-1 - "Default MIME charset -- does not need encoding.") +(require 'mail-prsvr) (defvar rfc2047-header-encoding-alist '(("Newsgroups" . nil) @@ -48,7 +47,7 @@ The values can be: 1) nil, in which case no encoding is done; 2) `mime', in which case the header will be encoded according to RFC2047; -3) a charset, in which case it will be encoded as that charse; +3) a charset, in which case it will be encoded as that charset; 4) `default', in which case the field will be encoded as the rest of the article.") @@ -63,6 +62,8 @@ The values can be: (iso-8859-7 . Q) (iso-8859-8 . Q) (iso-8859-9 . Q) + (iso-8859-14 . Q) + (iso-8859-15 . Q) (iso-2022-jp . B) (iso-2022-kr . B) (gb2312 . B) @@ -81,8 +82,11 @@ Valid encodings are nil, `Q' and `B'.") "Alist of RFC2047 encodings to encoding functions.") (defvar rfc2047-q-encoding-alist - '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/=_") - ("." . "^\000-\007\013\015-\037\200-\377=_?")) + '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/") + ;; = (\075), _ (\137), ? (\077) are used in the encoded word. + ;; Avoid using 8bit characters. Some versions of Emacs has bug! + ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" + ("." . "\010\012\014\040-\074\076\100-\136\140-\177")) "Alist of header regexps and valid Q characters.") ;;; @@ -107,39 +111,65 @@ Valid encodings are nil, `Q' and `B'.") "Encode the message header according to `rfc2047-header-encoding-alist'. Should be called narrowed to the head of the message." (interactive "*") - (when (featurep 'mule) - (save-excursion - (goto-char (point-min)) - (let ((alist rfc2047-header-encoding-alist) - elem method) - (while (not (eobp)) - (save-restriction - (rfc2047-narrow-to-field) - (when (rfc2047-encodable-p) - ;; We found something that may perhaps be encoded. - (while (setq elem (pop alist)) - (when (or (and (stringp (car elem)) - (looking-at (car elem))) - (eq (car elem) t)) - (setq alist nil - method (cdr elem)))) - (when method - (cond - ((eq method 'mime) - (rfc2047-encode-region (point-min) (point-max))) - ;; Hm. - (t)))) - (goto-char (point-max))))) - (when rfc2047-default-charset - (encode-coding-region (point-min) (point-max) - rfc2047-default-charset))))) - -(defun rfc2047-encodable-p () - "Say whether the current (narrowed) buffer contains characters that need encoding." - (let ((charsets (mapcar - 'mm-mule-charset-to-mime-charset - (find-charset-region (point-min) (point-max)))) - (cs (list 'us-ascii rfc2047-default-charset)) + (save-excursion + (goto-char (point-min)) + (let (alist elem method) + (while (not (eobp)) + (save-restriction + (rfc2047-narrow-to-field) + (if (not (rfc2047-encodable-p)) + (if (and (eq (mm-body-7-or-8) '8bit) + (mm-multibyte-p) + (mm-coding-system-p + (car message-posting-charset))) + ;; 8 bit must be decoded. + ;; Is message-posting-charset a coding system? + (mm-encode-coding-region + (point-min) (point-max) + (car message-posting-charset))) + ;; We found something that may perhaps be encoded. + (setq method nil + alist rfc2047-header-encoding-alist) + (while (setq elem (pop alist)) + (when (or (and (stringp (car elem)) + (looking-at (car elem))) + (eq (car elem) t)) + (setq alist nil + method (cdr elem)))) + (cond + ((eq method 'mime) + (rfc2047-encode-region (point-min) (point-max))) + ((eq method 'default) + (if (and (featurep 'mule) + mail-parse-charset) + (mm-encode-coding-region (point-min) (point-max) + mail-parse-charset))) + ((null method) + (and (delq 'ascii + (mm-find-charset-region (point-min) + (point-max))) + (if (or (message-options-get + 'rfc2047-encode-message-header-encode-any) + (message-options-set + 'rfc2047-encode-message-header-encode-any + (y-or-n-p + "Some texts are not encoded. Encode anyway?"))) + (rfc2047-encode-region (point-min) (point-max)) + (error "Cannot send unencoded text.")))) + ((mm-coding-system-p method) + (if (featurep 'mule) + (mm-encode-coding-region (point-min) (point-max) method))) + ;; Hm. + (t))) + (goto-char (point-max))))))) + +(defun rfc2047-encodable-p (&optional header) + "Say whether the current (narrowed) buffer contains characters that need encoding in headers." + (let ((charsets + (mapcar + 'mm-mime-charset + (mm-find-charset-region (point-min) (point-max)))) + (cs (list 'us-ascii (car message-posting-charset))) found) (while charsets (unless (memq (pop charsets) cs) @@ -147,36 +177,80 @@ Should be called narrowed to the head of the message." found)) (defun rfc2047-dissect-region (b e) - "Dissect the region between B and E." - (let (words) + "Dissect the region between B and E into words." + (let ((word-chars "-A-Za-z0-9!*+/") + ;; Not using ietf-drums-specials-token makes life simple. + mail-parse-mule-charset + words point current + result word) (save-restriction (narrow-to-region b e) (goto-char (point-min)) - (while (re-search-forward - (concat "[^" ietf-drums-tspecials " \t\n]+") nil t) - (push - (list (match-beginning 0) (match-end 0) - (car - (delq 'ascii - (find-charset-region (match-beginning 0) - (match-end 0))))) - words)) - words))) + (skip-chars-forward "\000-\177") + (while (not (eobp)) + (setq point (point)) + (skip-chars-backward word-chars b) + (unless (eq b (point)) + (push (cons (buffer-substring b (point)) nil) words)) + (setq b (point)) + (goto-char point) + (setq current (mm-charset-after)) + (forward-char 1) + (skip-chars-forward word-chars) + (while (and (not (eobp)) + (eq (mm-charset-after) current)) + (forward-char 1) + (skip-chars-forward word-chars)) + (unless (eq b (point)) + (push (cons (buffer-substring b (point)) current) words)) + (setq b (point)) + (skip-chars-forward "\000-\177")) + (unless (eq b (point)) + (push (cons (buffer-substring b (point)) nil) words))) + ;; merge adjacent words + (setq word (pop words)) + (while word + (if (and (cdr word) + (caar words) + (not (cdar words)) + (not (string-match "[^ \t]" (caar words)))) + (if (eq (cdr (nth 1 words)) (cdr word)) + (progn + (setq word (cons (concat + (car (nth 1 words)) (caar words) + (car word)) + (cdr word))) + (pop words) + (pop words)) + (push (cons (concat (caar words) (car word)) (cdr word)) + result) + (pop words) + (setq word (pop words))) + (push word result) + (setq word (pop words)))) + result)) (defun rfc2047-encode-region (b e) "Encode all encodable words in REGION." - (let ((words (rfc2047-dissect-region b e)) - beg end current word) - (while (setq word (pop words)) - (if (equal (nth 2 word) current) - (setq beg (nth 0 word)) - (when current - (rfc2047-encode beg end current)) - (setq current (nth 2 word) - beg (nth 0 word) - end (nth 1 word)))) - (when current - (rfc2047-encode beg end current)))) + (let ((words (rfc2047-dissect-region b e)) word) + (save-restriction + (narrow-to-region b e) + (delete-region (point-min) (point-max)) + (while (setq word (pop words)) + (if (not (cdr word)) + (insert (car word)) + (rfc2047-fold-region (gnus-point-at-bol) (point)) + (goto-char (point-max)) + (if (> (- (point) (save-restriction + (widen) + (gnus-point-at-bol))) 76) + (insert "\n ")) + ;; Insert blank between encoded words + (if (eq (char-before) ?=) (insert " ")) + (rfc2047-encode (point) + (progn (insert (car word)) (point)) + (cdr word)))) + (rfc2047-fold-region (point-min) (point-max))))) (defun rfc2047-encode-string (string) "Encode words in STRING." @@ -187,10 +261,9 @@ Should be called narrowed to the head of the message." (defun rfc2047-encode (b e charset) "Encode the word in the region with CHARSET." - (let* ((mime-charset - (mm-mime-charset charset b e)) + (let* ((mime-charset (mm-mime-charset charset)) (encoding (or (cdr (assq mime-charset - rfc2047-charset-encoding-alist)) + rfc2047-charset-encoding-alist)) 'B)) (start (concat "=?" (downcase (symbol-name mime-charset)) "?" @@ -198,7 +271,16 @@ Should be called narrowed to the head of the message." (first t)) (save-restriction (narrow-to-region b e) - (mm-encode-coding-region b e mime-charset) + (when (eq encoding 'B) + ;; break into lines before encoding + (goto-char (point-min)) + (while (not (eobp)) + (goto-char (min (point-max) (+ 15 (point)))) + (unless (eobp) + (insert "\n")))) + (if (and (mm-multibyte-p) + (mm-coding-system-p mime-charset)) + (mm-encode-coding-region (point-min) (point-max) mime-charset)) (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) (point-min) (point-max)) (goto-char (point-min)) @@ -211,33 +293,120 @@ Should be called narrowed to the head of the message." (insert "?=") (forward-line 1))))) +(defun rfc2047-fold-region (b e) + "Fold the long lines in the region." + (save-restriction + (narrow-to-region b e) + (goto-char (point-min)) + (let ((break nil) + (qword-break nil) + (bol (save-restriction + (widen) + (gnus-point-at-bol)))) + (while (not (eobp)) + (when (and (or break qword-break) (> (- (point) bol) 76)) + (goto-char (or break qword-break)) + (setq break nil + qword-break nil) + (insert "\n ") + (setq bol (1- (point))) + ;; Don't break before the first non-LWSP characters. + (skip-chars-forward " \t") + (forward-char 1)) + (cond + ((eq (char-after) ?\n) + (forward-char 1) + (setq bol (point) + break nil + qword-break nil) + (skip-chars-forward " \t") + (unless (or (eobp) (eq (char-after) ?\n)) + (forward-char 1))) + ((eq (char-after) ?\r) + (forward-char 1)) + ((memq (char-after) '(? ?\t)) + (skip-chars-forward " \t") + (setq break (1- (point)))) + ((not break) + (if (not (looking-at "=\\?[^=]")) + (if (eq (char-after) ?=) + (forward-char 1) + (skip-chars-forward "^ \t\n\r=")) + (setq qword-break (point)) + (skip-chars-forward "^ \t\n\r"))) + (t + (skip-chars-forward "^ \t\n\r")))) + (when (and (or break qword-break) (> (- (point) bol) 76)) + (goto-char (or break qword-break)) + (setq break nil + qword-break nil) + (insert "\n ") + (setq bol (1- (point))) + ;; Don't break before the first non-LWSP characters. + (skip-chars-forward " \t") + (forward-char 1))))) + +(defun rfc2047-unfold-region (b e) + "Fold the long lines in the region." + (save-restriction + (narrow-to-region b e) + (goto-char (point-min)) + (let ((bol (save-restriction + (widen) + (gnus-point-at-bol))) + (eol (gnus-point-at-eol)) + leading) + (forward-line 1) + (while (not (eobp)) + (looking-at "[ \t]*") + (setq leading (- (match-end 0) (match-beginning 0))) + (if (< (- (gnus-point-at-eol) bol leading) 76) + (progn + (goto-char eol) + (delete-region eol (progn + (skip-chars-forward "[ \t\n\r]+") + (1- (point))))) + (setq bol (gnus-point-at-bol))) + (setq eol (gnus-point-at-eol)) + (forward-line 1))))) + (defun rfc2047-b-encode-region (b e) "Encode the header contained in REGION with the B encoding." - (base64-encode-region b e t) - (goto-char (point-min)) - (while (not (eobp)) - (goto-char (min (point-max) (+ 64 (point)))) - (unless (eobp) - (insert "\n")))) + (save-restriction + (narrow-to-region (goto-char b) e) + (while (not (eobp)) + (base64-encode-region (point) (progn (end-of-line) (point)) t) + (if (and (bolp) (eolp)) + (delete-backward-char 1)) + (forward-line)))) (defun rfc2047-q-encode-region (b e) "Encode the header contained in REGION with the Q encoding." (save-excursion (save-restriction (narrow-to-region (goto-char b) e) - (let ((alist rfc2047-q-encoding-alist)) + (let ((alist rfc2047-q-encoding-alist) + (bol (save-restriction + (widen) + (gnus-point-at-bol)))) (while alist (when (looking-at (caar alist)) (quoted-printable-encode-region b e nil (cdar alist)) (subst-char-in-region (point-min) (point-max) ? ?_) (setq alist nil)) (pop alist)) - (goto-char (point-min)) - (while (not (eobp)) - (goto-char (min (point-max) (+ 64 (point)))) - (search-backward "=" (- (point) 2) t) - (unless (eobp) - (insert "\n"))))))) + ;; The size of QP encapsulation is about 20, so set limit to + ;; 56=76-20. + (unless (< (- (point-max) (point-min)) 56) + ;; Don't break if it could fit in one line. + ;; Let rfc2047-encode-region break it later. + (goto-char (1+ (point-min))) + (while (and (not (bobp)) (not (eobp))) + (goto-char (min (point-max) (+ 56 bol))) + (search-backward "=" (- (point) 2) t) + (unless (or (bobp) (eobp)) + (insert "\n") + (setq bol (point))))))))) ;;; ;;; Functions for decoding RFC2047 messages @@ -270,11 +439,17 @@ Should be called narrowed to the head of the message." (prog1 (match-string 0) (delete-region (match-beginning 0) (match-end 0))))) - (when (and (mm-multibyte-p) rfc2047-default-charset) - (mm-decode-coding-region b e rfc2047-default-charset)) + (when (and (mm-multibyte-p) + mail-parse-charset + (not (eq mail-parse-charset 'gnus-decoded))) + (mm-decode-coding-region b e mail-parse-charset)) (setq b (point))) - (when (and (mm-multibyte-p) rfc2047-default-charset) - (mm-decode-coding-region b (point-max) rfc2047-default-charset)))))) + (when (and (mm-multibyte-p) + mail-parse-charset + (not (eq mail-parse-charset 'us-ascii)) + (not (eq mail-parse-charset 'gnus-decoded))) + (mm-decode-coding-region b (point-max) mail-parse-charset)) + (rfc2047-unfold-region (point-min) (point-max)))))) (defun rfc2047-decode-string (string) "Decode the quoted-printable-encoded STRING and return the results." @@ -286,7 +461,7 @@ Should be called narrowed to the head of the message." (inline (rfc2047-decode-region (point-min) (point-max))) (buffer-string)))) - + (defun rfc2047-parse-and-decode (word) "Decode WORD and return it if it is an encoded word. Return WORD if not." @@ -305,19 +480,33 @@ Return WORD if not." "Decode STRING that uses CHARSET with ENCODING. Valid ENCODINGs are \"B\" and \"Q\". If your Emacs implementation can't decode CHARSET, it returns nil." + (if (stringp charset) + (setq charset (intern (downcase charset)))) + (if (or (not charset) + (eq 'gnus-all mail-parse-ignored-charsets) + (memq 'gnus-all mail-parse-ignored-charsets) + (memq charset mail-parse-ignored-charsets)) + (setq charset mail-parse-charset)) (let ((cs (mm-charset-to-coding-system charset))) + (if (and (not cs) charset + (listp mail-parse-ignored-charsets) + (memq 'gnus-unknown mail-parse-ignored-charsets)) + (setq cs (mm-charset-to-coding-system mail-parse-charset))) (when cs - (when (eq cs 'ascii) - (setq cs rfc2047-default-charset)) - (mm-decode-coding-string - (cond - ((equal "B" encoding) - (base64-decode-string string)) - ((equal "Q" encoding) - (quoted-printable-decode-string - (mm-replace-chars-in-string string ?_ ? ))) - (t (error "Invalid encoding: %s" encoding))) - cs)))) + (when (and (eq cs 'ascii) + mail-parse-charset) + (setq cs mail-parse-charset)) + (mm-with-unibyte-current-buffer-mule4 + ;; In Emacs Mule 4, decoding UTF-8 should be in unibyte mode. + (mm-decode-coding-string + (cond + ((equal "B" encoding) + (base64-decode-string string)) + ((equal "Q" encoding) + (quoted-printable-decode-string + (mm-replace-chars-in-string string ?_ ? ))) + (t (error "Invalid encoding: %s" encoding))) + cs))))) (provide 'rfc2047)