;;; qp.el --- Quoted-Printable functions
-;; Copyright (C) 1998,99 Free Software Foundation, Inc.
+
+;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+;; 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
+;; Keywords: mail, extensions
+
;; This file is part of GNU Emacs.
-;; GNU Emacs is free software; you can redistribute it and/or modify
+;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
-;; any later version.
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
+;; Functions for encoding and decoding quoted-printable text as
+;; defined in RFC 2045.
+
;;; Code:
-(defvar quoted-printable-encoding-characters
- (mapcar 'identity "0123456789ABCDEFabcdef"))
+(require 'mm-util)
+(defvar mm-use-ultra-safe-encoding)
-(defun quoted-printable-decode-region (from to)
- "Decode quoted-printable in the region between FROM and TO."
- (interactive "r")
+;;;###autoload
+(defun quoted-printable-decode-region (from to &optional coding-system)
+ "Decode quoted-printable in the region between FROM and TO, per RFC 2045.
+If CODING-SYSTEM is non-nil, decode bytes into characters with that
+coding-system.
+
+Interactively, you can supply the CODING-SYSTEM argument
+with \\[universal-coding-system-argument].
+
+The CODING-SYSTEM argument is a historical hangover and is deprecated.
+QP encodes raw bytes and should be decoded into raw bytes. Decoding
+them into characters should be done separately."
+ (interactive
+ ;; Let the user determine the coding system with "C-x RET c".
+ (list (region-beginning) (region-end) coding-system-for-read))
+ (unless (mm-coding-system-p coding-system) ; e.g. `ascii' from Gnus
+ (setq coding-system nil))
(save-excursion
- (goto-char from)
- (while (search-forward "=" to t)
- (cond
- ;; End of the line.
- ((eq (char-after) ?\n)
- (delete-char -1)
- (delete-char 1))
- ;; Encoded character.
- ((and
- (memq (char-after) quoted-printable-encoding-characters)
- (memq (char-after (1+ (point)))
- quoted-printable-encoding-characters))
- (subst-char-in-region
- (1- (point)) (point) ?=
- (string-to-number
- (buffer-substring (point) (+ 2 (point)))
- 16))
- (delete-char 2))
- ;; Quoted equal sign.
- ((eq (char-after) ?=)
- (delete-char 1))
- ;; End of buffer.
- ((eobp)
- (delete-char -1))
- ;; Invalid.
- (t
- (message "Malformed MIME quoted-printable message"))))))
-
-(defun quoted-printable-decode-string (string)
- "Decode the quoted-printable-encoded STRING and return the results."
- (with-temp-buffer
+ (save-restriction
+ ;; RFC 2045: ``An "=" followed by two hexadecimal digits, one
+ ;; or both of which are lowercase letters in "abcdef", is
+ ;; formally illegal. A robust implementation might choose to
+ ;; recognize them as the corresponding uppercase letters.''
+ (let ((case-fold-search t))
+ (narrow-to-region from to)
+ ;; Do this in case we're called from Gnus, say, in a buffer
+ ;; which already contains non-ASCII characters which would
+ ;; then get doubly-decoded below.
+ (if coding-system
+ (mm-encode-coding-region (point-min) (point-max) coding-system))
+ (goto-char (point-min))
+ (while (and (skip-chars-forward "^=")
+ (not (eobp)))
+ (cond ((eq (char-after (1+ (point))) ?\n)
+ (delete-char 2))
+ ((looking-at "=[0-9A-F][0-9A-F]")
+ (let ((byte (string-to-number (buffer-substring (1+ (point))
+ (+ 3 (point)))
+ 16)))
+ (mm-insert-byte byte 1)
+ (delete-char 3)))
+ (t
+ (message "Malformed quoted-printable text")
+ (forward-char)))))
+ (if coding-system
+ (mm-decode-coding-region (point-min) (point-max) coding-system)))))
+
+(defun quoted-printable-decode-string (string &optional coding-system)
+ "Decode the quoted-printable encoded STRING and return the result.
+If CODING-SYSTEM is non-nil, decode the string with coding-system.
+Use of CODING-SYSTEM is deprecated; this function should deal with
+raw bytes, and coding conversion should be done separately."
+ (mm-with-unibyte-buffer
(insert string)
- (quoted-printable-decode-region (point-min) (point-max))
+ (quoted-printable-decode-region (point-min) (point-max) coding-system)
(buffer-string)))
(defun quoted-printable-encode-region (from to &optional fold class)
- "QP-encode the region between FROM and TO.
+ "Quoted-printable encode the region between FROM and TO per RFC 2045.
-If FOLD fold long lines. If CLASS, translate the characters
-matched by that regexp.
+If FOLD, fold long lines at 76 characters (as required by the RFC).
+If CLASS is non-nil, translate the characters not matched by that
+regexp class, which is in the form expected by `skip-chars-forward'.
+You should probably avoid non-ASCII characters in this arg.
-If `mm-use-ultra-safe-encoding' is set, fold unconditionally and
+If `mm-use-ultra-safe-encoding' is set, fold lines unconditionally and
encode lines starting with \"From\"."
(interactive "r")
+ (unless class
+ ;; Avoid using 8bit characters. = is \075.
+ ;; Equivalent to "^\000-\007\013\015-\037\200-\377="
+ (setq class "\010-\012\014\040-\074\076-\177"))
(save-excursion
+ (goto-char from)
+ (if (re-search-forward (mm-string-to-multibyte "[^\x0-\x7f\x80-\xff]")
+ to t)
+ (error "Multibyte character in QP encoding region"))
(save-restriction
(narrow-to-region from to)
- ;; (mm-encode-body)
;; Encode all the non-ascii and control characters.
(goto-char (point-min))
- (while (and (skip-chars-forward
- (or class "^\000-\007\013\015-\037\200-\377="))
+ (while (and (skip-chars-forward class)
(not (eobp)))
(insert
(prog1
- (upcase (format "=%02x" (char-after)))
+ ;; To unibyte in case of Emacs 23 (unicode) eight-bit.
+ (format "=%02X" (mm-multibyte-char-to-unibyte (char-after)))
(delete-char 1))))
;; Encode white space at the end of lines.
(goto-char (point-min))
(while (not (eolp))
(insert
(prog1
- (upcase (format "=%02x" (char-after)))
+ (format "=%02X" (char-after))
(delete-char 1)))))
- (when (or fold mm-use-ultra-safe-encoding)
- ;; Fold long lines.
- (goto-char (point-min))
- (while (not (eobp))
- ;; In ultra-safe mode, encode "From " at the beginning of a
- ;; line.
- (when mm-use-ultra-safe-encoding
- (beginning-of-line)
- (when (looking-at "From ")
- (replace-match "From=20" nil t)))
- (end-of-line)
- (while (> (current-column) 72)
- (beginning-of-line)
- (forward-char 71);; 71 char plus an "="
- (search-backward "=" (- (point) 2) t)
- (insert "=\n")
- (end-of-line))
- (unless (eobp)
- (forward-line)))))))
+ (let ((mm-use-ultra-safe-encoding
+ (and (boundp 'mm-use-ultra-safe-encoding)
+ mm-use-ultra-safe-encoding)))
+ (when (or fold mm-use-ultra-safe-encoding)
+ (let ((tab-width 1)) ; HTAB is one character.
+ (goto-char (point-min))
+ (while (not (eobp))
+ ;; In ultra-safe mode, encode "From " at the beginning
+ ;; of a line.
+ (when mm-use-ultra-safe-encoding
+ (if (looking-at "From ")
+ (replace-match "From=20" nil t)
+ (if (looking-at "-")
+ (replace-match "=2D" nil t))))
+ (end-of-line)
+ ;; Fold long lines.
+ (while (> (current-column) 76) ; tab-width must be 1.
+ (beginning-of-line)
+ (forward-char 75) ; 75 chars plus an "="
+ (search-backward "=" (- (point) 2) t)
+ (insert "=\n")
+ (end-of-line))
+ (forward-line))))))))
(defun quoted-printable-encode-string (string)
- "QP-encode STRING and return the results."
- (mm-with-unibyte-buffer
+ "Encode the STRING as quoted-printable and return the result."
+ (with-temp-buffer
+ (if (mm-multibyte-string-p string)
+ (mm-enable-multibyte)
+ (mm-disable-multibyte))
(insert string)
(quoted-printable-encode-region (point-min) (point-max))
(buffer-string)))
(provide 'qp)
-;; qp.el ends here
+;;; qp.el ends here