;;; qp.el --- Quoted-Printable functions
-;; Copyright (C) 1998,99 Free Software Foundation, Inc.
+
+;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
+;; Keywords: mail, extensions
+
;; This file is part of GNU Emacs.
;; GNU Emacs is free software; you can redistribute it and/or modify
;;; Commentary:
+;; Functions for encoding and decoding quoted-printable text as
+;; defined in RFC 2045.
+
;;; Code:
-(defvar quoted-printable-encoding-characters
- (mapcar 'identity "0123456789ABCDEF"))
+;; NOTE(review): presumably `mm-util' supplies the `mm-' prefixed
+;; helpers called by the functions in this file -- confirm.
+(require 'mm-util)
+;; Declare the variable at compile time only.  At run time
+;; `quoted-printable-encode-region' checks it with `boundp' before
+;; reading it, so it need not be defined when this file is loaded.
+(eval-when-compile (defvar mm-use-ultra-safe-encoding))
-(defun quoted-printable-decode-region (from to)
- "Decode quoted-printable in the region between FROM and TO."
+(defun quoted-printable-decode-region (from to &optional coding-system)
+ "Decode quoted-printable in the region between FROM and TO, per RFC 2045.
+If CODING-SYSTEM is non-nil, decode bytes into characters with that
+coding-system.
+
+Soft line breaks (\"=\" followed by a newline) are removed and each
+\"=XX\" hex escape is replaced by the byte it denotes; lowercase hex
+digits are accepted. Signal an error on any other use of \"=\"."
(interactive "r")
+ (unless (mm-coding-system-p coding-system) ; e.g. `ascii' from Gnus
+ (setq coding-system nil))
(save-excursion
- (goto-char from)
- (while (search-forward "=" to t)
- (cond
- ;; End of the line.
- ((eq (char-after) ?\n)
- (delete-char -1)
- (delete-char 1))
- ;; Encoded character.
- ((and
- (memq (char-after) quoted-printable-encoding-characters)
- (memq (char-after (1+ (point)))
- quoted-printable-encoding-characters))
- (subst-char-in-region
- (1- (point)) (point) ?=
- (string-to-number
- (buffer-substring (point) (+ 2 (point)))
- 16))
- (delete-char 2))
- ;; Quoted equal sign.
- ((eq (char-after) ?=)
- (delete-char 1))
- ;; End of buffer.
- ((eobp)
- (delete-char -1))
- ;; Invalid.
- (t
- (message "Malformed MIME quoted-printable message"))))))
-
-(defun quoted-printable-decode-string (string)
- "Decode the quoted-printable-encoded STRING and return the results."
- (with-temp-buffer
- (insert string)
- (quoted-printable-decode-region (point-min) (point-max))
- (buffer-string)))
+ (save-restriction
+ ;; RFC 2045: ``An "=" followed by two hexadecimal digits, one
+ ;; or both of which are lowercase letters in "abcdef", is
+ ;; formally illegal. A robust implementation might choose to
+ ;; recognize them as the corresponding uppercase letters.''
+ (let ((case-fold-search t))
+ (narrow-to-region from to)
+ ;; Do this in case we're called from Gnus, say, in a buffer
+ ;; which already contains non-ASCII characters which would
+ ;; then get doubly-decoded below.
+ (if coding-system
+ (mm-encode-coding-region (point-min) (point-max) coding-system))
+ (goto-char (point-min))
+ ;; No limit argument is given to `skip-chars-forward': the
+ ;; buffer is already narrowed to the region, and TO is stale
+ ;; as soon as the encoding above or the deletions below have
+ ;; changed the size of the region.
+ (while (and (skip-chars-forward "^=")
+ (not (eobp)))
+ (cond ((eq (char-after (1+ (point))) ?\n)
+ ;; Soft line break: drop the "=" and the newline.
+ (delete-char 2))
+ ((looking-at "=[0-9A-F][0-9A-F]")
+ (let ((byte (string-to-int (buffer-substring (1+ (point))
+ (+ 3 (point)))
+ 16)))
+ (insert byte)
+ (delete-char 3)
+ ;; Point is now just after the decoded byte. A decoded
+ ;; "=" is left behind point so the next scan does not
+ ;; take it for the start of another escape; for any
+ ;; other byte point moves back onto it and the next
+ ;; `skip-chars-forward' steps over it.
+ (unless (eq byte ?=)
+ (backward-char))))
+ (t
+ ;; `error' does not return, so the `forward-char' below
+ ;; is never reached.
+ (error "Malformed quoted-printable text")
+ (forward-char)))))
+ (if coding-system
+ (mm-decode-coding-region (point-min) (point-max) coding-system)))))
+
+(defun quoted-printable-decode-string (string &optional coding-system)
+  "Return the decoded form of the quoted-printable encoded STRING.
+Decoding is done by `quoted-printable-decode-region' in a temporary
+buffer; CODING-SYSTEM, if given, is passed on to that function."
+  (with-temp-buffer
+    (insert string)
+    (let ((beg (point-min))
+          (end (point-max)))
+      (quoted-printable-decode-region beg end coding-system))
+    (buffer-string)))
(defun quoted-printable-encode-region (from to &optional fold class)
- "QP-encode the region between FROM and TO.
-If FOLD, fold long lines. If CLASS, translate the characters
-matched by that regexp."
+ "Quoted-printable encode the region between FROM and TO per RFC 2045.
+
+If FOLD, fold long lines at 76 characters (as required by the RFC).
+If CLASS is non-nil, it is a character class in the form expected by
+`skip-chars-forward'; the characters it does not match are translated.
+
+If `mm-use-ultra-safe-encoding' is set, fold lines unconditionally and
+encode \"From \" or \"-\" at the beginning of a line."
(interactive "r")
+ (unless class
+ ;; Avoid using 8bit characters. = is \075.
+ ;; Equivalent to "^\000-\007\013\015-\037\200-\377="
+ (setq class "\010-\012\014\040-\074\076-\177"))
+ ;; NOTE(review): presumably the conversion keeps the octal ranges in
+ ;; CLASS meaningful in a multibyte Emacs -- confirm. It is skipped
+ ;; where `string-as-multibyte' does not exist.
+ (if (fboundp 'string-as-multibyte)
+ (setq class (string-as-multibyte class)))
(save-excursion
(save-restriction
(narrow-to-region from to)
- (mm-encode-body)
- (goto-char (point-min))
- (while (and (skip-chars-forward
- (or class "^\000-\007\013\015-\037\200-\377="))
- (not (eobp)))
- (insert
- (prog1
- (upcase (format "=%x" (char-after)))
- (delete-char 1))))
- (when fold
- ;; Fold long lines.
+ ;; NOTE(review): the macro name suggests the body runs with the
+ ;; current buffer treated as unibyte on Mule 4 -- confirm against
+ ;; its definition.
+ (mm-with-unibyte-current-buffer-mule4
+ ;; Fixme: what should this do in XEmacs/Mule?
+ ;; Refuse to encode when the region holds any charset other than
+ ;; the four removed from the list below.
+ (if (fboundp 'find-charset-region) ; else XEmacs, non-Mule
+ (if (delq 'unknown ; Emacs 20 unibyte
+ (delq 'eight-bit-graphic ; Emacs 21
+ (delq 'eight-bit-control
+ (delq 'ascii
+ (find-charset-region from to)))))
+ (error "Multibyte character in QP encoding region")))
+ ;; Encode all the non-ascii and control characters.
+ ;; `skip-chars-forward' moves over the characters CLASS matches;
+ ;; the character it stops on is replaced by "=" and two
+ ;; uppercase hex digits.
(goto-char (point-min))
- (end-of-line)
- (while (> (current-column) 72)
- (beginning-of-line)
- (forward-char 72)
- (search-backward "=" (- (point) 2) t)
- (insert "=\n")
- (end-of-line))))))
-
+ (while (and (skip-chars-forward class)
+ (not (eobp)))
+ (insert
+ (prog1
+ (format "=%02X" (char-after))
+ (delete-char 1))))
+ ;; Encode white space at the end of lines.
+ ;; Every space or tab of a trailing run is replaced, one
+ ;; character at a time, until end of line is reached.
+ (goto-char (point-min))
+ (while (re-search-forward "[ \t]+$" nil t)
+ (goto-char (match-beginning 0))
+ (while (not (eolp))
+ (insert
+ (prog1
+ (format "=%02X" (char-after))
+ (delete-char 1)))))
+ ;; Rebind the variable locally to its current value, or to nil
+ ;; when it is unbound, so the tests below never read an unbound
+ ;; variable.
+ (let ((mm-use-ultra-safe-encoding
+ (and (boundp 'mm-use-ultra-safe-encoding)
+ mm-use-ultra-safe-encoding)))
+ (when (or fold mm-use-ultra-safe-encoding)
+ ;; Fold long lines.
+ (let ((tab-width 1)) ; HTAB is one character.
+ (goto-char (point-min))
+ (while (not (eobp))
+ ;; In ultra-safe mode, encode "From " at the beginning
+ ;; of a line.
+ ;; A line that does not start with "From " but starts
+ ;; with "-" gets that "-" replaced by "=2D" instead.
+ (when mm-use-ultra-safe-encoding
+ (beginning-of-line)
+ (if (looking-at "From ")
+ (replace-match "From=20" nil t)
+ (if (looking-at "-")
+ (replace-match "=2D" nil t))))
+ (end-of-line)
+ ;; While the line is wider than 76 columns, break it
+ ;; after at most 75 characters with a soft line break
+ ;; ("=" followed by a newline). If an "=" lies within
+ ;; the two characters before the break point, break in
+ ;; front of it so an "=XX" escape is not split.
+ (while (> (current-column) 76) ; tab-width must be 1.
+ (beginning-of-line)
+ (forward-char 75) ; 75 chars plus an "="
+ (search-backward "=" (- (point) 2) t)
+ (insert "=\n")
+ (end-of-line))
+ (unless (eobp)
+ (forward-line))))))))))
+
(defun quoted-printable-encode-string (string)
- "QP-encode STRING and return the results."
- (mm-with-unibyte-buffer
- (insert string)
- (quoted-printable-encode-region (point-min) (point-max))
- (buffer-string)))
+  "Return STRING encoded as quoted-printable.
+The work is done by `quoted-printable-encode-region' on a temporary
+buffer holding STRING, called without its FOLD and CLASS arguments."
+  (with-temp-buffer
+    (insert string)
+    (let ((beg (point-min))
+          (end (point-max)))
+      (quoted-printable-encode-region beg end))
+    (buffer-string)))
+;; Announce the `qp' feature so that `(require 'qp)' is satisfied once
+;; this file has been loaded.
(provide 'qp)
-;; qp.el ends here
+;;; qp.el ends here