X-Git-Url: http://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Frfc2231.el;h=59990add137d90269899c75bd8d29cf598c54680;hp=4e5abe7f76eae3b9dc14e66b1e4aa3efeed8b182;hb=a85e72c397c89080c49b1859f55639a71f598471;hpb=f021703cfe5da8459bdafba17fd57c1f1a7ae3bc diff --git a/lisp/rfc2231.el b/lisp/rfc2231.el index 4e5abe7f7..59990add1 100644 --- a/lisp/rfc2231.el +++ b/lisp/rfc2231.el @@ -1,23 +1,22 @@ ;;; rfc2231.el --- Functions for decoding rfc2231 headers -;; Copyright (C) 1998,99 Free Software Foundation, Inc. + +;; Copyright (C) 1998-2014 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: @@ -26,6 +25,9 @@ (eval-when-compile (require 'cl)) (require 'ietf-drums) (require 'rfc2047) +(autoload 'mm-encode-body "mm-bodies") +(autoload 'mail-header-remove-whitespace "mail-parse") +(autoload 'mail-header-remove-comments "mail-parse") (defun rfc2231-get-value (ct attribute) "Return the value of ATTRIBUTE from CT." @@ -36,133 +38,213 @@ N.B. This is in violation with RFC2047, but it seem to be in common use." (rfc2231-parse-string (rfc2047-decode-string string))) -(defun rfc2231-parse-string (string) +(defun rfc2231-parse-string (string &optional signal-error) "Parse STRING and return a list. The list will be on the form - `(name (attribute . value) (attribute . value)...)" + `(name (attribute . value) (attribute . value)...)'. + +If the optional SIGNAL-ERROR is non-nil, signal an error when this +function fails in parsing of parameters. Otherwise, this function +must never cause a Lisp error." (with-temp-buffer (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token)) (stoken (ietf-drums-token-to-list ietf-drums-tspecials)) (ntoken (ietf-drums-token-to-list "0-9")) - (prev-value "") - display-name mailbox c display-string parameters - attribute value type subtype number encoded - prev-attribute) - (ietf-drums-init (mail-header-remove-whitespace - (mail-header-remove-comments string))) + c type attribute encoded number parameters value) + (ietf-drums-init + (condition-case nil + (mail-header-remove-whitespace + (mail-header-remove-comments string)) + ;; The most likely cause of an error is unbalanced parentheses + ;; or double-quotes. If all parentheses and double-quotes are + ;; quoted meaninglessly with backslashes, removing them might + ;; make it parsable. Let's try... + (error + (let (mod) + (when (and (string-match "\\\\\"" string) + (not (string-match "\\`\"\\|[^\\]\"" string))) + (setq string (mm-replace-in-string string "\\\\\"" "\"") + mod t)) + (when (and (string-match "\\\\(" string) + (string-match "\\\\)" string) + (not (string-match "\\`(\\|[^\\][()]" string))) + (setq string (mm-replace-in-string string "\\\\\\([()]\\)" "\\1") + mod t)) + (or (and mod + (ignore-errors + (mail-header-remove-whitespace + (mail-header-remove-comments string)))) + ;; Finally, attempt to extract only type. + (if (string-match + (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+" + "\\(?:/[^" ietf-drums-tspecials + "\t\n ]+\\)?\\)\\(?:[\t\n ;]\\|\\'\\)") + string) + (match-string 1 string) + "")))))) (let ((table (copy-syntax-table ietf-drums-syntax-table))) (modify-syntax-entry ?\' "w" table) + (modify-syntax-entry ?* " " table) + (modify-syntax-entry ?\; " " table) + (modify-syntax-entry ?= " " table) ;; The following isn't valid, but one should be liberal ;; in what one receives. (modify-syntax-entry ?\: "w" table) (set-syntax-table table)) (setq c (char-after)) (when (and (memq c ttoken) - (not (memq c stoken))) - (setq type (downcase (buffer-substring - (point) (progn (forward-sexp 1) (point))))) + (not (memq c stoken)) + (setq type (ignore-errors + (downcase + (buffer-substring (point) (progn + (forward-sexp 1) + (point))))))) ;; Do the params - (while (not (eobp)) - (setq c (char-after)) - (unless (eq c ?\;) - (error "Invalid header: %s" string)) - (forward-char 1) - ;; If c in nil, then this is an invalid header, but - ;; since elm generates invalid headers on this form, - ;; we allow it. - (when (setq c (char-after)) - (if (and (memq c ttoken) - (not (memq c stoken))) - (setq attribute - (intern - (downcase - (buffer-substring - (point) (progn (forward-sexp 1) (point)))))) - (error "Invalid header: %s" string)) - (setq c (char-after)) - (setq encoded nil) - (when (eq c ?*) - (forward-char 1) - (setq c (char-after)) - (if (not (memq c ntoken)) - (setq encoded t - number nil) - (setq number - (string-to-number - (buffer-substring - (point) (progn (forward-sexp 1) (point))))) + (condition-case err + (progn + (while (not (eobp)) (setq c (char-after)) - (when (eq c ?*) - (setq encoded t) + (unless (eq c ?\;) + (error "Invalid header: %s" string)) + (forward-char 1) + ;; If c in nil, then this is an invalid header, but + ;; since elm generates invalid headers on this form, + ;; we allow it. + (when (setq c (char-after)) + (if (and (memq c ttoken) + (not (memq c stoken))) + (setq attribute + (intern + (downcase + (buffer-substring + (point) (progn (forward-sexp 1) (point)))))) + (error "Invalid header: %s" string)) + (setq c (char-after)) + (if (eq c ?*) + (progn + (forward-char 1) + (setq c (char-after)) + (if (not (memq c ntoken)) + (setq encoded t + number nil) + (setq number + (string-to-number + (buffer-substring + (point) (progn (forward-sexp 1) (point))))) + (setq c (char-after)) + (when (eq c ?*) + (setq encoded t) + (forward-char 1) + (setq c (char-after))))) + (setq number nil + encoded nil)) + (unless (eq c ?=) + (error "Invalid header: %s" string)) (forward-char 1) - (setq c (char-after))))) - ;; See if we have any previous continuations. - (when (and prev-attribute - (not (eq prev-attribute attribute))) - (push (cons prev-attribute prev-value) parameters) - (setq prev-attribute nil - prev-value "")) - (unless (eq c ?=) - (error "Invalid header: %s" string)) - (forward-char 1) - (setq c (char-after)) - (cond - ((eq c ?\") - (setq value - (buffer-substring (1+ (point)) - (progn (forward-sexp 1) (1- (point)))))) - ((and (memq c ttoken) - (not (memq c stoken))) - (setq value (buffer-substring - (point) (progn (forward-sexp 1) (point))))) - (t - (error "Invalid header: %s" string))) - (when encoded - (setq value (rfc2231-decode-encoded-string value))) - (if number - (setq prev-attribute attribute - prev-value (concat prev-value value)) - (push (cons attribute value) parameters)))) - - ;; Take care of any final continuations. - (when prev-attribute - (push (cons prev-attribute prev-value) parameters)) - - (when type - `(,type ,@(nreverse parameters))))))) + (setq c (char-after)) + (cond + ((eq c ?\") + (setq value (buffer-substring (1+ (point)) + (progn + (forward-sexp 1) + (1- (point))))) + (when encoded + (setq value (mapconcat (lambda (c) (format "%%%02x" c)) + value "")))) + ((and (or (memq c ttoken) + ;; EXTENSION: Support non-ascii chars. + (> c ?\177)) + (not (memq c stoken))) + (setq value + (buffer-substring + (point) + (progn + ;; Jump over asterisk, non-ASCII + ;; and non-boundary characters. + (while (and c + (or (eq c ?*) + (> c ?\177) + (not (eq (char-syntax c) ? )))) + (forward-char 1) + (setq c (char-after))) + (point))))) + (t + (error "Invalid header: %s" string))) + (push (list attribute value number encoded) + parameters)))) + (error + (setq parameters nil) + (when signal-error + (signal (car err) (cdr err))))) + + ;; Now collect and concatenate continuation parameters. + (let ((cparams nil) + elem) + (loop for (attribute value part encoded) + in (sort parameters (lambda (e1 e2) + (< (or (caddr e1) 0) + (or (caddr e2) 0)))) + do (cond + ;; First part. + ((or (not (setq elem (assq attribute cparams))) + (and (numberp part) + (zerop part))) + (push (list attribute value encoded) cparams)) + ;; Repetition of a part; do nothing. + ((and elem + (null number)) + ) + ;; Concatenate continuation parts. + (t + (setcar (cdr elem) (concat (cadr elem) value))))) + ;; Finally decode encoded values. + (cons type (mapcar + (lambda (elem) + (cons (car elem) + (if (nth 2 elem) + (rfc2231-decode-encoded-string (nth 1 elem)) + (nth 1 elem)))) + (nreverse cparams)))))))) (defun rfc2231-decode-encoded-string (string) "Decode an RFC2231-encoded string. -These look like \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\"." - (with-temp-buffer - (let ((elems (split-string string "'"))) - ;; The encoded string may contain zero to two single-quote - ;; marks. This should give us the encoded word stripped - ;; of any preceding values. - (insert (car (last elems))) +These look like: + \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\", + \"us-ascii''This%20is%20%2A%2A%2Afun%2A%2A%2A\", + \"'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\", + \"''This%20is%20%2A%2A%2Afun%2A%2A%2A\", or + \"This is ***fun***\"." + (string-match "\\`\\(?:\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string) + (let ((coding-system (mm-charset-to-coding-system + (match-string 1 string) nil t)) + ;;(language (match-string 2 string)) + (value (match-string 3 string))) + (mm-with-unibyte-buffer + (insert value) (goto-char (point-min)) - (while (search-forward "%" nil t) + (while (re-search-forward "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" nil t) (insert (prog1 - (string-to-number (buffer-substring (point) (+ (point) 2)) 16) - (delete-region (1- (point)) (+ (point) 2))))) - ;; Encode using the charset, if any. - (when (and (mm-multibyte-p) - (> (length elems) 1) - (not (equal (intern (car elems)) 'us-ascii))) - (mm-decode-coding-region (point-min) (point-max) - (intern (car elems)))) - (buffer-string)))) + (string-to-number (match-string 1) 16) + (delete-region (match-beginning 0) (match-end 0))))) + ;; Decode using the charset, if any. + (if (memq coding-system '(nil ascii)) + (buffer-string) + (mm-decode-coding-string (buffer-string) coding-system))))) (defun rfc2231-encode-string (param value) - "Return and PARAM=VALUE string encoded according to RFC2231." + "Return and PARAM=VALUE string encoded according to RFC2231. +Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert +the result of this function." (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token)) (tspecial (ietf-drums-token-to-list ietf-drums-tspecials)) (special (ietf-drums-token-to-list "*'%\n\t")) (ascii (ietf-drums-token-to-list ietf-drums-text-token)) (num -1) + ;; Don't make lines exceeding 76 column. + (limit (- 74 (length param))) spacep encodep charsetp charset broken) - (with-temp-buffer + (mm-with-multibyte-buffer (insert value) (goto-char (point-min)) (while (not (eobp)) @@ -178,29 +260,37 @@ These look like \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\"." (forward-char 1)) (when charsetp (setq charset (mm-encode-body))) + (mm-disable-multibyte) (cond - ((or encodep charsetp) + ((or encodep charsetp + (progn + (end-of-line) + (> (current-column) (if spacep (- limit 2) limit)))) + (setq limit (- limit 6)) (goto-char (point-min)) + (insert (symbol-name (or charset 'us-ascii)) "''") (while (not (eobp)) - (when (> (current-column) 60) - (insert "\n") - (setq broken t)) (if (or (not (memq (following-char) ascii)) (memq (following-char) control) (memq (following-char) tspecial) (memq (following-char) special) (eq (following-char) ? )) (progn + (when (>= (current-column) (1- limit)) + (insert ";\n") + (setq broken t)) (insert "%" (format "%02x" (following-char))) (delete-char 1)) + (when (> (current-column) limit) + (insert ";\n") + (setq broken t)) (forward-char 1))) (goto-char (point-min)) - (insert (symbol-name (or charset 'us-ascii)) "''") - (goto-char (point-min)) (if (not broken) (insert param "*=") (while (not (eobp)) - (insert param "*" (format "%d" (incf num)) "*=") + (insert (if (>= num 0) " " "") + param "*" (format "%d" (incf num)) "*=") (forward-line 1)))) (spacep (goto-char (point-min))