1 ;;; rfc2231.el --- Functions for decoding rfc2231 headers
3 ;; Copyright (C) 1998-2014 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; This file is part of GNU Emacs.
8 ;; GNU Emacs is free software: you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation, either version 3 of the License, or
11 ;; (at your option) any later version.
13 ;; GNU Emacs is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 (eval-when-compile (require 'cl))
28 (autoload 'mm-encode-body "mm-bodies")
29 (autoload 'mail-header-remove-whitespace "mail-parse")
30 (autoload 'mail-header-remove-comments "mail-parse")
32 (defun rfc2231-get-value (ct attribute)
33 "Return the value of ATTRIBUTE from CT."
34 (cdr (assq attribute (cdr ct))))
36 (defun rfc2231-parse-qp-string (string)
37 "Parse QP-encoded string using `rfc2231-parse-string'.
38 N.B. This is in violation with RFC2047, but it seem to be in common use."
39 (rfc2231-parse-string (rfc2047-decode-string string)))
41 (defun rfc2231-parse-string (string &optional signal-error)
42 "Parse STRING and return a list.
43 The list will be on the form
44 `(name (attribute . value) (attribute . value)...)'.
46 If the optional SIGNAL-ERROR is non-nil, signal an error when this
47 function fails in parsing of parameters. Otherwise, this function
48 must never cause a Lisp error."
50 (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token))
51 (stoken (ietf-drums-token-to-list ietf-drums-tspecials))
52 (ntoken (ietf-drums-token-to-list "0-9"))
53 c type attribute encoded number parameters value)
56 (mail-header-remove-whitespace
57 (mail-header-remove-comments string))
58 ;; The most likely cause of an error is unbalanced parentheses
59 ;; or double-quotes. If all parentheses and double-quotes are
60 ;; quoted meaninglessly with backslashes, removing them might
61 ;; make it parsable. Let's try...
64 (when (and (string-match "\\\\\"" string)
65 (not (string-match "\\`\"\\|[^\\]\"" string)))
66 (setq string (mm-replace-in-string string "\\\\\"" "\"")
68 (when (and (string-match "\\\\(" string)
69 (string-match "\\\\)" string)
70 (not (string-match "\\`(\\|[^\\][()]" string)))
71 (setq string (mm-replace-in-string string "\\\\\\([()]\\)" "\\1")
75 (mail-header-remove-whitespace
76 (mail-header-remove-comments string))))
77 ;; Finally, attempt to extract only type.
79 (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+"
80 "\\(?:/[^" ietf-drums-tspecials
81 "\t\n ]+\\)?\\)\\(?:[\t\n ;]\\|\\'\\)")
83 (match-string 1 string)
85 (let ((table (copy-syntax-table ietf-drums-syntax-table)))
86 (modify-syntax-entry ?\' "w" table)
87 (modify-syntax-entry ?* " " table)
88 (modify-syntax-entry ?\; " " table)
89 (modify-syntax-entry ?= " " table)
90 ;; The following isn't valid, but one should be liberal
91 ;; in what one receives.
92 (modify-syntax-entry ?\: "w" table)
93 (set-syntax-table table))
95 (when (and (memq c ttoken)
97 (setq type (ignore-errors
99 (buffer-substring (point) (progn
106 (setq c (char-after))
108 (error "Invalid header: %s" string))
110 ;; If c in nil, then this is an invalid header, but
111 ;; since elm generates invalid headers on this form,
113 (when (setq c (char-after))
114 (if (and (memq c ttoken)
115 (not (memq c stoken)))
120 (point) (progn (forward-sexp 1) (point))))))
121 (error "Invalid header: %s" string))
122 (setq c (char-after))
126 (setq c (char-after))
127 (if (not (memq c ntoken))
133 (point) (progn (forward-sexp 1) (point)))))
134 (setq c (char-after))
138 (setq c (char-after)))))
142 (error "Invalid header: %s" string))
144 (setq c (char-after))
147 (setq value (buffer-substring (1+ (point))
152 (setq value (mapconcat (lambda (c) (format "%%%02x" c))
154 ((and (or (memq c ttoken)
155 ;; EXTENSION: Support non-ascii chars.
157 (not (memq c stoken)))
162 ;; Jump over asterisk, non-ASCII
163 ;; and non-boundary characters.
167 (not (eq (char-syntax c) ? ))))
169 (setq c (char-after)))
172 (error "Invalid header: %s" string)))
173 (push (list attribute value number encoded)
176 (setq parameters nil)
178 (signal (car err) (cdr err)))))
180 ;; Now collect and concatenate continuation parameters.
183 (loop for (attribute value part encoded)
184 in (sort parameters (lambda (e1 e2)
189 ((or (not (setq elem (assq attribute cparams)))
192 (push (list attribute value encoded) cparams))
193 ;; Repetition of a part; do nothing.
197 ;; Concatenate continuation parts.
199 (setcar (cdr elem) (concat (cadr elem) value)))))
200 ;; Finally decode encoded values.
205 (rfc2231-decode-encoded-string (nth 1 elem))
207 (nreverse cparams))))))))
209 (defun rfc2231-decode-encoded-string (string)
210 "Decode an RFC2231-encoded string.
212 \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\",
213 \"us-ascii''This%20is%20%2A%2A%2Afun%2A%2A%2A\",
214 \"'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\",
215 \"''This%20is%20%2A%2A%2Afun%2A%2A%2A\", or
216 \"This is ***fun***\"."
217 (string-match "\\`\\(?:\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string)
218 (let ((coding-system (mm-charset-to-coding-system
219 (match-string 1 string) nil t))
220 ;;(language (match-string 2 string))
221 (value (match-string 3 string)))
222 (mm-with-unibyte-buffer
224 (goto-char (point-min))
225 (while (re-search-forward "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" nil t)
228 (string-to-number (match-string 1) 16)
229 (delete-region (match-beginning 0) (match-end 0)))))
230 ;; Decode using the charset, if any.
231 (if (memq coding-system '(nil ascii))
233 (mm-decode-coding-string (buffer-string) coding-system)))))
235 (defun rfc2231-encode-string (param value)
236 "Return and PARAM=VALUE string encoded according to RFC2231.
237 Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert
238 the result of this function."
239 (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token))
240 (tspecial (ietf-drums-token-to-list ietf-drums-tspecials))
241 (special (ietf-drums-token-to-list "*'%\n\t"))
242 (ascii (ietf-drums-token-to-list ietf-drums-text-token))
244 ;; Don't make lines exceeding 76 column.
245 (limit (- 74 (length param)))
246 spacep encodep charsetp charset broken)
247 (mm-with-multibyte-buffer
249 (goto-char (point-min))
252 ((or (memq (following-char) control)
253 (memq (following-char) tspecial)
254 (memq (following-char) special))
256 ((eq (following-char) ? )
258 ((not (memq (following-char) ascii))
262 (setq charset (mm-encode-body)))
263 (mm-disable-multibyte)
265 ((or encodep charsetp
268 (> (current-column) (if spacep (- limit 2) limit))))
269 (setq limit (- limit 6))
270 (goto-char (point-min))
271 (insert (symbol-name (or charset 'us-ascii)) "''")
273 (if (or (not (memq (following-char) ascii))
274 (memq (following-char) control)
275 (memq (following-char) tspecial)
276 (memq (following-char) special)
277 (eq (following-char) ? ))
279 (when (>= (current-column) (1- limit))
282 (insert "%" (format "%02x" (following-char)))
284 (when (> (current-column) limit)
288 (goto-char (point-min))
292 (insert (if (>= num 0) " " "")
293 param "*" (format "%d" (incf num)) "*=")
296 (goto-char (point-min))
298 (goto-char (point-max))
301 (goto-char (point-min))
307 ;;; rfc2231.el ends here