5 ;; description: some support for rfc 2368 (mailto: urls)
6 ;; creation date: 1998-11-07
7 ;; author: "Sen Nagata" <sen@eccosys.com>
8 ;; warning: not optimized at all
12 ;; -browse-url.el (i think this comes w/ emacs)
13 ;; -string.el (from elib) -- this should not be necessary any more
17 ;; -place this file in a place where emacs can find it (emacs loadpath)
21 ;; -try to remove dependencies on other packages (because the code
22 ;; reuse is small, etc.)
23 ;; -i've used this code w/ emacs versions >= 20.3
27 ;; mailtoURL = "mailto:" [ to ] [ headers ]
29 ;; headers = "?" header *( "&" header )
30 ;; header = hname "=" hvalue
34 ;; "#mailbox" is as specified in RFC 822 [RFC822]. This means that it
35 ;; consists of zero or more comma-separated mail addresses, possibly
36 ;; including "phrase" and "comment" components. Note that all URL
37 ;; reserved characters in "to" must be encoded: in particular,
38 ;; parentheses, commas, and the percent sign ("%"), which commonly occur
39 ;; in the "mailbox" syntax.
41 ;; "hname" and "hvalue" are encodings of an RFC 822 header name and
42 ;; value, respectively. As with "to", all URL reserved characters must
45 ;; The special hname "body" indicates that the associated hvalue is the
46 ;; body of the message. The "body" hname should contain the content for
47 ;; the first text/plain body part of the message. The mailto URL is
48 ;; primarily intended for generation of short text messages that are
49 ;; actually the content of automatic processing (such as "subscribe"
50 ;; messages for mailing lists), not general MIME bodies.
52 ;; Within mailto URLs, the characters "?", "=", "&" are reserved.
54 ;; Because the "&" (ampersand) character is reserved in HTML, any mailto
55 ;; URL which contains an ampersand must be spelled differently in HTML
56 ;; than in other contexts. A mailto URL which appears in an HTML
57 ;; document must use "&" instead of "&".
59 ;; Also note that it is legal to specify both "to" and an "hname" whose
60 ;; value is "to". That is,
62 ;; mailto:addr1%2C%20addr2
66 ;; mailto:?to=addr1%2C%20addr2
70 ;; mailto:addr1?to=addr2
72 ;; 8-bit characters in mailto URLs are forbidden. MIME encoded words (as
73 ;; defined in [RFC2047]) are permitted in header values, but not for any
74 ;; part of a "body" hname.
76 (defconst mailto-version "mailto.el 0.8")
78 ;; it would be nice to have a function which could do the equivalent of
79 ;; perl's /e...so i wrote a limited version. see
80 ;; 'string-replace-match-using-function'
82 ;; not using string-replace-match from string.el from elib
85 ;; we could use 'url-unhex-string' from url.el in w3, but i don't want
86 ;; to force people to get w3 to use this...
87 (fset 'mailto-unhexify-string 'my-unhexify-string)
89 ;; uses 'browse-url', but this is loaded autmatically?
90 ;; i need to perform this 'require' explicity, because if i don't,
91 ;; 'browse-url-url-at-point' doesn't seem to get defined if emacs is
92 ;; running in terminal mode
95 ;; removing explicit dependencies on browse-url in code that uses mailto.el
96 (fset 'mailto-url-at-point 'browse-url-url-at-point)
98 ;; only an approximation...
100 (defconst mailto-url-regexp
101 ; "^\\([-a-zA-Z0-9+.]+:\\)\\([^?]*\\)\\?\\(.*\\)")
102 ; isn't there a better way to achieve case insensitivity?
103 ; need a better regexp...could we set 'case-fold-search' in
104 ; functions that use this regular expression?
105 ; "^\\([mM][aA][iI][lL][tT][oO]:\\)\\([^?]*\\)\\(\\?\\(.*\\)\\)?")
106 ; "\\([mM][aA][iI][lL][tT][oO]:\\)\\([^?]*\\)\\(\\?\\(.*\\)\\)?"
107 "\\([mM][aA][iI][lL][tT][oO]:\\)\\([^?]+\\)\\(\\?\\(.*\\)\\)*"
109 ; "\\([mM][aA][iI][lL][tT][oO]:\\)\\([^?]*\\)\\(\\?\\(.*\\)\\)*[^>]*.*$")
111 ;; describes 'mailto:'
112 (defconst mailto-url-scheme-index 1)
113 ;; describes the portion of the url between 'mailto:' and '?'
114 ;; i'm going to call this part the 'prequery'
115 (defconst mailto-url-prequery-index 2)
116 ;; describes the portion of the url after '?'
117 ;; i'm going to call this part the 'query'
118 (defconst mailto-url-query-index 4)
120 ;; replace all occurences of "&" w/ "&"
121 ;; warning: haven't tested this lately
122 (defun mailto-html-unescape-url (mailto-url)
123 ; (string-replace-match "&" mailto-url "&" t t))
124 ;; much slower but independent of string.el
125 (string-replace-match-using-function
131 ;; parse a mailto: url into parts and return an alist containing these
132 ;; components. returned alist looks something like:
134 ;; (("To:" . "mailinglist@domain.com")
135 ;; ("Subject:" . "this is a subject")
136 ;; ("Body:" . "this is a body"))
138 ;; WARNING: the mailto: url is assumed NOT to have been taken from html --
139 ;; so it should have '&' and not '&'...
140 (defun mailto-parse-url (mailto-url)
141 (let (scheme prequery query mailto-alist)
142 (string-match mailto-url-regexp mailto-url)
144 ;; possibly replace all occurences of "&" w/ "&"...
145 ;(setq mailto-url (mailto-html-unescape-url mailto-url))
149 (match-string mailto-url-scheme-index mailto-url))
152 ;; do we need to unhexify 'prequery'?
154 (match-string mailto-url-prequery-index mailto-url))
156 (match-string mailto-url-query-index mailto-url))
158 ;; build our mailto-alist if 'query' is non-empty
159 (if (not (null query))
163 ;; there's a better way...
164 (let* ((temp-list (split-string x "="))
165 (header-name (car temp-list))
166 (header-value (cadr temp-list)))
167 ;; each element of our result looks something like:
169 ;; ("From:" . "god@heaven.org")
171 ;; capitalize and append colon to an unhexified header-name
173 (capitalize (mailto-unhexify-string header-name))
175 ;; unhexify header-value
176 (mailto-unhexify-string header-value))))
177 (split-string query "&"))))
179 ;; if there is a 'prequery' portion, convert this part into the second
180 ;; form described on page 2 of rfc 2368:
182 ;; mailto:?to=addr1%2C%20addr2
184 ;; isn't it legal for this to be:
186 ;; mailto:?to=addr1,%20addr2
190 ;; actually, don't bother converting...but modify mailto-alist
191 ;; if necessary w.r.t. the "To:" cons cell
193 ;; we need to do more if the 'prequery' portion is not empty
194 (if (not (string-equal "" prequery))
196 ;; if there's already a "To:" cons cell, modify the value
197 (if (assoc "To:" mailto-alist)
198 (let* ((our-cons-cell
199 (assoc "To:" mailto-alist))
200 (our-car ; unnecessary
203 (cdr our-cons-cell)))
206 (concat our-cdr ", " prequery)))
207 ;; there isn't a "To:" cons cell, so make one and add it
210 (cons "To:" prequery)
213 ;; the value we return...
216 (defun mailto-extract-mailto-urls-from-string (string)
217 "Extract mailto: urls from a string, STRING.
219 STRING may contain multiple mailto: urls. The results are returned
220 as a list. If there are no mailto: urls, nil is returned."
221 (string-match-global "mailto:[^>]+" string t)
224 ;; what i want is something like m//g in perl...
225 (defun string-match-global (regexp string &optional case-insensitive)
226 "Extract strings in STRING which match REGEXP.
228 The result is returned as a list of the matched strings. If there were
229 no matches, nil is returned.
231 Optional arg CASE-INSENSTIVE toggles whether the search is case-insensitive.
232 By default, the search is case-sensitive."
233 (let ((case-fold-search case-insensitive)
234 (string-length (length string))
237 (while (< current-position string-length)
238 (if (setq current-position
239 (string-match regexp string current-position))
241 ;; keep track of each match
243 (cons (substring string
247 ;; prepare to search rest of string
248 (setq current-position (match-end 0)))
249 ;; there were no more matches, so make the loop end
250 ;; by making things so that the loop condition fails
251 (setq current-position string-length)))
256 ;; i'll make a better version some time...
257 ;; want to be able to hand my-func multiple arguments, may be...
258 ;; perhaps we can use '&rest args'...
259 (defun string-replace-match-using-function
260 (regexp string func &optional global)
261 "Replace match of REGEXP in STRING with result of evaluating FUNC.
262 If not match is found, string should be returned as is.
264 Optional arg GLOBAL means to replace all matches instead of only the first."
265 (let ((result-string "")
266 (case-fold-search t))
268 ;; look for every occurence?
270 (let ((last-position 0)
272 (string-length (length string)))
275 (while (< current-position string-length)
277 (if (setq current-position
278 (string-match regexp string current-position))
280 ;; there was a match, so append pre-match portion and
281 ;; transformed match portion
283 ;; work on building result-string
285 (concat result-string
286 ;; what didn't match which came before the
287 ;; match and after the last match (if any)
291 ;; transform what matched...
297 ;; update where we are looking in the string
298 (setq current-position (match-end 0))
299 (setq last-position current-position))
301 ;; there was no match, so append the rest of the string to
304 ;; finish building result-string
306 (concat result-string
309 ;; do this to fall out of the loop -- should be a better way
310 (setq current-position string-length))
313 ;; not global -- only do things once
314 (if (not (string-match regexp string 0))
315 (setq result-string string)
317 (concat (substring string 0 (match-beginning 0))
322 (substring string (match-end 0))))))
328 ;; warning: no uppercase letters -- perhaps i should do something about that...
329 (defun hex-to-dec (hex-digit)
330 "Convert a hex digit [0-9a-f] to decimal."
331 (if (>= hex-digit ?a)
332 (+ (- hex-digit ?a) 10)
335 ;; convert from a hex byte string (e.g. '20') to a string of one character
336 ;; (e.g. '20' -> ' ')
337 (defun hex-byte-string-to-char-string (hex-byte-string)
338 ;; convert to a string of one character
340 ;; add the converted byte values together
342 ;; convert the right/low byte
344 (string-to-char (substring hex-byte-string 1 2)))
345 ;; convert the left/high byte
348 (string-to-char (substring hex-byte-string 0 1)))))))
350 ;; takes a string like %20 and returns the corresponding ascii character
351 ;; should just place a lambda version of this in my-unhexify-string...
352 (defun our-func (string)
353 (hex-byte-string-to-char-string
354 (substring string 1 3)))
356 ;; just to remove the w3 dependency...
357 (defun my-unhexify-string (hex-string)
358 (string-replace-match-using-function
359 "%\\([0-9a-f][0-9a-f]\\)" hex-string 'our-func t))
364 ;(string-replace-match-using-function "%\\([0-9a-f][0-9a-f]\\)"
371 ; (defun string-replace-match-using-function (regexp string func &optional global)
372 ; "Replace match of REGEXP in STRING using the result of evaluating FUNC on the matched string.
373 ; If no match is found, nil is returned instead of the new string.
375 ; Optional arg GLOBAL means to replace all matches instead of only the first."
377 ; (let ((data (match-data)))
385 ; (while (string-match regexp string start)
386 ; (setq matchbeginning (match-beginning 0)
387 ; matchend (match-end 0)
388 ; result (concat result
389 ; (substring string start matchbeginning)
392 ; (match-beginning 0)
396 ; (if matchbeginning ; matched at least once
397 ; (concat result (substring string start))
401 ; (if (not (string-match regexp string 0))
403 ; (concat (substring string 0 (match-beginning 0))
406 ; (match-beginning 0)
408 ; (substring string (match-end 0)))))
409 ; (store-match-data data))))
412 ;(string-replace-match-using-function
413 ; "%\\([0-9a-f][0-9a-f]\\)"
414 ; "mailto:majordomo@huis-clos.mit.edu?body=unsubscribe%20scwm-discuss"
418 ; (defun string-replace-match-using-function
419 ; (regexp string func &optional global args)
420 ; "Replace match of REGEXP in STRING using the result of evaluating FUNC on the matched string.
421 ; If no match is found, nil is returned instead of the new string.
423 ; Optional arg GLOBAL means to replace all matches instead of only the first.
425 ; Optional arg ARGS is a list of indeces of subexpressions of REGEXP -- the
426 ; intended use is to construct a list of arguments (the matching substrings)
429 ; (let ((data (match-data)))
430 ; ; if 'args' was not specified, set it to a list containing 0 --
431 ; ; this means use the whole matched string as an argument to 'func'
433 ; (setq args (list 0)))
437 ; ;; find all matches
444 ; ; while there are matches...
445 ; (while (string-match regexp string start)
447 ; (setq matchbeginning (match-beginning 0)
448 ; matchend (match-end 0)
449 ; ; compute arguments to 'func'
453 ; (match-beginning index)
454 ; (match-end index)))
456 ; ; compute the resulting string
459 ; (substring string start matchbeginning)
460 ; ; compute the replacement text
461 ; (apply func func-args))
464 ; (if matchbeginning ; matched at least once
465 ; (concat result (substring string start))
469 ; (if (not (string-match regexp string 0))
472 ; ; compute arguments to 'func'
473 ; (setq func-args (mapcar
476 ; (match-beginning index)
477 ; (match-end index)))
479 ; ; compute the resulting string
480 ; (concat (substring string 0 (match-beginning 0))
481 ; ; compute the replacement text
482 ; (apply func func-args)
483 ; (substring string (match-end 0)))))
484 ; (store-match-data data)))))
487 ; (string-replace-match-using-function
488 ; "%\\([0-9a-f][0-9a-f]\\)"
489 ; "mailto:majordomo@huis-clos.mit.edu?body=unsubscribe%20scwm-discuss"
494 ; (string-replace-match-using-function
495 ; "%\\([0-9a-f][0-9a-f]\\)"
496 ; "mailto:majordomo@huis-clos.mit.edu?body=unsubscribe%20scwm-discuss"
497 ; 'hex-byte-string-to-char-string