1 ;;; nnwarchive.el --- interfacing with web archives
2 ;; Copyright (C) 1999 Free Software Foundation, Inc.
4 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published
11 ;; by the Free Software Foundation; either version 2, or (at your
12 ;; option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful, but
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;; General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ;; Boston, MA 02111-1307, USA.
26 ;; Note: You need to have `url' and `w3' installed for this backend to
29 ;; A lot of codes stolen from mail-source, nnslashdot, nnweb.
32 ;; 1. To support more web archives.
33 ;; 2. Support nnwarchive-xover-is-evil.
35 ;; Known bugs: in w3 0.44, there are two copies of url-maybe-relative.
36 ;; If it is loaded from w3.el, (load-library "url"). Update to w3 0.46
37 ;; or greater version.
41 (eval-when-compile (require 'cl))
49 (require 'mail-source)
56 ;; Report failure to find w3 at load time if appropriate.
63 (nnoo-declare nnwarchive)
65 (defvar nnwarchive-type-definition
67 (address . "www.egroups.com")
69 "http://www.egroups.com/register?method=loginAction&email=%s&password=%s"
70 nnwarchive-login nnwarchive-passwd)
72 "http://www.egroups.com/UserGroupsPage?")
73 (list-dissect . nnwarchive-egroups-list)
74 (list-groups . nnwarchive-egroups-list-groups)
76 "http://www.egroups.com/group/%s/?fetchForward=1&start=%d" group aux)
78 "http://www.egroups.com/group/%s/?fetchForward=1" group)
79 (xover-page-size . 13)
80 (xover-dissect . nnwarchive-egroups-xover)
82 "http://www.egroups.com/group/%s/%d.html?raw=1" group article)
83 (article-dissect . nnwarchive-egroups-article)
85 (xover-files . nnwarchive-egroups-xover-files))
87 (address . "www.mail-archive.com")
89 "http://www.mail-archive.com/lists.html")
90 (list-dissect . nnwarchive-mail-archive-list)
91 (list-groups . nnwarchive-mail-archive-list-groups)
93 "http://www.mail-archive.com/%s/mail%d.html" group aux)
95 "http://www.mail-archive.com/%s/maillist.html" group)
96 (xover-dissect . nnwarchive-mail-archive-xover)
98 "http://www.mail-archive.com/%s/msg%05d.html" group article1)
99 (article-dissect . nnwarchive-mail-archive-article)
100 (xover-files . nnwarchive-mail-archive-xover-files)
101 (article-offset . 1))))
103 (defvar nnwarchive-default-type 'egroups)
105 (defvoo nnwarchive-directory (nnheader-concat gnus-directory "warchive/")
106 "Where nnwarchive will save its files.")
109 (defvoo nnwarchive-type nil
110 "The type of nnwarchive."))
112 (defvoo nnwarchive-address ""
113 "The address of nnwarchive.")
115 (defvoo nnwarchive-login nil
116 "Your login name for the group.")
118 (defvoo nnwarchive-passwd nil
119 "Your password for the group.")
121 (defvoo nnwarchive-groups nil)
123 (defvoo nnwarchive-headers-cache nil)
125 (defvoo nnwarchive-opened nil)
127 (defvoo nnwarchive-authentication nil)
129 (defvoo nnwarchive-xover-is-evil nil) ;; not implemented
131 (defconst nnwarchive-version "nnwarchive 1.0")
133 ;;; Internal variables
135 (defvoo nnwarchive-open-url nil)
136 (defvoo nnwarchive-open-dissect nil)
138 (defvoo nnwarchive-list-url nil)
139 (defvoo nnwarchive-list-dissect nil)
140 (defvoo nnwarchive-list-groups nil)
142 (defvoo nnwarchive-xover-files nil)
143 (defvoo nnwarchive-xover-url nil)
144 (defvoo nnwarchive-xover-last-url nil)
145 (defvoo nnwarchive-xover-dissect nil)
146 (defvoo nnwarchive-xover-page-size nil)
148 (defvoo nnwarchive-article-url nil)
149 (defvoo nnwarchive-article-dissect nil)
150 (defvoo nnwarchive-xover-files nil)
151 (defvoo nnwarchive-article-offset 0)
153 (defvoo nnwarchive-buffer nil)
155 (defvar nnwarchive-headers nil)
158 ;;; Interface functions
160 (nnoo-define-basics nnwarchive)
162 (defun nnwarchive-set-default (type)
163 (let ((defs (cdr (assq type nnwarchive-type-definition)))
166 (set (intern (concat "nnwarchive-" (symbol-name (car def))))
169 (deffoo nnwarchive-retrieve-headers (articles &optional group server fetch-old)
170 (nnwarchive-possibly-change-server group server)
171 (setq nnwarchive-headers (cdr (assoc group nnwarchive-headers-cache)))
173 (set-buffer nnwarchive-buffer)
175 (funcall nnwarchive-xover-files group articles))
177 (set-buffer nntp-server-buffer)
180 (dolist (art articles)
181 (if (setq header (assq art nnwarchive-headers))
182 (nnheader-insert-nov (cdr header))))))
183 (let ((elem (assoc group nnwarchive-headers-cache)))
185 (setcdr elem nnwarchive-headers)
186 (push (cons group nnwarchive-headers) nnwarchive-headers-cache)))
189 (deffoo nnwarchive-retrieve-groups (groups &optional server)
190 "Retrieve group info on GROUPS."
191 (nnwarchive-possibly-change-server nil server)
192 (if nnwarchive-list-groups
193 (funcall nnwarchive-list-groups groups))
194 (nnwarchive-write-groups)
195 (nnwarchive-generate-active)
198 (deffoo nnwarchive-request-group (group &optional server dont-check)
199 (nnwarchive-possibly-change-server nil server)
200 (if nnwarchive-list-groups
201 (funcall nnwarchive-list-groups (list group)))
202 (nnwarchive-write-groups)
203 (let ((elem (assoc group nnwarchive-groups)))
206 (nnheader-report 'nnwarchive "Group does not exist"))
208 (nnheader-report 'nnwarchive "Opened group %s" group)
210 "211 %d %d %d %s\n" (or (cadr elem) 0) 1 (or (cadr elem) 0)
211 (prin1-to-string group))
214 (deffoo nnwarchive-close-group (group &optional server)
215 (nnwarchive-possibly-change-server group server)
216 (when (gnus-buffer-live-p nnwarchive-buffer)
218 (set-buffer nnwarchive-buffer)
219 (kill-buffer nnwarchive-buffer)))
222 (deffoo nnwarchive-request-article (article &optional group server buffer)
223 (nnwarchive-possibly-change-server group server)
226 (set-buffer nnwarchive-buffer)
227 (goto-char (point-min))
228 (let ((article1 (- article nnwarchive-article-offset)))
229 (nnwarchive-url nnwarchive-article-url))
230 (setq contents (funcall nnwarchive-article-dissect group article)))
233 (set-buffer (or buffer nntp-server-buffer))
236 (nnheader-report 'nnwarchive "Fetched article %s" article)
237 (cons group article)))))
239 (deffoo nnwarchive-close-server (&optional server)
240 (when (and (nnwarchive-server-opened server)
241 (gnus-buffer-live-p nnwarchive-buffer))
243 (set-buffer nnwarchive-buffer)
244 (kill-buffer nnwarchive-buffer)))
245 (nnoo-close-server 'nnwarchive server))
247 (deffoo nnwarchive-request-list (&optional server)
248 (nnwarchive-possibly-change-server nil server)
250 (set-buffer nnwarchive-buffer)
252 (if nnwarchive-list-url
253 (nnwarchive-url nnwarchive-list-url))
254 (if nnwarchive-list-dissect
255 (funcall nnwarchive-list-dissect))
256 (nnwarchive-write-groups)
257 (nnwarchive-generate-active))
260 (deffoo nnwarchive-request-newgroups (date &optional server)
261 (nnwarchive-possibly-change-server nil server)
262 (nnwarchive-write-groups)
263 (nnwarchive-generate-active)
266 (deffoo nnwarchive-asynchronous-p ()
269 (deffoo nnwarchive-server-opened (&optional server)
272 (deffoo nnwarchive-open-server (server &optional defs connectionless)
273 (nnwarchive-init server)
274 (unless (nnwarchive-server-opened server)
275 (when nnwarchive-authentication
276 (setq nnwarchive-login
279 (format "Login at %s: " server)
281 (setq nnwarchive-passwd
282 (or nnwarchive-passwd
283 (mail-source-read-passwd
284 (format "Password for %s at %s: "
285 nnwarchive-login server)))))
286 (unless nnwarchive-groups
287 (nnwarchive-read-groups))
289 (set-buffer nnwarchive-buffer)
291 (if nnwarchive-open-url
292 (nnwarchive-url nnwarchive-open-url))
293 (if nnwarchive-open-dissect
294 (funcall nnwarchive-open-dissect))
295 (setq nnwarchive-opened t)))
298 (nnoo-define-skeleton nnwarchive)
300 ;;; Internal functions
302 (defun nnwarchive-possibly-change-server (&optional group server)
303 (nnwarchive-init server)
305 (not (nnwarchive-server-opened server)))
306 (nnwarchive-open-server server)))
308 (defun nnwarchive-read-groups ()
309 (let ((file (expand-file-name (concat "groups-" nnwarchive-address)
310 nnwarchive-directory)))
311 (when (file-exists-p file)
313 (insert-file-contents file)
314 (goto-char (point-min))
315 (setq nnwarchive-groups (read (current-buffer)))))))
317 (defun nnwarchive-write-groups ()
318 (with-temp-file (expand-file-name (concat "groups-" nnwarchive-address)
319 nnwarchive-directory)
320 (prin1 nnwarchive-groups (current-buffer))))
322 (defun nnwarchive-init (server)
323 "Initialize buffers and such."
324 (let ((type (intern server)) (defs nnwarchive-type-definition) def)
327 (setq type nnwarchive-default-type))
328 ((assq type nnwarchive-type-definition) t)
331 (while (setq def (pop defs))
332 (when (equal (cdr (assq 'address (cdr def))) server)
334 (setq type (car def))))
336 (error "Undefined server %s" server))))
337 (setq nnwarchive-type type))
338 (unless (file-exists-p nnwarchive-directory)
339 (gnus-make-directory nnwarchive-directory))
340 (unless (gnus-buffer-live-p nnwarchive-buffer)
341 (setq nnwarchive-buffer
343 (nnheader-set-temp-buffer
344 (format " *nnwarchive %s %s*" nnwarchive-type server)))))
345 (nnwarchive-set-default nnwarchive-type))
347 (defun nnwarchive-encode-www-form-urlencoded (pairs)
348 "Return PAIRS encoded for forms."
352 (concat (w3-form-encode-xwfu (car data)) "="
353 (w3-form-encode-xwfu (cdr data)))))
356 (defun nnwarchive-fetch-form (url pairs)
357 (let ((url-request-data (nnwarchive-encode-www-form-urlencoded pairs))
358 (url-request-method "POST")
359 (url-request-extra-headers
360 '(("Content-type" . "application/x-www-form-urlencoded"))))
364 (defun nnwarchive-eval (expr)
367 (cons (nnwarchive-eval (car expr)) (nnwarchive-eval (cdr expr))))
373 (defun nnwarchive-url (xurl)
374 (let ((url-confirmation-func 'identity))
376 ((eq (car xurl) 'post)
378 (nnwarchive-fetch-form (car xurl) (nnwarchive-eval (cdr xurl))))
380 (nnweb-insert (apply 'format (nnwarchive-eval xurl)))))))
382 (defun nnwarchive-decode-entities ()
383 (goto-char (point-min))
384 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t)
385 (replace-match (char-to-string
386 (if (eq (aref (match-string 1) 0) ?\#)
387 (string-to-number (substring (match-string 1) 1))
388 (or (cdr (assq (intern (match-string 1))
393 (defun nnwarchive-decode-entities-string (str)
396 (nnwarchive-decode-entities)
397 (buffer-substring (point-min) (point-max))))
399 (defun nnwarchive-remove-markup ()
400 (goto-char (point-min))
401 (while (search-forward "<!--" nil t)
402 (delete-region (match-beginning 0)
403 (or (search-forward "-->" nil t)
405 (goto-char (point-min))
406 (while (re-search-forward "<[^>]+>" nil t)
407 (replace-match "" t t)))
409 (defun nnwarchive-date-to-date (sdate)
410 (let ((elem (split-string sdate)))
411 (concat (substring (nth 0 elem) 0 3) " "
412 (substring (nth 1 elem) 0 3) " "
413 (substring (nth 2 elem) 0 2) " "
414 (substring (nth 3 elem) 1 6) " "
415 (format-time-string "%Y") " "
418 (defun nnwarchive-generate-active ()
420 (set-buffer nntp-server-buffer)
422 (dolist (elem nnwarchive-groups)
423 (insert (prin1-to-string (car elem))
424 " " (number-to-string (or (cadr elem) 0)) " 1 y\n"))))
426 (defun nnwarchive-paged (articles)
427 (let (art narts next)
428 (while (setq art (pop articles))
429 (when (and (>= art (or next 0))
430 (not (assq art nnwarchive-headers)))
432 (setq next (+ art nnwarchive-xover-page-size))))
437 (defun nnwarchive-egroups-list-groups (groups)
440 (set-buffer nnwarchive-buffer)
441 (dolist (group groups)
443 (nnwarchive-url nnwarchive-xover-last-url)
444 (goto-char (point-min))
445 (when (re-search-forward "of \\([0-9]+\\)</title>" nil t)
446 (setq articles (string-to-number (match-string 1))))
447 (let ((elem (assoc group nnwarchive-groups)))
449 (setcar (cdr elem) articles)
450 (push (list group articles "") nnwarchive-groups)))
451 (setq nnwarchive-headers (cdr (assoc group nnwarchive-headers-cache)))
452 (nnwarchive-egroups-xover group)
453 (let ((elem (assoc group nnwarchive-headers-cache)))
455 (setcdr elem nnwarchive-headers)
456 (push (cons group nnwarchive-headers) nnwarchive-headers-cache)))))))
458 (defun nnwarchive-egroups-list ()
459 (let ((case-fold-search t)
460 group description elem articles)
461 (goto-char (point-min))
464 "/group/\\([^/]+\\)/info\\.html[^>]+>[^>]+>[\040\t]*-[\040\t]*\\([^<]+\\)<"
466 (setq group (match-string 1)
467 description (match-string 2))
469 (when (re-search-forward ">\\([0-9]+\\)<" nil t)
470 (setq articles (string-to-number (match-string 1))))
471 (if (setq elem (assoc group nnwarchive-groups))
472 (setcar (cdr elem) articles)
473 (push (list group articles description) nnwarchive-groups)))
474 (nnwarchive-egroups-list-groups (mapcar 'identity nnwarchive-groups)))
477 (defun nnwarchive-egroups-xover (group)
478 (let (article subject from date)
479 (goto-char (point-min))
480 (while (re-search-forward
481 "<a href=\"/group/\\([^/]+\\)/\\([0-9]+\\)\\.html[^>]+>\\([^<]+\\)<"
483 (setq group (match-string 1)
484 article (string-to-number (match-string 2))
485 subject (match-string 3))
487 (unless (assq article nnwarchive-headers)
488 (if (looking-at "<td[^>]+><font[^>]+>\\([^<]+\\)</font>")
489 (setq from (match-string 1)))
491 (if (looking-at "<td[^>]+><font[^>]+>\\([^<]+\\)</font>")
492 (setq date (identity (match-string 1))))
495 (make-full-mail-header
497 (nnwarchive-decode-entities-string subject)
498 (nnwarchive-decode-entities-string from)
500 (concat "<" group "%"
501 (number-to-string article)
504 0 0 "")) nnwarchive-headers))))
507 (defun nnwarchive-egroups-article (group articles)
508 (goto-char (point-min))
509 (if (search-forward "<pre>" nil t)
510 (delete-region (point-min) (point)))
511 (goto-char (point-max))
512 (if (search-backward "</pre>" nil t)
513 (delete-region (point) (point-max)))
514 (goto-char (point-min))
515 (while (re-search-forward "<a[^>]+>\\([^<]+\\)</a>" nil t)
516 (replace-match "<\\1>"))
517 (nnwarchive-decode-entities)
518 (buffer-substring (point-min) (point-max)))
520 (defun nnwarchive-egroups-xover-files (group articles)
522 (setq auxs (nnwarchive-paged (sort articles '<)))
523 (while (setq aux (pop auxs))
524 (goto-char (point-max))
525 (nnwarchive-url nnwarchive-xover-url))
526 (if nnwarchive-xover-dissect
527 (nnwarchive-egroups-xover group))))
531 (defun nnwarchive-mail-archive-list-groups (groups)
534 (set-buffer nnwarchive-buffer)
535 (dolist (group groups)
537 (nnwarchive-url nnwarchive-xover-last-url)
538 (goto-char (point-min))
539 (when (re-search-forward "msg\\([0-9]+\\)\\.html" nil t)
540 (setq articles (1+ (string-to-number (match-string 1)))))
541 (let ((elem (assoc group nnwarchive-groups)))
543 (setcar (cdr elem) articles)
544 (push (list group articles "") nnwarchive-groups)))
545 (setq nnwarchive-headers (cdr (assoc group nnwarchive-headers-cache)))
546 (nnwarchive-mail-archive-xover group)
547 (let ((elem (assoc group nnwarchive-headers-cache)))
549 (setcdr elem nnwarchive-headers)
550 (push (cons group nnwarchive-headers)
551 nnwarchive-headers-cache)))))))
553 (defun nnwarchive-mail-archive-list ()
554 (let ((case-fold-search t)
555 group description elem articles)
556 (goto-char (point-min))
557 (while (re-search-forward "<a href=\"\\([^/]+\\)/\">\\([^>]+\\)<" nil t)
558 (setq group (match-string 1)
559 description (match-string 2))
562 (if (setq elem (assoc group nnwarchive-groups))
563 (setcar (cdr elem) articles)
564 (push (list group articles description) nnwarchive-groups))))
567 (defun nnwarchive-mail-archive-xover (group)
568 (let (article subject from date)
569 (goto-char (point-min))
570 (while (re-search-forward
571 "<A[^>]*HREF=\"msg\\([0-9]+\\)\\.html[^>]+>\\([^<]+\\)<"
573 (setq article (1+ (string-to-number (match-string 1)))
574 subject (match-string 2))
576 (unless (assq article nnwarchive-headers)
577 (if (looking-at "<UL><LI><EM>From</EM>:\\([^&]+\\)<\\([^&]+\\)>")
579 (setq from (match-string 1)
580 date (identity (match-string 2))))
581 (setq from "" date ""))
584 (make-full-mail-header
586 (nnwarchive-decode-entities-string subject)
587 (nnwarchive-decode-entities-string from)
589 (format "<%05d%%%s>\n" (1- article) group)
591 0 0 "")) nnwarchive-headers))))
594 (defun nnwarchive-mail-archive-xover-files (group articles)
595 (unless nnwarchive-headers
597 (nnwarchive-url nnwarchive-xover-last-url)
598 (goto-char (point-min))
599 (nnwarchive-mail-archive-xover group))
600 (let ((minart (apply 'min articles))
601 (min (apply 'min (mapcar 'car nnwarchive-headers)))
603 (while (> min minart)
605 (nnwarchive-url nnwarchive-xover-url)
606 (nnwarchive-mail-archive-xover group)
607 (setq min (apply 'min (mapcar 'car nnwarchive-headers))))))
609 (defun nnwarchive-mail-archive-article (group article)
610 (let (p refs url mime file)
612 (goto-char (point-min))
613 (when (search-forward "<ul>" nil t)
615 (delete-region (point-min) (point))
616 (search-forward "</ul>" nil t)
618 (narrow-to-region (point-min) (point))
619 (nnwarchive-remove-markup)
620 (nnwarchive-decode-entities)
621 (goto-char (point-min))
623 (goto-char (point-max))
627 (when (search-forward "X-Body-of-Message" nil t)
629 (delete-region p (point))
630 (search-forward "X-Body-of-Message-End" nil t)
633 (narrow-to-region p (point))
634 (goto-char (point-min))
635 (if (looking-at "<PRE>")
639 (when (search-forward "</PRE>" nil t)
640 (goto-char (match-beginning 0))
643 (narrow-to-region p (point))
644 (nnwarchive-remove-markup)
645 (nnwarchive-decode-entities)
646 (goto-char (point-max))))
648 "[\040\n\r\t]*<P><A HREF=\"\\([^\"]+\\)[^>]*><[^>]*>\\([^<]+\\)")
649 (setq url (match-string 1)
650 file (match-string 2))
651 (goto-char (match-beginning 1))
654 (delete-region p (progn (forward-line) (point)))
655 (insert (format "http://www.mail-archive.com/%s/%s\n"
658 (goto-char (point-max))))
660 (when (search-forward "X-References-End" nil t)
661 (narrow-to-region p (point))
662 (goto-char (point-min))
663 (while (re-search-forward "msg\\([0-9]+\\)\\.html" nil t)
664 (push (concat "<" (match-string 1) "%" group ">") refs))
666 (delete-region p (point-max))
667 (goto-char (point-min))
668 (insert (format "Message-ID: <%05d%%%s>\n" (1- article) group))
670 (insert "References:")
672 (insert " " (pop refs)))
675 (insert "MIME-Version: 1.0\n"
676 "Content-Type: text/html\n")))
677 (buffer-string (point-min) (point-max))))
679 (provide 'nnwarchive)
681 ;;; nnwarchive.el ends here