1 ;;; nnwarchive.el --- interfacing with web archives
2 ;; Copyright (C) 1999 Free Software Foundation, Inc.
4 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published
11 ;; by the Free Software Foundation; either version 2, or (at your
12 ;; option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful, but
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;; General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ;; Boston, MA 02111-1307, USA.
26 ;; Note: You need to have `url' and `w3' installed for this backend to
29 ;; A lot of codes stolen from mail-source, nnslashdot, nnweb.
31 ;; Todo: To support more web archives.
33 ;; Known bugs: in w3 0.44, there are two copies of url-maybe-relative.
34 ;; If it is loaded from w3.el, (load-library "url"). w3 0.45 should
39 (eval-when-compile (require 'cl))
47 (require 'mail-source)
53 ;; Report failure to find w3 at load time if appropriate.
59 (nnoo-declare nnwarchive)
62 (defvar nnwarchive-type-definition
65 "http://www.egroups.com/register?method=loginAction&email=%s&password=%s"
67 (open-dissect . nnwarchive-egroups-open)
69 "http://www.egroups.com/UserGroupsPage?")
70 (list-dissect . nnwarchive-egroups-list)
72 "http://www.egroups.com/group/%s/?fetchForward=1&start=%d" group start)
74 "http://www.egroups.com/group/%s/?fetchForward=1" group)
75 (xover-page-size . 13)
76 (xover-dissect . nnwarchive-egroups-xover)
78 "http://www.egroups.com/group/%s/%d.html?raw=1" group article)
79 (article-dissect . nnwarchive-egroups-article)))))
82 (defvar nnwarchive-short-names
85 (defvoo nnwarchive-directory (nnheader-concat gnus-directory "warchive/")
86 "Where nnwarchive will save its files.")
89 (defvoo nnwarchive-type 'egroups
90 "The type of nnwarchive."))
92 (defvoo nnwarchive-address "egroups.com"
93 "The address of nnwarchive.")
95 (defvoo nnwarchive-login nil
96 "Your login name for the group.")
98 (defvoo nnwarchive-passwd nil
99 "Your password for the group.")
101 (defvoo nnwarchive-groups nil)
103 (defvoo nnwarchive-headers-cache nil)
105 (defvoo nnwarchive-opened nil)
107 (defconst nnwarchive-version "nnwarchive 1.0")
109 ;;; Internal variables
111 (defvar nnwarchive-open-url nil)
112 (defvar nnwarchive-open-dissect nil)
114 (defvar nnwarchive-list-url nil)
115 (defvar nnwarchive-list-dissect nil)
117 (defvar nnwarchive-xover-url nil)
118 (defvar nnwarchive-xover-last-url nil)
119 (defvar nnwarchive-xover-dissect nil)
120 (defvar nnwarchive-xover-page-size nil)
122 (defvar nnwarchive-article-url nil)
123 (defvar nnwarchive-article-dissect nil)
125 (defvar nnwarchive-buffer nil)
127 (defvar nnwarchive-headers nil)
129 ;;; Interface functions
131 (nnoo-define-basics nnwarchive)
134 (defun nnwarchive-bind-1 ()
135 (let ((defaults (cdr (assq nnwarchive-type nnwarchive-type-definition)))
136 (short-names nnwarchive-short-names)
138 (while (setq default (pop defaults))
139 (push (list (intern (concat "nnwarchive-" (symbol-name (car default))))
140 (list 'quote (cdr default))) bind))
141 (while (setq default (pop short-names))
143 (intern (concat "nnwarchive-"
144 (symbol-name default))))
148 (defmacro nnwarchive-bind (&rest body)
149 "Return a `let' form that binds all variables in TYPE.
150 Read `mail-source-bind' for details."
151 `(let ,(nnwarchive-bind-1)
154 (put 'nnwarchive-bind 'lisp-indent-function 0)
155 (put 'nnwarchive-bind 'edebug-form-spec '(form body))
157 (deffoo nnwarchive-retrieve-headers (articles &optional group server fetch-old)
158 (nnwarchive-possibly-change-server group server)
160 (setq nnwarchive-headers (cdr (assoc group nnwarchive-headers-cache)))
162 (set-buffer nnwarchive-buffer)
164 (let (point start starts)
165 (setq starts (nnwarchive-paged (sort articles '<)))
166 (while (setq start (pop starts))
167 (goto-char (point-max))
168 (nnwarchive-url nnwarchive-xover-url))
169 (if nnwarchive-xover-dissect
170 (funcall nnwarchive-xover-dissect))))
172 (set-buffer nntp-server-buffer)
175 (dolist (art articles)
176 (if (setq header (assq art nnwarchive-headers))
177 (nnheader-insert-nov (cdr header))))))
178 (let ((elem (assoc group nnwarchive-headers-cache)))
180 (setcdr elem nnwarchive-headers)
181 (push (cons group nnwarchive-headers) nnwarchive-headers-cache)))
184 (deffoo nnwarchive-request-group (group &optional server dont-check)
185 (nnwarchive-possibly-change-server nil server)
186 (let ((elem (assoc group nnwarchive-groups)))
189 (nnheader-report 'nnwarchive "Group does not exist"))
191 (nnheader-report 'nnwarchive "Opened group %s" group)
193 "211 %d %d %d %s\n" (cadr elem) 1 (cadr elem)
194 (prin1-to-string group))))))
196 (deffoo nnwarchive-close-group (group &optional server)
197 (nnwarchive-possibly-change-server group server)
198 (when (gnus-buffer-live-p nnwarchive-buffer)
200 (set-buffer nnwarchive-buffer)
201 (kill-buffer nnwarchive-buffer)))
204 (deffoo nnwarchive-request-article (article &optional group server buffer)
205 (nnwarchive-possibly-change-server group server)
209 (set-buffer nnwarchive-buffer)
210 (goto-char (point-min))
211 (nnwarchive-url nnwarchive-article-url)
212 (setq contents (funcall nnwarchive-article-dissect)))
215 (set-buffer (or buffer nntp-server-buffer))
218 (nnheader-report 'nnwarchive "Fetched article %s" article)
219 (cons group article))))))
221 (deffoo nnwarchive-close-server (&optional server)
222 (when (and (nnwarchive-server-opened server)
223 (gnus-buffer-live-p nnwarchive-buffer))
225 (set-buffer nnwarchive-buffer)
226 (kill-buffer nnwarchive-buffer)))
227 (nnoo-close-server 'nnwarchive server))
229 (deffoo nnwarchive-request-list (&optional server)
230 (nnwarchive-possibly-change-server nil server)
233 (set-buffer nnwarchive-buffer)
235 (if nnwarchive-list-url
236 (nnwarchive-url nnwarchive-list-url))
237 (if nnwarchive-list-dissect
238 (funcall nnwarchive-list-dissect))
239 (nnwarchive-write-groups)
240 (nnwarchive-generate-active)))
243 (deffoo nnwarchive-request-newgroups (date &optional server)
244 (nnwarchive-possibly-change-server nil server)
246 (nnwarchive-generate-active))
249 (deffoo nnwarchive-asynchronous-p ()
252 (deffoo nnwarchive-server-opened (&optional server)
255 (deffoo nnwarchive-open-server (server &optional defs connectionless)
256 (nnwarchive-init server)
257 (setq nnwarchive-login
260 (format "Login at %s: " server)
262 (setq nnwarchive-passwd
263 (or nnwarchive-passwd
264 (mail-source-read-passwd
265 (format "Password for %s at %s: " nnwarchive-login server))))
268 (set-buffer nnwarchive-buffer)
270 (if nnwarchive-open-url
271 (nnwarchive-url nnwarchive-open-url))
272 (if nnwarchive-open-dissect
273 (funcall nnwarchive-open-dissect)
274 (setq nnwarchive-opened t))))
277 (nnoo-define-skeleton nnwarchive)
279 ;;; Internal functions
281 (defun nnwarchive-possibly-change-server (&optional group server)
282 (nnwarchive-init server)
284 (not (nnwarchive-server-opened server)))
285 (nnwarchive-open-server server)))
287 (defun nnwarchive-read-groups ()
288 (let ((file (expand-file-name (concat "groups-" nnwarchive-address)
289 nnwarchive-directory)))
290 (when (file-exists-p file)
292 (insert-file-contents file)
293 (goto-char (point-min))
294 (setq nnwarchive-groups (read (current-buffer)))))))
296 (defun nnwarchive-write-groups ()
297 (with-temp-file (expand-file-name (concat "groups-" nnwarchive-address)
298 nnwarchive-directory)
299 (prin1 nnwarchive-groups (current-buffer))))
301 (defun nnwarchive-init (server)
302 "Initialize buffers and such."
303 (unless (file-exists-p nnwarchive-directory)
304 (gnus-make-directory nnwarchive-directory))
305 (unless (gnus-buffer-live-p nnwarchive-buffer)
306 (setq nnwarchive-buffer
308 (nnheader-set-temp-buffer
309 (format " *nnwarchive %s %s*" nnwarchive-type server))))))
311 (defun nnwarchive-encode-www-form-urlencoded (pairs)
312 "Return PAIRS encoded for forms."
316 (concat (w3-form-encode-xwfu (car data)) "="
317 (w3-form-encode-xwfu (cdr data)))))
320 (defun nnwarchive-fetch-form (url pairs)
321 (let ((url-request-data (nnwarchive-encode-www-form-urlencoded pairs))
322 (url-request-method "POST")
323 (url-request-extra-headers
324 '(("Content-type" . "application/x-www-form-urlencoded"))))
325 (url-insert-file-contents url)
326 (setq buffer-file-name t))
329 (defun nnwarchive-eval (expr)
332 (cons (nnwarchive-eval (car expr)) (nnwarchive-eval (cdr expr))))
338 (defun nnwarchive-url (xurl)
339 (let ((url-confirmation-func 'identity))
341 ((eq (car xurl) 'post)
343 (nnwarchive-fetch-form (car xurl) (nnwarchive-eval (cdr xurl))))
345 (url-insert-file-contents (apply 'format (nnwarchive-eval xurl)))
346 (setq buffer-file-name nil)))))
348 (defun nnwarchive-decode-entities ()
349 (goto-char (point-min))
350 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t)
351 (replace-match (char-to-string
352 (if (eq (aref (match-string 1) 0) ?\#)
353 (string-to-number (substring (match-string 1) 1))
354 (or (cdr (assq (intern (match-string 1))
359 (defun nnwarchive-decode-entities-string (str)
362 (nnwarchive-decode-entities)
363 (buffer-substring (point-min) (point-max))))
365 (defun nnwarchive-remove-markup ()
366 (goto-char (point-min))
367 (while (search-forward "<!--" nil t)
368 (delete-region (match-beginning 0)
369 (or (search-forward "-->" nil t)
371 (goto-char (point-min))
372 (while (re-search-forward "<[^>]+>" nil t)
373 (replace-match "" t t)))
375 (defun nnwarchive-date-to-date (sdate)
376 (let ((elem (split-string sdate)))
377 (concat (substring (nth 0 elem) 0 3) " "
378 (substring (nth 1 elem) 0 3) " "
379 (substring (nth 2 elem) 0 2) " "
380 (substring (nth 3 elem) 1 6) " "
381 (format-time-string "%Y") " "
384 (defun nnwarchive-generate-active ()
386 (set-buffer nntp-server-buffer)
388 (dolist (elem nnwarchive-groups)
389 (insert (prin1-to-string (car elem))
390 " " (number-to-string (cadr elem)) " 1 y\n"))))
392 (defun nnwarchive-paged (articles)
393 (let (art narts next)
394 (while (setq art (pop articles))
395 (when (and (>= art (or next 0))
396 (not (assq art nnwarchive-headers)))
398 (setq next (+ art nnwarchive-xover-page-size))))
403 (defun nnwarchive-egroups-open ()
404 (setq nnwarchive-opened t))
406 (defun nnwarchive-egroups-list ()
407 (let ((case-fold-search t)
408 group description elem articles)
409 (goto-char (point-min))
412 "/group/\\([^/]+\\)/info\\.html[^>]+>[^>]+>[\040\t]*-[\040\t]*\\([^<]+\\)<"
414 (setq group (match-string 1)
415 description (match-string 2))
417 (when (re-search-forward ">\\([0-9]+\\)<" nil t)
418 (setq articles (string-to-number (match-string 1))))
419 (if (setq elem (assoc group nnwarchive-groups))
420 (setcar (cdr elem) articles)
421 (push (list group articles description) nnwarchive-groups)))
422 (dolist (elem nnwarchive-groups)
423 (setq group (car elem))
425 (nnwarchive-url nnwarchive-xover-last-url)
426 (goto-char (point-min))
427 (when (re-search-forward "of \\([0-9]+\\)</title>" nil t)
428 (setq articles (string-to-number (match-string 1))))
429 (setcar (cdr elem) articles)
430 (setq nnwarchive-headers (cdr (assoc group nnwarchive-headers-cache)))
431 (nnwarchive-egroups-xover)
432 (let ((elem (assoc group nnwarchive-headers-cache)))
434 (setcdr elem nnwarchive-headers)
435 (push (cons group nnwarchive-headers) nnwarchive-headers-cache)))))
438 (defun nnwarchive-egroups-xover()
439 (let (article subject from date group)
440 (goto-char (point-min))
441 (while (re-search-forward
442 "<a href=\"/group/\\([^/]+\\)/\\([0-9]+\\)\\.html[^>]+>\\([^<]+\\)<"
444 (setq group (match-string 1)
445 article (string-to-number (match-string 2))
446 subject (match-string 3))
448 (unless (assq article nnwarchive-headers)
449 (if (looking-at "<td[^>]+><font[^>]+>\\([^<]+\\)</font>")
450 (setq from (match-string 1)))
452 (if (looking-at "<td[^>]+><font[^>]+>\\([^<]+\\)</font>")
453 (setq date (identity (match-string 1))))
456 (make-full-mail-header
458 (nnwarchive-decode-entities-string subject)
459 (nnwarchive-decode-entities-string from)
461 (concat "<" group "%"
462 (number-to-string article)
465 0 nil nil nil)) nnwarchive-headers))))
468 (defun nnwarchive-egroups-article ()
469 (goto-char (point-min))
470 (if (search-forward "<pre>" nil t)
471 (delete-region (point-min) (point)))
472 (goto-char (point-max))
473 (if (search-backward "</pre>" nil t)
474 (delete-region (point) (point-max)))
475 (goto-char (point-min))
476 (while (re-search-forward "<a[^>]+>\\([^<]+\\)</a>" nil t)
477 (replace-match "<\\1>"))
478 (nnwarchive-decode-entities)
479 (buffer-substring (point-min) (point-max)))
481 (provide 'nnwarchive)
483 ;;; nnwarchive.el ends here