;;; nnweb.el --- retrieving articles via web search engines ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, ;; 2004, 2005, 2006 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: news ;; This file is part of GNU Emacs. ;; GNU Emacs is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ;; Boston, MA 02110-1301, USA. ;;; Commentary: ;; Note: You need to have `w3' installed for some functions to work. ;; FIXME: Due to changes in the HTML output of Gmane, stuff related to Gmane ;; web groups (`gnus-group-make-web-group') doesn't work anymore. ;;; Code: (eval-when-compile (require 'cl)) (require 'nnoo) (require 'message) (require 'gnus-util) (require 'gnus) (require 'nnmail) (require 'mm-util) (require 'mm-url) (eval-and-compile (ignore-errors (require 'url))) (autoload 'w3-parse-buffer "w3-parse") (nnoo-declare nnweb) (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") "Where nnweb will save its files.") (defvoo nnweb-type 'google "What search engine type is being used. Valid types include `google', `dejanews', and `gmane'.") (defvar nnweb-type-definition '((google (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source") (result . "http://groups.google.com/group/%s/msg/%s?dmode=source") (article . nnweb-google-wash-article) (reference . identity) (map . nnweb-google-create-mapping) (search . nnweb-google-search) (address . "http://groups.google.com/groups") (base . "http://groups.google.com") (identifier . nnweb-google-identity)) (dejanews ;; alias of google (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source") (result . "http://groups.google.com/group/%s/msg/%s?dmode=source") (article . nnweb-google-wash-article) (reference . identity) (map . nnweb-google-create-mapping) (search . nnweb-google-search) (address . "http://groups.google.com/groups") (base . "http://groups.google.com") (identifier . nnweb-google-identity)) (gmane (article . nnweb-gmane-wash-article) (id . "http://gmane.org/view.php?group=%s") (reference . identity) (map . nnweb-gmane-create-mapping) (search . nnweb-gmane-search) (address . "http://gmane.org/") (identifier . nnweb-gmane-identity))) "Type-definition alist.") (defvoo nnweb-search nil "Search string to feed to Google.") (defvoo nnweb-max-hits 999 "Maximum number of hits to display.") (defvoo nnweb-ephemeral-p nil "Whether this nnweb server is ephemeral.") ;;; Internal variables (defvoo nnweb-articles nil) (defvoo nnweb-buffer nil) (defvoo nnweb-group-alist nil) (defvoo nnweb-group nil) (defvoo nnweb-hashtb nil) ;;; Interface functions (nnoo-define-basics nnweb) (deffoo nnweb-retrieve-headers (articles &optional group server fetch-old) (nnweb-possibly-change-server group server) (save-excursion (set-buffer nntp-server-buffer) (erase-buffer) (let (article header) (mm-with-unibyte-current-buffer (while (setq article (pop articles)) (when (setq header (cadr (assq article nnweb-articles))) (nnheader-insert-nov header)))) 'nov))) (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) (if nnweb-ephemeral-p (setq nnweb-hashtb (gnus-make-hashtable 4095)) (unless nnweb-articles (nnweb-read-overview group))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) (nnweb-write-overview group))) (deffoo nnweb-request-group (group &optional server dont-check) (nnweb-possibly-change-server group server) (unless (or nnweb-ephemeral-p dont-check nnweb-articles) (nnweb-read-overview group)) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) (t (let ((active (if nnweb-ephemeral-p (cons (caar nnweb-articles) (caar (last nnweb-articles))) (cadr (assoc group nnweb-group-alist))))) (nnheader-report 'nnweb "Opened group %s" group) (nnheader-insert "211 %d %d %d %s\n" (length nnweb-articles) (car active) (cdr active) group))))) (deffoo nnweb-close-group (group &optional server) (nnweb-possibly-change-server group server) (when (gnus-buffer-live-p nnweb-buffer) (save-excursion (set-buffer nnweb-buffer) (set-buffer-modified-p nil) (kill-buffer nnweb-buffer))) t) (deffoo nnweb-request-article (article &optional group server buffer) (nnweb-possibly-change-server group server) (save-excursion (set-buffer (or buffer nntp-server-buffer)) (let* ((header (cadr (assq article nnweb-articles))) (url (and header (mail-header-xref header)))) (when (or (and url (mm-with-unibyte-current-buffer (mm-url-insert url))) (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) (when (and fetch art) (setq url (format fetch art)) (mm-with-unibyte-current-buffer (mm-url-insert url)) (if (nnweb-definition 'reference t) (setq article (funcall (nnweb-definition 'reference) article))))))) (unless nnheader-callback-function (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) (cons group (and (numberp article) article)))))) (deffoo nnweb-close-server (&optional server) (when (and (nnweb-server-opened server) (gnus-buffer-live-p nnweb-buffer)) (save-excursion (set-buffer nnweb-buffer) (set-buffer-modified-p nil) (kill-buffer nnweb-buffer))) (nnoo-close-server 'nnweb server)) (deffoo nnweb-request-list (&optional server) (nnweb-possibly-change-server nil server) (save-excursion (set-buffer nntp-server-buffer) (nnmail-generate-active (list (assoc server nnweb-group-alist))) t)) (deffoo nnweb-request-update-info (group info &optional server) (nnweb-possibly-change-server group server)) (deffoo nnweb-asynchronous-p () nil) (deffoo nnweb-request-create-group (group &optional server args) (nnweb-possibly-change-server nil server) (nnweb-request-delete-group group) (push `(,group ,(cons 1 0)) nnweb-group-alist) (nnweb-write-active) t) (deffoo nnweb-request-delete-group (group &optional force server) (nnweb-possibly-change-server group server) (gnus-pull group nnweb-group-alist t) (nnweb-write-active) (gnus-delete-file (nnweb-overview-file group)) t) (nnoo-define-skeleton nnweb) ;;; Internal functions (defun nnweb-read-overview (group) "Read the overview of GROUP and build the map." (when (file-exists-p (nnweb-overview-file group)) (mm-with-unibyte-buffer (nnheader-insert-file-contents (nnweb-overview-file group)) (goto-char (point-min)) (let (header) (while (not (eobp)) (setq header (nnheader-parse-nov)) (forward-line 1) (push (list (mail-header-number header) header (mail-header-xref header)) nnweb-articles) (nnweb-set-hashtb header (car nnweb-articles))))))) (defun nnweb-write-overview (group) "Write the overview file for GROUP." (with-temp-file (nnweb-overview-file group) (let ((articles nnweb-articles)) (while articles (nnheader-insert-nov (cadr (pop articles))))))) (defun nnweb-set-hashtb (header data) (gnus-sethash (nnweb-identifier (mail-header-xref header)) data nnweb-hashtb)) (defun nnweb-get-hashtb (url) (gnus-gethash (nnweb-identifier url) nnweb-hashtb)) (defun nnweb-identifier (ident) (funcall (nnweb-definition 'identifier) ident)) (defun nnweb-overview-file (group) "Return the name of the overview file of GROUP." (nnheader-concat nnweb-directory group ".overview")) (defun nnweb-write-active () "Save the active file." (gnus-make-directory nnweb-directory) (with-temp-file (nnheader-concat nnweb-directory "active") (prin1 `(setq nnweb-group-alist ',nnweb-group-alist) (current-buffer)))) (defun nnweb-read-active () "Read the active file." (load (nnheader-concat nnweb-directory "active") t t t)) (defun nnweb-definition (type &optional noerror) "Return the definition of TYPE." (let ((def (cdr (assq type (assq nnweb-type nnweb-type-definition))))) (when (and (not def) (not noerror)) (error "Undefined definition %s" type)) def)) (defun nnweb-possibly-change-server (&optional group server) (when server (unless (nnweb-server-opened server) (nnweb-open-server server)) (nnweb-init server)) (unless nnweb-group-alist (nnweb-read-active)) (unless nnweb-hashtb (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group (setq nnweb-group group))) (defun nnweb-init (server) "Initialize buffers and such." (unless (gnus-buffer-live-p nnweb-buffer) (setq nnweb-buffer (save-excursion (mm-with-unibyte (nnheader-set-temp-buffer (format " *nnweb %s %s %s*" nnweb-type nnweb-search server)) (current-buffer)))))) ;;; ;;; groups.google.com ;;; (defun nnweb-google-wash-article () ;; We have Google's masked e-mail addresses here. :-/ (let ((case-fold-search t) (start-re "
\n *")
	(end-re "\n *
")) (goto-char (point-min)) (if (save-excursion (or (re-search-forward "The requested message.*could not be found." nil t) (not (and (re-search-forward start-re nil t) (re-search-forward end-re nil t))))) ;; FIXME: Don't know how to indicate "not found". ;; Should this function throw an error? --rsteib (progn (gnus-message 3 "Requested article not found") (erase-buffer)) (delete-region (point-min) (re-search-forward start-re)) (goto-char (point-min)) (delete-region (progn (re-search-forward end-re) (match-beginning 0)) (point-max)) (mm-url-decode-entities)))) (defun nnweb-google-parse-1 (&optional Message-ID) "Parse search result in current buffer." (let ((i 0) (case-fold-search t) (active (cadr (assoc nnweb-group nnweb-group-alist))) Subject Score Date Newsgroups From map url mid) (unless active (push (list nnweb-group (setq active (cons 1 0))) nnweb-group-alist)) ;; Go through all the article hits on this page. (goto-char (point-min)) (while (re-search-forward "a +href=\"/group/\\([^>\"]+\\)/browse_thread/[^>]+#\\([0-9a-f]+\\)" nil t) (setq Newsgroups (match-string-no-properties 1) ;; Note: Starting with Google Groups 2, `mid' is a Google-internal ;; ID, not a proper Message-ID. mid (match-string-no-properties 2) url (format (nnweb-definition 'result) Newsgroups mid)) (narrow-to-region (search-forward ">" nil t) (search-forward "" nil t)) (mm-url-remove-markup) (mm-url-decode-entities) (setq Subject (buffer-string)) (goto-char (point-max)) (widen) (narrow-to-region (point) (search-forward "\"]+\\)\">= i nnweb-max-hits)) (setq more nil) ;; Yup, there are more articles (setq more (concat (nnweb-definition 'base) (match-string 1))) (when more (erase-buffer) (mm-url-insert more)))) ;; Return the articles in the right order. (setq nnweb-articles (sort nnweb-articles 'car-less-than-car)))))) (defun nnweb-google-search (search) (mm-url-insert (concat (nnweb-definition 'address) "?" (mm-url-encode-www-form-urlencoded `(("q" . ,search) ("num" . "100") ("hq" . "") ("hl" . "en") ("lr" . "") ("safe" . "off") ("sites" . "groups") ("filter" . "0"))))) t) (defun nnweb-google-identity (url) "Return an unique identifier based on URL." (if (string-match "selm=\\([^ &>]+\\)" url) (match-string 1 url) url)) ;;; ;;; gmane.org ;;; (defun nnweb-gmane-create-mapping () "Perform the search and create a number-to-url alist." (save-excursion (set-buffer nnweb-buffer) (erase-buffer) (when (funcall (nnweb-definition 'search) nnweb-search) (let ((more t) (case-fold-search t) (active (or (cadr (assoc nnweb-group nnweb-group-alist)) (cons 1 0))) subject group url map) ;; Remove stuff from the beginning of results (goto-char (point-min)) (search-forward "Search Results