X-Git-Url: http://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Fnnweb.el;h=402d865b8504ea65f829f0480e0c470f3e765e4d;hb=a7c213c57c3e3fac5302d4c2fac24422cffa425c;hp=6d7576de4ee53de02f0d222ad96eda00e96d1ebe;hpb=81901d17becb84f18612bd2ab9b539ee48a2250e;p=gnus diff --git a/lisp/nnweb.el b/lisp/nnweb.el index 6d7576de4..402d865b8 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,7 +1,8 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996 Free Software Foundation, Inc. +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 +;; Free Software Foundation, Inc. -;; Author: Lars Magne Ingebrigtsen +;; Author: Lars Magne Ingebrigtsen ;; Keywords: news ;; This file is part of GNU Emacs. @@ -23,85 +24,130 @@ ;;; Commentary: -;; Note: You need to have `url' and `w3' installed for this -;; backend to work. +;; Note: You need to have `w3' installed for some functions to work. ;;; Code: +(eval-when-compile (require 'cl)) + (require 'nnoo) (require 'message) (require 'gnus-util) -(require 'w3) -(require 'w3-forms) -(require 'url) +(require 'gnus) +(require 'nnmail) +(require 'mm-util) +(require 'mm-url) +(eval-and-compile + (ignore-errors + (require 'url))) +(autoload 'w3-parse-buffer "w3-parse") (nnoo-declare nnweb) -(defvoo nnweb-type 'dejanews - "What search engine type is being used.") +(defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") + "Where nnweb will save its files.") + +(defvoo nnweb-type 'google + "What search engine type is being used. +Valid types include `google', `dejanews', and `gmane'.") (defvar nnweb-type-definition - '((dejanews - (article . nnweb-dejanews-wash-article) - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanews-search) - (address . "http://search.dejanews.com/dnquery.xp")) - (reference - (article . nnweb-reference-wash-article) - (map . nnweb-reference-create-mapping) - (search . nnweb-reference-search) - (address . "http://www.reference.com/cgi-bin/pn/go")) - (altavista - (article . nnweb-altavista-wash-article) - (map . nnweb-altavista-create-mapping) - (search . nnweb-altavista-search) - (address . "http://www.altavista.digital.com/cgi-bin/query") - (id . "/cgi-bin/news?id@%s"))) + '((google + (article . ignore) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) + (dejanews ;; alias of google + (article . ignore) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) + (gmane + (article . nnweb-gmane-wash-article) + (id . "http://gmane.org/view.php?group=%s") + (reference . identity) + (map . nnweb-gmane-create-mapping) + (search . nnweb-gmane-search) + (address . "http://gmane.org/") + (identifier . nnweb-gmane-identity))) "Type-definition alist.") (defvoo nnweb-search nil - "Search string to feed to DejaNews.") + "Search string to feed to Google.") -(defvoo nnweb-max-hits 100 +(defvoo nnweb-max-hits 999 "Maximum number of hits to display.") +(defvoo nnweb-ephemeral-p nil + "Whether this nnweb server is ephemeral.") + ;;; Internal variables (defvoo nnweb-articles nil) (defvoo nnweb-buffer nil) +(defvoo nnweb-group-alist nil) +(defvoo nnweb-group nil) +(defvoo nnweb-hashtb nil) ;;; Interface functions (nnoo-define-basics nnweb) (deffoo nnweb-retrieve-headers (articles &optional group server fetch-old) - (nnweb-possibly-change-server server) + (nnweb-possibly-change-server group server) (save-excursion (set-buffer nntp-server-buffer) (erase-buffer) (let (article header) - (while (setq article (pop articles)) - (when (setq header (cadr (assq article nnweb-articles))) - (nnheader-insert-nov header))) + (mm-with-unibyte-current-buffer + (while (setq article (pop articles)) + (when (setq header (cadr (assq article nnweb-articles))) + (nnheader-insert-nov header)))) 'nov))) +(deffoo nnweb-request-scan (&optional group server) + (nnweb-possibly-change-server group server) + (if nnweb-ephemeral-p + (setq nnweb-hashtb (gnus-make-hashtable 4095))) + (funcall (nnweb-definition 'map)) + (unless nnweb-ephemeral-p + (nnweb-write-active) + (nnweb-write-overview group))) + (deffoo nnweb-request-group (group &optional server dont-check) - (nnweb-possibly-change-server server) - (when (or (not dont-check) - (not nnweb-articles)) - (funcall (nnweb-definition 'map))) + (nnweb-possibly-change-server nil server) + (when (and group + (not (equal group nnweb-group)) + (not nnweb-ephemeral-p)) + (setq nnweb-group group + nnweb-articles nil) + (let ((info (assoc group nnweb-group-alist))) + (when info + (setq nnweb-type (nth 2 info)) + (setq nnweb-search (nth 3 info)) + (unless dont-check + (nnweb-read-overview group))))) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) (t - (nnheader-report 'nnweb "Opened group %s" group) - (nnheader-insert - "211 %d %d %d %s\n" (length nnweb-articles) - (caar nnweb-articles) (caar (last nnweb-articles)) - group)))) + (let ((active (if nnweb-ephemeral-p + (cons (caar nnweb-articles) + (caar (last nnweb-articles))) + (cadr (assoc group nnweb-group-alist))))) + (nnheader-report 'nnweb "Opened group %s" group) + (nnheader-insert + "211 %d %d %d %s\n" (length nnweb-articles) + (car active) (cdr active) group))))) (deffoo nnweb-close-group (group &optional server) - (nnweb-possibly-change-server server) + (nnweb-possibly-change-server group server) (when (gnus-buffer-live-p nnweb-buffer) (save-excursion (set-buffer nnweb-buffer) @@ -110,26 +156,32 @@ t) (deffoo nnweb-request-article (article &optional group server buffer) - (nnweb-possibly-change-server server) + (nnweb-possibly-change-server group server) (save-excursion (set-buffer (or buffer nntp-server-buffer)) - (let ((url (caddr (assq article nnweb-articles)))) + (let* ((header (cadr (assq article nnweb-articles))) + (url (and header (mail-header-xref header)))) (when (or (and url - (nnweb-fetch-url url)) + (mm-with-unibyte-current-buffer + (mm-url-insert url))) (and (stringp article) + (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) - art) + art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) - (and fetch - art - (nnweb-fetch-url - (format fetch article)))))) + (when (and fetch art) + (setq url (format fetch art)) + (mm-with-unibyte-current-buffer + (mm-url-insert url)) + (if (nnweb-definition 'reference t) + (setq article + (funcall (nnweb-definition + 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) - t)))) + (cons group (and (numberp article) article)))))) (deffoo nnweb-close-server (&optional server) (when (and (nnweb-server-opened server) @@ -140,420 +192,398 @@ (kill-buffer nnweb-buffer))) (nnoo-close-server 'nnweb server)) +(deffoo nnweb-request-list (&optional server) + (nnweb-possibly-change-server nil server) + (save-excursion + (set-buffer nntp-server-buffer) + (nnmail-generate-active nnweb-group-alist) + t)) + (deffoo nnweb-request-update-info (group info &optional server) - (nnweb-possibly-change-server server) - (setcar (cddr info) nil)) + (nnweb-possibly-change-server group server)) (deffoo nnweb-asynchronous-p () + nil) + +(deffoo nnweb-request-create-group (group &optional server args) + (nnweb-possibly-change-server nil server) + (nnweb-request-delete-group group) + (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist) + (nnweb-write-active) + t) + +(deffoo nnweb-request-delete-group (group &optional force server) + (nnweb-possibly-change-server group server) + (gnus-pull group nnweb-group-alist t) + (nnweb-write-active) + (gnus-delete-file (nnweb-overview-file group)) t) (nnoo-define-skeleton nnweb) ;;; Internal functions -(defun nnweb-definition (type) +(defun nnweb-read-overview (group) + "Read the overview of GROUP and build the map." + (when (file-exists-p (nnweb-overview-file group)) + (mm-with-unibyte-buffer + (nnheader-insert-file-contents (nnweb-overview-file group)) + (goto-char (point-min)) + (let (header) + (while (not (eobp)) + (setq header (nnheader-parse-nov)) + (forward-line 1) + (push (list (mail-header-number header) + header (mail-header-xref header)) + nnweb-articles) + (nnweb-set-hashtb header (car nnweb-articles))))))) + +(defun nnweb-write-overview (group) + "Write the overview file for GROUP." + (with-temp-file (nnweb-overview-file group) + (let ((articles nnweb-articles)) + (while articles + (nnheader-insert-nov (cadr (pop articles))))))) + +(defun nnweb-set-hashtb (header data) + (gnus-sethash (nnweb-identifier (mail-header-xref header)) + data nnweb-hashtb)) + +(defun nnweb-get-hashtb (url) + (gnus-gethash (nnweb-identifier url) nnweb-hashtb)) + +(defun nnweb-identifier (ident) + (funcall (nnweb-definition 'identifier) ident)) + +(defun nnweb-overview-file (group) + "Return the name of the overview file of GROUP." + (nnheader-concat nnweb-directory group ".overview")) + +(defun nnweb-write-active () + "Save the active file." + (gnus-make-directory nnweb-directory) + (with-temp-file (nnheader-concat nnweb-directory "active") + (prin1 `(setq nnweb-group-alist ',nnweb-group-alist) (current-buffer)))) + +(defun nnweb-read-active () + "Read the active file." + (load (nnheader-concat nnweb-directory "active") t t t)) + +(defun nnweb-definition (type &optional noerror) "Return the definition of TYPE." (let ((def (cdr (assq type (assq nnweb-type nnweb-type-definition))))) - (unless def + (when (and (not def) + (not noerror)) (error "Undefined definition %s" type)) def)) -(defun nnweb-possibly-change-server (&optional server) +(defun nnweb-possibly-change-server (&optional group server) (nnweb-init server) (when server (unless (nnweb-server-opened server) - (nnweb-open-server server)))) + (nnweb-open-server server))) + (unless nnweb-group-alist + (nnweb-read-active)) + (unless nnweb-hashtb + (setq nnweb-hashtb (gnus-make-hashtable 4095))) + (when group + (when (and (not nnweb-ephemeral-p) + (equal group nnweb-group)) + (nnweb-request-group group nil t)))) (defun nnweb-init (server) "Initialize buffers and such." (unless (gnus-buffer-live-p nnweb-buffer) (setq nnweb-buffer (save-excursion - (nnheader-set-temp-buffer - (format " *nnweb %s %s %s*" nnweb-type nnweb-search server)))))) - -(defun nnweb-fetch-url (url) - (save-excursion - (if (not nnheader-callback-function) - (let ((buf (current-buffer))) - (save-excursion - (set-buffer nnweb-buffer) - (erase-buffer) - (prog1 - (url-insert-file-contents url) - (copy-to-buffer buf (point-min) (point-max))))) - (nnweb-url-retrieve-asynch - url 'nnweb-callback (current-buffer) nnheader-callback-function) - t))) - -(defun nnweb-callback (buffer callback) - (when (gnus-buffer-live-p url-working-buffer) - (save-excursion - (set-buffer url-working-buffer) - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities) - (set-buffer buffer) - (goto-char (point-max)) - (insert-buffer-substring url-working-buffer)) - (funcall callback t) - (gnus-kill-buffer url-working-buffer))) - -(defun nnweb-url-retrieve-asynch (url callback &rest data) - (let ((url-request-method "GET") - (old-asynch url-be-asynchronous) - (url-request-data nil) - (url-request-extra-headers nil) - (url-working-buffer (generate-new-buffer-name " *nnweb*"))) - (setq-default url-be-asynchronous t) - (save-excursion - (set-buffer (get-buffer-create url-working-buffer)) - (setq url-current-callback-data data - url-be-asynchronous t - url-current-callback-func callback) - (url-retrieve url)) - (setq-default url-be-asynchronous old-asynch))) - -(defun nnweb-encode-www-form-urlencoded (pairs) - "Return PAIRS encoded for forms." - (mapconcat - (function - (lambda (data) - (concat (w3-form-encode-xwfu (car data)) "=" - (w3-form-encode-xwfu (cdr data))))) pairs "&")) - -(defun nnweb-fetch-form (url pairs) - (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs)) - (url-request-method 'POST) - (url-request-extra-headers - '(("Content-type" . "application/x-www-form-urlencoded")))) - (url-insert-file-contents url) - (setq buffer-file-name nil)) - t) - -(defun nnweb-decode-entities () - (goto-char (point-min)) - (while (re-search-forward "&\\([a-z]+\\);" nil t) - (replace-match (char-to-string (or (cdr (assq (intern (match-string 1)) - w3-html-entities )) - ?#)) - t t))) - -(defun nnweb-remove-markup () - (goto-char (point-min)) - (while (search-forward "" nil t) - (point-max)))) - (goto-char (point-min)) - (while (re-search-forward "<[^>]+>" nil t) - (replace-match "" t t))) + (mm-with-unibyte + (nnheader-set-temp-buffer + (format " *nnweb %s %s %s*" + nnweb-type nnweb-search server)) + (current-buffer)))))) ;;; -;;; DejaNews functions. +;;; Deja bought by google.com ;;; -(defun nnweb-dejanews-create-mapping () - "Perform the search and create an number-to-url alist." +(defun nnweb-google-wash-article () + (let ((case-fold-search t) url) + (goto-char (point-min)) + (re-search-forward "^
" nil t)
+    (narrow-to-region (point-min) (point))
+    (search-backward "" nil t)
+      (replace-match "\n"))
+    (mm-url-remove-markup)
+    (goto-char (point-min))
+    (while (re-search-forward "^[ \t]*\n" nil t)
+      (replace-match ""))
+    (goto-char (point-max))
+    (insert "\n")
+    (widen)
+    (narrow-to-region (point) (point-max))
+    (search-forward "" nil t)
+    (delete-region (point) (point-max))
+    (mm-url-remove-markup)
+    (widen)))
+
+(defun nnweb-google-parse-1 (&optional Message-ID)
+  (let ((i 0)
+	(case-fold-search t)
+	(active (cadr (assoc nnweb-group nnweb-group-alist)))
+	Subject Score Date Newsgroups From
+	map url mid)
+    (unless active
+      (push (list nnweb-group (setq active (cons 1 0))
+		  nnweb-type nnweb-search)
+	    nnweb-group-alist))
+    ;; Go through all the article hits on this page.
+    (goto-char (point-min))
+    (while (re-search-forward
+	    "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+      (setq mid (match-string 2)
+	    url (format
+		 "http://groups.google.com/groups?selm=%s&output=gplain" mid))
+      (narrow-to-region (search-forward ">" nil t)
+			(search-forward "" nil t))
+      (mm-url-remove-markup)
+      (mm-url-decode-entities)
+      (setq Subject (buffer-string))
+      (goto-char (point-max))
+      (widen)
+      (forward-line 1)
+      (when (looking-at "
]+>") + (goto-char (match-end 0))) + (if (not (looking-at "]+>")) + (skip-chars-forward " \t") + (narrow-to-region (point) + (search-forward "" nil t)) + (mm-url-remove-markup) + (mm-url-decode-entities) + (setq Newsgroups (buffer-string)) + (goto-char (point-max)) + (widen) + (skip-chars-forward "- \t")) + (when (looking-at + "\\([0-9]+\\)[/ ]\\([A-Za-z]+\\)[/ ]\\([0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - ") - nil 0 (string-to-int Score) nil) - url) - map)) - ;; See whether there is a "Get next 20 hits" button here. - (if (or (not (re-search-forward - "HREF=\"\\([^\"]+\\)\">Get next" nil t)) - (>= i nnweb-max-hits)) - (setq more nil) - ;; Yup -- fetch it. - (setq more (match-string 1)) - (erase-buffer) - (url-insert-file-contents more))) - ;; Return the articles in the right order. - (setq nnweb-articles (nreverse map)))))) - -(defun nnweb-dejanews-wash-article () - (let ((case-fold-search t)) - (goto-char (point-min)) - (re-search-forward "
" nil t)
-    (delete-region (point-min) (point))
-    (re-search-forward "
" nil t) - (delete-region (point) (point-max)) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (and (looking-at " *$") - (not (eobp))) - (gnus-delete-line)) - (while (looking-at "\\(^[^ ]+:\\) *") - (replace-match "\\1 " t) - (forward-line 1)) - (when (re-search-forward "\n\n+" nil t) - (replace-match "\n" t t)))) - -(defun nnweb-dejanews-search (search) - (nnweb-fetch-form - (nnweb-definition 'address) - `(("query" . ,search) - ("defaultOp" . "AND") - ("svcclass" . "dncurrent") - ("maxhits" . "100") - ("format" . "verbose") - ("threaded" . "0") - ("showsort" . "score") - ("agesign" . "1") - ("ageweight" . "1"))) + (incf i 100) + (if (or (not (re-search-forward + "
]+\\).*Next" nil t)) + (>= i nnweb-max-hits)) + (setq more nil) + ;; Yup, there are more articles + (setq more (concat "http://groups.google.com" (match-string 1))) + (when more + (erase-buffer) + (mm-url-insert more)))) + ;; Return the articles in the right order. + (setq nnweb-articles + (sort nnweb-articles 'car-less-than-car)))))) + +(defun nnweb-google-search (search) + (mm-url-insert + (concat + (nnweb-definition 'address) + "?" + (mm-url-encode-www-form-urlencoded + `(("q" . ,search) + ("num". "100") + ("hq" . "") + ("hl" . "") + ("lr" . "") + ("safe" . "off") + ("sites" . "groups"))))) t) +(defun nnweb-google-identity (url) + "Return an unique identifier based on URL." + (if (string-match "selm=\\([^ &>]+\\)" url) + (match-string 1 url) + url)) + ;;; -;;; InReference +;;; gmane.org ;;; - -(defun nnweb-reference-create-mapping () - "Perform the search and create an number-to-url alist." +(defun nnweb-gmane-create-mapping () + "Perform the search and create a number-to-url alist." (save-excursion (set-buffer nnweb-buffer) (erase-buffer) (when (funcall (nnweb-definition 'search) nnweb-search) - (let ((i 0) - (more t) + (let ((more t) (case-fold-search t) - Subject Score Date Newsgroups From Message-ID - map url) - (while more - ;; Go through all the article hits on this page. - (goto-char (point-min)) - (search-forward "
" nil t) - (delete-region (point-min) (point)) - ;(nnweb-decode-entities) - (goto-char (point-min)) - (while (re-search-forward "^ +[0-9]+\\." nil t) - (narrow-to-region - (point) - (if (re-search-forward "^$" nil t) - (match-beginning 0) - (point-max))) - (goto-char (point-min)) - (when (looking-at ".*href=\"\\([^\"]+\\)\"") - (setq url (match-string 1))) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (search-forward "\t" nil t) - (replace-match " ")) - (goto-char (point-min)) - (while (re-search-forward "^\\([^:]+\\): \\(.*\\)$" nil t) - (set (intern (match-string 1)) (match-string 2))) - (widen) - (search-forward "" nil t) + (active (or (cadr (assoc nnweb-group nnweb-group-alist)) + (cons 1 0))) + subject group url + map) + ;; Remove stuff from the beginning of results + (goto-char (point-min)) + (search-forward "Search Results