X-Git-Url: http://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Fnnweb.el;h=7863bbc2c7fddc3f14fcca6285e7d3b16bea99f7;hp=4cc1fc9dc53ce19a0c2ebd23969ffb80d1d8c2ff;hb=20bc985a3232ebba106d335afcfd6b596bb8efba;hpb=9eb9d4904cd4cf0d9317355ebcb430390833babc diff --git a/lisp/nnweb.el b/lisp/nnweb.el index 4cc1fc9dc..7863bbc2c 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,7 +1,8 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996,97 Free Software Foundation, Inc. +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 +;; Free Software Foundation, Inc. -;; Author: Lars Magne Ingebrigtsen +;; Author: Lars Magne Ingebrigtsen ;; Keywords: news ;; This file is part of GNU Emacs. @@ -18,13 +19,12 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Commentary: -;; Note: You need to have `url' and `w3' installed for this -;; backend to work. +;; Note: You need to have `w3' installed for some functions to work. ;;; Code: @@ -34,46 +34,56 @@ (require 'message) (require 'gnus-util) (require 'gnus) -(require 'w3) -(require 'url) (require 'nnmail) -(ignore-errors - (require 'w3-forms)) +(require 'mm-util) +(require 'mm-url) +(eval-and-compile + (ignore-errors + (require 'url))) +(autoload 'w3-parse-buffer "w3-parse") (nnoo-declare nnweb) (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") "Where nnweb will save its files.") -(defvoo nnweb-type 'dejanews - "What search engine type is being used.") +(defvoo nnweb-type 'google + "What search engine type is being used. +Valid types include `google', `dejanews', and `gmane'.") (defvar nnweb-type-definition - '((dejanews - (article . nnweb-dejanews-wash-article) - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanews-search) - (address . "http://xp9.dejanews.com/dnquery.xp") - (identifier . nnweb-dejanews-identity)) - (reference - (article . nnweb-reference-wash-article) - (map . nnweb-reference-create-mapping) - (search . nnweb-reference-search) - (address . "http://www.reference.com/cgi-bin/pn/go") - (identifier . identity)) - (altavista - (article . nnweb-altavista-wash-article) - (map . nnweb-altavista-create-mapping) - (search . nnweb-altavista-search) - (address . "http://www.altavista.digital.com/cgi-bin/query") - (id . "/cgi-bin/news?id@%s") - (identifier . identity))) + '((google + (article . ignore) + (id . "http://groups.google.de/groups?selm=%s&output=gplain") + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.de/groups") + (base . "http://groups.google.de") + (identifier . nnweb-google-identity)) + (dejanews ;; alias of google + (article . ignore) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (base . "http://groups.google.com") + (identifier . nnweb-google-identity)) + (gmane + (article . nnweb-gmane-wash-article) + (id . "http://gmane.org/view.php?group=%s") + (reference . identity) + (map . nnweb-gmane-create-mapping) + (search . nnweb-gmane-search) + (address . "http://gmane.org/") + (identifier . nnweb-gmane-identity))) "Type-definition alist.") (defvoo nnweb-search nil - "Search string to feed to DejaNews.") + "Search string to feed to Google.") -(defvoo nnweb-max-hits 100 +(defvoo nnweb-max-hits 999 "Maximum number of hits to display.") (defvoo nnweb-ephemeral-p nil @@ -97,14 +107,16 @@ (set-buffer nntp-server-buffer) (erase-buffer) (let (article header) - (while (setq article (pop articles)) - (when (setq header (cadr (assq article nnweb-articles))) - (nnheader-insert-nov header))) + (mm-with-unibyte-current-buffer + (while (setq article (pop articles)) + (when (setq header (cadr (assq article nnweb-articles))) + (nnheader-insert-nov header)))) 'nov))) (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) - (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (if nnweb-ephemeral-p + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) @@ -115,12 +127,14 @@ (when (and group (not (equal group nnweb-group)) (not nnweb-ephemeral-p)) + (setq nnweb-group group + nnweb-articles nil) (let ((info (assoc group nnweb-group-alist))) - (setq nnweb-group group) - (setq nnweb-type (nth 2 info)) - (setq nnweb-search (nth 3 info)) - (unless dont-check - (nnweb-read-overview group)))) + (when info + (setq nnweb-type (nth 2 info)) + (setq nnweb-search (nth 3 info)) + (unless dont-check + (nnweb-read-overview group))))) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) @@ -150,22 +164,26 @@ (let* ((header (cadr (assq article nnweb-articles))) (url (and header (mail-header-xref header)))) (when (or (and url - (nnweb-fetch-url url)) + (mm-with-unibyte-current-buffer + (mm-url-insert url))) (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) - art) + art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) - (and fetch - art - (nnweb-fetch-url - (format fetch article)))))) + (when (and fetch art) + (setq url (format fetch art)) + (mm-with-unibyte-current-buffer + (mm-url-insert url)) + (if (nnweb-definition 'reference t) + (setq article + (funcall (nnweb-definition + 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) - t)))) + (cons group (and (numberp article) article)))))) (deffoo nnweb-close-server (&optional server) (when (and (nnweb-server-opened server) @@ -184,12 +202,10 @@ t)) (deffoo nnweb-request-update-info (group info &optional server) - (nnweb-possibly-change-server group server) - ;;(setcar (cddr info) nil) - ) + (nnweb-possibly-change-server group server)) (deffoo nnweb-asynchronous-p () - t) + nil) (deffoo nnweb-request-create-group (group &optional server args) (nnweb-possibly-change-server nil server) @@ -200,7 +216,8 @@ (deffoo nnweb-request-delete-group (group &optional force server) (nnweb-possibly-change-server group server) - (gnus-delete-assoc group nnweb-group-alist) + (gnus-pull group nnweb-group-alist t) + (nnweb-write-active) (gnus-delete-file (nnweb-overview-file group)) t) @@ -211,7 +228,7 @@ (defun nnweb-read-overview (group) "Read the overview of GROUP and build the map." (when (file-exists-p (nnweb-overview-file group)) - (nnheader-temp-write nil + (mm-with-unibyte-buffer (nnheader-insert-file-contents (nnweb-overview-file group)) (goto-char (point-min)) (let (header) @@ -225,7 +242,7 @@ (defun nnweb-write-overview (group) "Write the overview file for GROUP." - (nnheader-temp-write (nnweb-overview-file group) + (with-temp-file (nnweb-overview-file group) (let ((articles nnweb-articles)) (while articles (nnheader-insert-nov (cadr (pop articles))))))) @@ -246,7 +263,8 @@ (defun nnweb-write-active () "Save the active file." - (nnheader-temp-write (nnheader-concat nnweb-directory "active") + (gnus-make-directory nnweb-directory) + (with-temp-file (nnheader-concat nnweb-directory "active") (prin1 `(setq nnweb-group-alist ',nnweb-group-alist) (current-buffer)))) (defun nnweb-read-active () @@ -268,9 +286,11 @@ (nnweb-open-server server))) (unless nnweb-group-alist (nnweb-read-active)) + (unless nnweb-hashtb + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group (when (and (not nnweb-ephemeral-p) - (not (equal group nnweb-group))) + (equal group nnweb-group)) (nnweb-request-group group nil t)))) (defun nnweb-init (server) @@ -278,413 +298,296 @@ (unless (gnus-buffer-live-p nnweb-buffer) (setq nnweb-buffer (save-excursion - (nnheader-set-temp-buffer - (format " *nnweb %s %s %s*" nnweb-type nnweb-search server)))))) - -(defun nnweb-fetch-url (url) - (save-excursion - (if (not nnheader-callback-function) - (let ((buf (current-buffer))) - (save-excursion - (set-buffer nnweb-buffer) - (erase-buffer) - (url-insert-file-contents url) - (copy-to-buffer buf (point-min) (point-max)) - t)) - (nnweb-url-retrieve-asynch - url 'nnweb-callback (current-buffer) nnheader-callback-function) - t))) - -(defun nnweb-callback (buffer callback) - (when (gnus-buffer-live-p url-working-buffer) - (save-excursion - (set-buffer url-working-buffer) - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities) - (set-buffer buffer) - (goto-char (point-max)) - (insert-buffer-substring url-working-buffer)) - (funcall callback t) - (gnus-kill-buffer url-working-buffer))) - -(defun nnweb-url-retrieve-asynch (url callback &rest data) - (let ((url-request-method "GET") - (old-asynch url-be-asynchronous) - (url-request-data nil) - (url-request-extra-headers nil) - (url-working-buffer (generate-new-buffer-name " *nnweb*"))) - (setq-default url-be-asynchronous t) - (save-excursion - (set-buffer (get-buffer-create url-working-buffer)) - (setq url-current-callback-data data - url-be-asynchronous t - url-current-callback-func callback) - (url-retrieve url)) - (setq-default url-be-asynchronous old-asynch))) - -(defun nnweb-encode-www-form-urlencoded (pairs) - "Return PAIRS encoded for forms." - (mapconcat - (function - (lambda (data) - (concat (w3-form-encode-xwfu (car data)) "=" - (w3-form-encode-xwfu (cdr data))))) - pairs "&")) - -(defun nnweb-fetch-form (url pairs) - (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs)) - (url-request-method "POST") - (url-request-extra-headers - '(("Content-type" . "application/x-www-form-urlencoded")))) - (url-insert-file-contents url) - (setq buffer-file-name nil)) - t) - -(defun nnweb-decode-entities () - (goto-char (point-min)) - (while (re-search-forward "&\\([a-z]+\\);" nil t) - (replace-match (char-to-string (or (cdr (assq (intern (match-string 1)) - w3-html-entities)) - ?#)) - t t))) - -(defun nnweb-remove-markup () - (goto-char (point-min)) - (while (search-forward "" nil t) - (point-max)))) - (goto-char (point-min)) - (while (re-search-forward "<[^>]+>" nil t) - (replace-match "" t t))) + (mm-with-unibyte + (nnheader-set-temp-buffer + (format " *nnweb %s %s %s*" + nnweb-type nnweb-search server)) + (current-buffer)))))) ;;; -;;; DejaNews functions. +;;; Deja bought by google.com ;;; -(defun nnweb-dejanews-create-mapping () - "Perform the search and create an number-to-url alist." +(defun nnweb-google-wash-article () + (let ((case-fold-search t) url) + (goto-char (point-min)) + (re-search-forward "^
" nil t)
+    (narrow-to-region (point-min) (point))
+    (search-backward "" nil t)
+      (replace-match "\n"))
+    (mm-url-remove-markup)
+    (goto-char (point-min))
+    (while (re-search-forward "^[ \t]*\n" nil t)
+      (replace-match ""))
+    (goto-char (point-max))
+    (insert "\n")
+    (widen)
+    (narrow-to-region (point) (point-max))
+    (search-forward "" nil t)
+    (delete-region (point) (point-max))
+    (mm-url-remove-markup)
+    (widen)))
+
+(defun nnweb-google-parse-1 (&optional Message-ID)
+  (let ((i 0)
+	(case-fold-search t)
+	(active (cadr (assoc nnweb-group nnweb-group-alist)))
+	Subject Score Date Newsgroups From
+	map url mid)
+    (unless active
+      (push (list nnweb-group (setq active (cons 1 0))
+		  nnweb-type nnweb-search)
+	    nnweb-group-alist))
+    ;; Go through all the article hits on this page.
+    (goto-char (point-min))
+    (while (re-search-forward
+	    "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+      (setq mid (match-string 2)
+	    url (format
+		 (nnweb-definition 'id) mid))
+      (narrow-to-region (search-forward ">" nil t)
+			(search-forward "" nil t))
+      (mm-url-remove-markup)
+      (mm-url-decode-entities)
+      (setq Subject (buffer-string))
+      (goto-char (point-max))
+      (widen)
+      (forward-line 2)
+      (when (looking-at "
]+>") + (goto-char (match-end 0))) + (if (not (looking-at "]+>")) + (skip-chars-forward " \t") + (narrow-to-region (point) + (search-forward "" nil t)) + (mm-url-remove-markup) + (mm-url-decode-entities) + (setq Newsgroups (buffer-string)) + (goto-char (point-max)) + (widen) + (skip-chars-forward "- \t")) + (when (looking-at + "\\([0-9]+\\)[/ ]\\([A-Za-z]+\\)[/ ]\\([0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - ") - nil 0 (string-to-int Score) url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) - ;; See whether there is a "Get next 20 hits" button here. - (if (or (not (re-search-forward - "HREF=\"\\([^\"]+\\)\">Get next" nil t)) - (>= i nnweb-max-hits)) - (setq more nil) - ;; Yup -- fetch it. - (setq more (match-string 1)) - (erase-buffer) - (url-insert-file-contents more))) - ;; Return the articles in the right order. - (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car)))))) + (incf i 100) + (if (or (not (re-search-forward + "
]+\\).*Next" nil t)) + (>= i nnweb-max-hits)) + (setq more nil) + ;; Yup, there are more articles + (setq more (concat (nnweb-definition 'base) (match-string 1))) + (when more + (erase-buffer) + (mm-url-insert more)))) + ;; Return the articles in the right order. + (setq nnweb-articles + (sort nnweb-articles 'car-less-than-car)))))) -(defun nnweb-dejanews-wash-article () - (let ((case-fold-search t)) - (goto-char (point-min)) - (re-search-forward "
" nil t)
-    (delete-region (point-min) (point))
-    (re-search-forward "
" nil t) - (delete-region (point) (point-max)) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (and (looking-at " *$") - (not (eobp))) - (gnus-delete-line)) - (while (looking-at "\\(^[^ ]+:\\) *") - (replace-match "\\1 " t) - (forward-line 1)) - (when (re-search-forward "\n\n+" nil t) - (replace-match "\n" t t)) - (goto-char (point-min)) - (when (search-forward "[More Headers]" nil t) - (replace-match "" t t)))) - -(defun nnweb-dejanews-search (search) - (nnweb-fetch-form - (nnweb-definition 'address) - `(("query" . ,search) - ("defaultOp" . "AND") - ("svcclass" . "dncurrent") - ("maxhits" . "100") - ("format" . "verbose") - ("threaded" . "0") - ("showsort" . "score") - ("agesign" . "1") - ("ageweight" . "1"))) +(defun nnweb-google-search (search) + (mm-url-insert + (concat + (nnweb-definition 'address) + "?" + (mm-url-encode-www-form-urlencoded + `(("q" . ,search) + ("num" . "100") + ("hq" . "") + ("hl" . "en") + ("lr" . "") + ("safe" . "off") + ("sites" . "groups"))))) t) -(defun nnweb-dejanews-identity (url) +(defun nnweb-google-identity (url) "Return an unique identifier based on URL." - (if (string-match "recnum=\\([0-9]+\\)" url) + (if (string-match "selm=\\([^ &>]+\\)" url) (match-string 1 url) url)) ;;; -;;; InReference +;;; gmane.org ;;; - -(defun nnweb-reference-create-mapping () - "Perform the search and create an number-to-url alist." +(defun nnweb-gmane-create-mapping () + "Perform the search and create a number-to-url alist." (save-excursion (set-buffer nnweb-buffer) (erase-buffer) (when (funcall (nnweb-definition 'search) nnweb-search) - (let ((i 0) - (more t) + (let ((more t) (case-fold-search t) (active (or (cadr (assoc nnweb-group nnweb-group-alist)) (cons 1 0))) - Subject Score Date Newsgroups From Message-ID - map url) - (while more - ;; Go through all the article hits on this page. - (goto-char (point-min)) - (search-forward "
" nil t) - (delete-region (point-min) (point)) - ;(nnweb-decode-entities) - (goto-char (point-min)) - (while (re-search-forward "^ +[0-9]+\\." nil t) - (narrow-to-region - (point) - (if (re-search-forward "^$" nil t) - (match-beginning 0) - (point-max))) - (goto-char (point-min)) - (when (looking-at ".*href=\"\\([^\"]+\\)\"") - (setq url (match-string 1))) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (search-forward "\t" nil t) - (replace-match " ")) - (goto-char (point-min)) - (while (re-search-forward "^\\([^:]+\\): \\(.*\\)$" nil t) - (set (intern (match-string 1)) (match-string 2))) - (widen) - (search-forward "" nil t) - (incf i) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat "(" Newsgroups ") " Subject) From Date - Message-ID - nil 0 (string-to-int Score) url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) - (setq more nil)) + subject group url + map) + ;; Remove stuff from the beginning of results + (goto-char (point-min)) + (search-forward "Search Results