X-Git-Url: http://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Fnnweb.el;h=5600ad88e34e3e7181b156b6b36cba35e583e329;hp=beba951b522f5cea45150d54fae84a3aff9a4837;hb=786d05e27f23ae1e1254d90a50f61487e168c616;hpb=1ad0fb802b45c86efafcbddcc637677503c838c8 diff --git a/lisp/nnweb.el b/lisp/nnweb.el index beba951b5..5600ad88e 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,6 +1,7 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 -;; Free Software Foundation, Inc. + +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, +;; 2004, 2005 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: news @@ -19,13 +20,12 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Commentary: -;; Note: You need to have `url' and `w3' installed for this -;; backend to work. +;; Note: You need to have `w3' installed for some functions to work. ;;; Code: @@ -37,69 +37,52 @@ (require 'gnus) (require 'nnmail) (require 'mm-util) -(eval-when-compile +(require 'mm-url) +(eval-and-compile (ignore-errors - (require 'w3) - (require 'url) - (require 'w3-forms))) - -;; Report failure to find w3 at load time if appropriate. -(unless noninteractive - (eval '(progn - (require 'w3) - (require 'url) - (require 'w3-forms)))) + (require 'url))) +(autoload 'w3-parse-buffer "w3-parse") (nnoo-declare nnweb) (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") "Where nnweb will save its files.") -(defvoo nnweb-type 'dejanews +(defvoo nnweb-type 'google "What search engine type is being used. -Valid types include `dejanews', `dejanewsold', `reference', -and `altavista'.") +Valid types include `google', `dejanews', and `gmane'.") (defvar nnweb-type-definition - '( - (dejanews ;; bought by google.com - (article . nnweb-google-wash-article) - (id . "http://groups.google.com/groups?as_umsgid=%s") - (reference . nnweb-google-reference) + '((google + (article . ignore) + (id . "http://groups.google.de/groups?selm=%s&output=gplain") + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.de/groups") + (base . "http://groups.google.de") + (identifier . nnweb-google-identity)) + (dejanews ;; alias of google + (article . ignore) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (reference . identity) (map . nnweb-google-create-mapping) (search . nnweb-google-search) (address . "http://groups.google.com/groups") + (base . "http://groups.google.com") (identifier . nnweb-google-identity)) -;;; (dejanews -;;; (article . ignore) -;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") -;;; (map . nnweb-dejanews-create-mapping) -;;; (search . nnweb-dejanews-search) -;;; (address . "http://www.deja.com/=dnc/qs.xp") -;;; (identifier . nnweb-dejanews-identity)) -;;; (dejanewsold -;;; (article . ignore) -;;; (map . nnweb-dejanews-create-mapping) -;;; (search . nnweb-dejanewsold-search) -;;; (address . "http://www.deja.com/dnquery.xp") -;;; (identifier . nnweb-dejanews-identity)) - (reference - (article . nnweb-reference-wash-article) - (map . nnweb-reference-create-mapping) - (search . nnweb-reference-search) - (address . "http://www.reference.com/cgi-bin/pn/go") - (identifier . identity)) - (altavista - (article . nnweb-altavista-wash-article) - (map . nnweb-altavista-create-mapping) - (search . nnweb-altavista-search) - (address . "http://www.altavista.digital.com/cgi-bin/query") - (id . "/cgi-bin/news?id@%s") - (identifier . identity))) + (gmane + (article . nnweb-gmane-wash-article) + (id . "http://gmane.org/view.php?group=%s") + (reference . identity) + (map . nnweb-gmane-create-mapping) + (search . nnweb-gmane-search) + (address . "http://gmane.org/") + (identifier . nnweb-gmane-identity))) "Type-definition alist.") (defvoo nnweb-search nil - "Search string to feed to DejaNews.") + "Search string to feed to Google.") (defvoo nnweb-max-hits 999 "Maximum number of hits to display.") @@ -133,6 +116,8 @@ and `altavista'.") (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) + (if nnweb-ephemeral-p + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) @@ -181,7 +166,7 @@ and `altavista'.") (url (and header (mail-header-xref header)))) (when (or (and url (mm-with-unibyte-current-buffer - (nnweb-fetch-url url))) + (mm-url-insert url))) (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) @@ -189,16 +174,15 @@ and `altavista'.") (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) (when (and fetch art) - (setq url (format fetch article)) + (setq url (format fetch art)) (mm-with-unibyte-current-buffer - (nnweb-fetch-url url)) + (mm-url-insert url)) (if (nnweb-definition 'reference t) (setq article - (funcall (nnweb-definition + (funcall (nnweb-definition 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) (cons group (and (numberp article) article)))))) @@ -222,7 +206,7 @@ and `altavista'.") (nnweb-possibly-change-server group server)) (deffoo nnweb-asynchronous-p () - t) + nil) (deffoo nnweb-request-create-group (group &optional server args) (nnweb-possibly-change-server nil server) @@ -303,10 +287,11 @@ and `altavista'.") (nnweb-open-server server))) (unless nnweb-group-alist (nnweb-read-active)) + (unless nnweb-hashtb + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group (when (and (not nnweb-ephemeral-p) - (not (equal group nnweb-group))) - (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (equal group nnweb-group)) (nnweb-request-group group nil t)))) (defun nnweb-init (server) @@ -320,381 +305,6 @@ and `altavista'.") nnweb-type nnweb-search server)) (current-buffer)))))) -(defun nnweb-fetch-url (url) - (let (buf) - (save-excursion - (if (not nnheader-callback-function) - (progn - (with-temp-buffer - (mm-enable-multibyte) - (let ((coding-system-for-read 'binary) - (coding-system-for-write 'binary) - (default-process-coding-system 'binary)) - (nnweb-insert url)) - (setq buf (buffer-string))) - (erase-buffer) - (insert buf) - t) - (nnweb-url-retrieve-asynch - url 'nnweb-callback (current-buffer) nnheader-callback-function) - t)))) - -(defun nnweb-callback (buffer callback) - (when (gnus-buffer-live-p url-working-buffer) - (save-excursion - (set-buffer url-working-buffer) - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities) - (set-buffer buffer) - (goto-char (point-max)) - (insert-buffer-substring url-working-buffer)) - (funcall callback t) - (gnus-kill-buffer url-working-buffer))) - -(defun nnweb-url-retrieve-asynch (url callback &rest data) - (let ((url-request-method "GET") - (old-asynch url-be-asynchronous) - (url-request-data nil) - (url-request-extra-headers nil) - (url-working-buffer (generate-new-buffer-name " *nnweb*"))) - (setq-default url-be-asynchronous t) - (save-excursion - (set-buffer (get-buffer-create url-working-buffer)) - (setq url-current-callback-data data - url-be-asynchronous t - url-current-callback-func callback) - (url-retrieve url nil)) - (setq-default url-be-asynchronous old-asynch))) - -(if (fboundp 'url-retrieve-synchronously) - (defun nnweb-url-retrieve-asynch (url callback &rest data) - (url-retrieve url callback data))) - -;;; -;;; DejaNews functions. -;;; - -(defun nnweb-dejanews-create-mapping () - "Perform the search and create an number-to-url alist." - (save-excursion - (set-buffer nnweb-buffer) - (erase-buffer) - (when (funcall (nnweb-definition 'search) nnweb-search) - (let ((i 0) - (more t) - (case-fold-search t) - (active (or (cadr (assoc nnweb-group nnweb-group-alist)) - (cons 1 0))) - subject date from - map url parse a table group text) - (while more - ;; Go through all the article hits on this page. - (goto-char (point-min)) - (setq parse (w3-parse-buffer (current-buffer)) - table (nth 1 (nnweb-parse-find-all 'table parse))) - (dolist (row (nth 2 (car (nth 2 table)))) - (setq a (nnweb-parse-find 'a row) - url (cdr (assq 'href (nth 1 a))) - text (nreverse (nnweb-text row))) - (when a - (setq subject (nth 4 text) - group (nth 2 text) - date (nth 1 text) - from (nth 0 text)) - (if (string-match "\\([0-9]+\\)/\\([0-9]+\\)/\\([0-9]+\\)" date) - (setq date (format "%s %s 00:00:00 %s" - (car (rassq (string-to-number - (match-string 2 date)) - parse-time-months)) - (match-string 3 date) - (match-string 1 date))) - (setq date "Jan 1 00:00:00 0000")) - (incf i) - (setq url (concat url "&fmt=text")) - (when (string-match "&context=[^&]+" url) - (setq url (replace-match "" t t url))) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat subject " (" group ")") from date - (concat "<" (nnweb-identifier url) "@dejanews>") - nil 0 0 url)) - map) - (nnweb-set-hashtb (cadar map) (car map))))) - ;; See whether there is a "Get next 20 hits" button here. - (goto-char (point-min)) - (if (or (not (re-search-forward - "HREF=\"\\([^\"]+\\)\"[<>b]+Next result" nil t)) - (>= i nnweb-max-hits)) - (setq more nil) - ;; Yup -- fetch it. - (setq more (match-string 1)) - (erase-buffer) - (url-insert-file-contents more))) - ;; Return the articles in the right order. - (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car)))))) - -(defun nnweb-dejanews-search (search) - (nnweb-insert - (concat - (nnweb-definition 'address) - "?" - (nnweb-encode-www-form-urlencoded - `(("ST" . "PS") - ("svcclass" . "dnyr") - ("QRY" . ,search) - ("defaultOp" . "AND") - ("DBS" . "1") - ("OP" . "dnquery.xp") - ("LNG" . "ALL") - ("maxhits" . "100") - ("threaded" . "0") - ("format" . "verbose2") - ("showsort" . "date") - ("agesign" . "1") - ("ageweight" . "1"))))) - t) - -(defun nnweb-dejanewsold-search (search) - (nnweb-fetch-form - (nnweb-definition 'address) - `(("query" . ,search) - ("defaultOp" . "AND") - ("svcclass" . "dnold") - ("maxhits" . "100") - ("format" . "verbose2") - ("threaded" . "0") - ("showsort" . "date") - ("agesign" . "1") - ("ageweight" . "1"))) - t) - -(defun nnweb-dejanews-identity (url) - "Return an unique identifier based on URL." - (if (string-match "AN=\\([0-9]+\\)" url) - (match-string 1 url) - url)) - -;;; -;;; InReference -;;; - -(defun nnweb-reference-create-mapping () - "Perform the search and create an number-to-url alist." - (save-excursion - (set-buffer nnweb-buffer) - (erase-buffer) - (when (funcall (nnweb-definition 'search) nnweb-search) - (let ((i 0) - (more t) - (case-fold-search t) - (active (or (cadr (assoc nnweb-group nnweb-group-alist)) - (cons 1 0))) - Subject Score Date Newsgroups From Message-ID - map url) - (while more - ;; Go through all the article hits on this page. - (goto-char (point-min)) - (search-forward "
" nil t) - (delete-region (point-min) (point)) - (goto-char (point-min)) - (while (re-search-forward "^ +[0-9]+\\." nil t) - (narrow-to-region - (point) - (if (re-search-forward "^$" nil t) - (match-beginning 0) - (point-max))) - (goto-char (point-min)) - (when (looking-at ".*href=\"\\([^\"]+\\)\"") - (setq url (match-string 1))) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (search-forward "\t" nil t) - (replace-match " ")) - (goto-char (point-min)) - (while (re-search-forward "^\\([^:]+\\): \\(.*\\)$" nil t) - (set (intern (match-string 1)) (match-string 2))) - (widen) - (search-forward "" nil t) - (incf i) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat "(" Newsgroups ") " Subject) From Date - Message-ID - nil 0 (string-to-int Score) url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) - (setq more nil)) - ;; Return the articles in the right order. - (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car)))))) - -(defun nnweb-reference-wash-article () - (let ((case-fold-search t)) - (goto-char (point-min)) - (re-search-forward "^
" nil t) - (delete-region (point-min) (point)) - (search-forward "
" nil t)
-    (forward-line -1)
-    (let ((body (point-marker)))
-      (search-forward "
" nil t) - (delete-region (point) (point-max)) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (looking-at " *$") - (gnus-delete-line)) - (narrow-to-region (point-min) body) - (while (and (re-search-forward "^$" nil t) - (not (eobp))) - (gnus-delete-line)) - (goto-char (point-min)) - (while (looking-at "\\(^[^ ]+:\\) *") - (replace-match "\\1 " t) - (forward-line 1)) - (goto-char (point-min)) - (when (re-search-forward "^References:" nil t) - (narrow-to-region - (point) (if (re-search-forward "^$\\|^[^:]+:" nil t) - (match-beginning 0) - (point-max))) - (goto-char (point-min)) - (while (not (eobp)) - (unless (looking-at "References") - (insert "\t") - (forward-line 1))) - (goto-char (point-min)) - (while (search-forward "," nil t) - (replace-match " " t t))) - (widen) - (set-marker body nil)))) - -(defun nnweb-reference-search (search) - (url-insert-file-contents - (concat - (nnweb-definition 'address) - "?" - (nnweb-encode-www-form-urlencoded - `(("search" . "advanced") - ("querytext" . ,search) - ("subj" . "") - ("name" . "") - ("login" . "") - ("host" . "") - ("organization" . "") - ("groups" . "") - ("keywords" . "") - ("choice" . "Search") - ("startmonth" . "Jul") - ("startday" . "25") - ("startyear" . "1996") - ("endmonth" . "Aug") - ("endday" . "24") - ("endyear" . "1996") - ("mode" . "Quick") - ("verbosity" . "Verbose") - ("ranking" . "Relevance") - ("first" . "1") - ("last" . "25") - ("score" . "50"))))) - (setq buffer-file-name nil) - t) - -;;; -;;; Alta Vista -;;; - -(defun nnweb-altavista-create-mapping () - "Perform the search and create an number-to-url alist." - (save-excursion - (set-buffer nnweb-buffer) - (erase-buffer) - (let ((part 0)) - (when (funcall (nnweb-definition 'search) nnweb-search part) - (let ((i 0) - (more t) - (case-fold-search t) - (active (or (cadr (assoc nnweb-group nnweb-group-alist)) - (cons 1 0))) - subject date from id group - map url) - (while more - ;; Go through all the article hits on this page. - (goto-char (point-min)) - (search-forward "
" nil t) - (delete-region (point-min) (match-beginning 0)) - (goto-char (point-min)) - (while (search-forward "
" nil t) - (replace-match "\n")) - (nnweb-decode-entities) - (goto-char (point-min)) - (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\([^>]*\\)
\\([^-]+\\)- \\([^<]+\\)<.*href=\"news:\\([^\"]+\\)\">.*\">\\(.+\\)

" - nil t) - (setq url (match-string 1) - subject (match-string 2) - date (match-string 3) - group (match-string 4) - id (concat "<" (match-string 5) ">") - from (match-string 6)) - (incf i) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat "(" group ") " subject) from date - id nil 0 0 url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) - ;; See if we want more. - (when (or (not nnweb-articles) - (>= i nnweb-max-hits) - (not (funcall (nnweb-definition 'search) - nnweb-search (incf part)))) - (setq more nil))) - ;; Return the articles in the right order. - (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car))))))) - -(defun nnweb-altavista-wash-article () - (goto-char (point-min)) - (let ((case-fold-search t)) - (when (re-search-forward "^" nil t) - (delete-region (point-min) (match-beginning 0))) - (goto-char (point-min)) - (while (looking-at "\\([^ ]+\\) + +\\(.*\\)$") - (replace-match "\\1: \\2" t) - (forward-line 1)) - (when (re-search-backward "^References:" nil t) - (narrow-to-region (point) (progn (forward-line 1) (point))) - (goto-char (point-min)) - (while (re-search-forward "[0-9]+" nil t) - (replace-match "<\\1> " t))) - (widen) - (nnweb-remove-markup))) - -(defun nnweb-altavista-search (search &optional part) - (url-insert-file-contents - (concat - (nnweb-definition 'address) - "?" - (nnweb-encode-www-form-urlencoded - `(("pg" . "aq") - ("what" . "news") - ,@(when part `(("stq" . ,(int-to-string (* part 30))))) - ("fmt" . "d") - ("q" . ,search) - ("r" . "") - ("d0" . "") - ("d1" . ""))))) - (setq buffer-file-name nil) - t) - ;;; ;;; Deja bought by google.com ;;; @@ -713,7 +323,7 @@ and `altavista'.") (goto-char (point-min)) (while (search-forward "
" nil t) (replace-match "\n")) - (nnweb-remove-markup) + (mm-url-remove-markup) (goto-char (point-min)) (while (re-search-forward "^[ \t]*\n" nil t) (replace-match "")) @@ -723,7 +333,7 @@ and `altavista'.") (narrow-to-region (point) (point-max)) (search-forward "" nil t) (delete-region (point) (point-max)) - (nnweb-remove-markup) + (mm-url-remove-markup) (widen))) (defun nnweb-google-parse-1 (&optional Message-ID) @@ -731,22 +341,22 @@ and `altavista'.") (case-fold-search t) (active (cadr (assoc nnweb-group nnweb-group-alist))) Subject Score Date Newsgroups From - map url) + map url mid) (unless active - (push (list nnweb-group (setq active (cons 1 0)) + (push (list nnweb-group (setq active (cons 1 0)) nnweb-type nnweb-search) nnweb-group-alist)) ;; Go through all the article hits on this page. (goto-char (point-min)) (while (re-search-forward - "a href=/groups\\(\\?[^ \">]*selm=[^ \">]*\\)" nil t) - (setq url - (concat (nnweb-definition 'address) - (match-string 1))) + "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t) + (setq mid (match-string 2) + url (format + (nnweb-definition 'id) mid)) (narrow-to-region (search-forward ">" nil t) (search-forward "" nil t)) - (nnweb-remove-markup) - (nnweb-decode-entities) + (mm-url-remove-markup) + (mm-url-decode-entities) (setq Subject (buffer-string)) (goto-char (point-max)) (widen) @@ -757,16 +367,18 @@ and `altavista'.") (skip-chars-forward " \t") (narrow-to-region (point) (search-forward "" nil t)) - (nnweb-remove-markup) - (nnweb-decode-entities) + (mm-url-remove-markup) + (mm-url-decode-entities) (setq Newsgroups (buffer-string)) (goto-char (point-max)) (widen) (skip-chars-forward "- \t")) - (when (looking-at - "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - ]+\\).*Next" nil t)) + (>= i nnweb-max-hits)) + (setq more nil) + ;; Yup, there are more articles + (setq more (concat (nnweb-definition 'base) (match-string 1))) + (when more + (erase-buffer) + (mm-url-insert more)))) ;; Return the articles in the right order. (setq nnweb-articles (sort nnweb-articles 'car-less-than-car)))))) (defun nnweb-google-search (search) - (nnweb-insert + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded + (mm-url-encode-www-form-urlencoded `(("q" . ,search) - ("num". "100") + ("num" . "100") ("hq" . "") - ("hl" . "") + ("hl" . "en") ("lr" . "") ("safe" . "off") ("sites" . "groups"))))) @@ -825,7 +448,72 @@ and `altavista'.") (defun nnweb-google-identity (url) "Return an unique identifier based on URL." - (if (string-match "seld=\\([0-9]+\\)" url) + (if (string-match "selm=\\([^ &>]+\\)" url) + (match-string 1 url) + url)) + +;;; +;;; gmane.org +;;; +(defun nnweb-gmane-create-mapping () + "Perform the search and create a number-to-url alist." + (save-excursion + (set-buffer nnweb-buffer) + (erase-buffer) + (when (funcall (nnweb-definition 'search) nnweb-search) + (let ((more t) + (case-fold-search t) + (active (or (cadr (assoc nnweb-group nnweb-group-alist)) + (cons 1 0))) + subject group url + map) + ;; Remove stuff from the beginning of results + (goto-char (point-min)) + (search-forward "Search Results