X-Git-Url: http://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Fnnweb.el;h=321508c9adeeec1eb45fd1a7c57bd3e7c124f436;hb=deb6f0bca02ca58c55e75a9cb5965fdc869aec37;hp=a35794ff73085751c83dc33f6e7f5a27ec068870;hpb=34e6ef9901f0a79f6f723eb942731e154c0933b4;p=gnus diff --git a/lisp/nnweb.el b/lisp/nnweb.el index a35794ff7..321508c9a 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,7 +1,7 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996 Free Software Foundation, Inc. +;; Copyright (C) 1996,97,98 Free Software Foundation, Inc. -;; Author: Lars Magne Ingebrigtsen +;; Author: Lars Magne Ingebrigtsen ;; Keywords: news ;; This file is part of GNU Emacs. @@ -28,14 +28,17 @@ ;;; Code: +(eval-when-compile (require 'cl)) + (require 'nnoo) (require 'message) (require 'gnus-util) +(require 'gnus) (require 'w3) (require 'url) -(condition-case () - (require 'w3-forms) - (error)) +(require 'nnmail) +(ignore-errors + (require 'w3-forms)) (nnoo-declare nnweb) @@ -45,29 +48,38 @@ (defvoo nnweb-type 'dejanews "What search engine type is being used.") -(defvar nnweb-type-definition +(defvoo nnweb-type-definition '((dejanews (article . nnweb-dejanews-wash-article) (map . nnweb-dejanews-create-mapping) (search . nnweb-dejanews-search) - (address . "http://search.dejanews.com/dnquery.xp")) + (address . "http://x8.dejanews.com/dnquery.xp") + (identifier . nnweb-dejanews-identity)) + (dejanewsold + (article . nnweb-dejanews-wash-article) + (map . nnweb-dejanews-create-mapping) + (search . nnweb-dejanewsold-search) + (address . "http://x8.dejanews.com/dnquery.xp") + (identifier . nnweb-dejanews-identity)) (reference (article . nnweb-reference-wash-article) (map . nnweb-reference-create-mapping) (search . nnweb-reference-search) - (address . "http://www.reference.com/cgi-bin/pn/go")) + (address . "http://www.reference.com/cgi-bin/pn/go") + (identifier . identity)) (altavista (article . nnweb-altavista-wash-article) (map . nnweb-altavista-create-mapping) (search . nnweb-altavista-search) (address . "http://www.altavista.digital.com/cgi-bin/query") - (id . "/cgi-bin/news?id@%s"))) + (id . "/cgi-bin/news?id@%s") + (identifier . identity))) "Type-definition alist.") (defvoo nnweb-search nil "Search string to feed to DejaNews.") -(defvoo nnweb-max-hits 100 +(defvoo nnweb-max-hits 999 "Maximum number of hits to display.") (defvoo nnweb-ephemeral-p nil @@ -98,6 +110,7 @@ (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) + (setq nnweb-hashtb (gnus-make-hashtable 4095)) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) @@ -105,7 +118,7 @@ (deffoo nnweb-request-group (group &optional server dont-check) (nnweb-possibly-change-server nil server) - (when (and group + (when (and group (not (equal group nnweb-group)) (not nnweb-ephemeral-p)) (let ((info (assoc group nnweb-group-alist))) @@ -140,10 +153,12 @@ (nnweb-possibly-change-server group server) (save-excursion (set-buffer (or buffer nntp-server-buffer)) - (let ((url (caddr (assq article nnweb-articles)))) + (let* ((header (cadr (assq article nnweb-articles))) + (url (and header (mail-header-xref header)))) (when (or (and url (nnweb-fetch-url url)) (and (stringp article) + (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) art) (when (string-match "^<\\(.*\\)>$" article) @@ -191,10 +206,10 @@ (deffoo nnweb-request-delete-group (group &optional force server) (nnweb-possibly-change-server group server) - (gnus-delete-assoc group nnweb-group-alist) + (gnus-pull group nnweb-group-alist) (gnus-delete-file (nnweb-overview-file group)) t) - + (nnoo-define-skeleton nnweb) ;;; Internal functions @@ -203,16 +218,14 @@ "Read the overview of GROUP and build the map." (when (file-exists-p (nnweb-overview-file group)) (nnheader-temp-write nil - (insert-file-contents (nnweb-overview-file group)) + (nnheader-insert-file-contents (nnweb-overview-file group)) (goto-char (point-min)) - (setq nnweb-hashtb (gnus-make-hashtable - (count-lines (point-min) (point-max)))) (let (header) (while (not (eobp)) (setq header (nnheader-parse-nov)) (forward-line 1) (push (list (mail-header-number header) - header (nnheader-header-xref header)) + header (mail-header-xref header)) nnweb-articles) (nnweb-set-hashtb header (car nnweb-articles))))))) @@ -224,14 +237,14 @@ (nnheader-insert-nov (cadr (pop articles))))))) (defun nnweb-set-hashtb (header data) - (gnus-sethastb (nnweb-identifier (mail-header-xrefs header)) - data nnweb-hashtb)) + (gnus-sethash (nnweb-identifier (mail-header-xref header)) + data nnweb-hashtb)) (defun nnweb-get-hashtb (url) (gnus-gethash (nnweb-identifier url) nnweb-hashtb)) (defun nnweb-identifier (ident) - ident) + (funcall (nnweb-definition 'identifier) ident)) (defun nnweb-overview-file (group) "Return the name of the overview file of GROUP." @@ -245,11 +258,12 @@ (defun nnweb-read-active () "Read the active file." (load (nnheader-concat nnweb-directory "active") t t t)) - -(defun nnweb-definition (type) + +(defun nnweb-definition (type &optional noerror) "Return the definition of TYPE." (let ((def (cdr (assq type (assq nnweb-type nnweb-type-definition))))) - (unless def + (when (and (not def) + (not noerror)) (error "Undefined definition %s" type)) def)) @@ -258,6 +272,8 @@ (when server (unless (nnweb-server-opened server) (nnweb-open-server server))) + (unless nnweb-group-alist + (nnweb-read-active)) (when group (when (and (not nnweb-ephemeral-p) (not (equal group nnweb-group))) @@ -278,9 +294,9 @@ (save-excursion (set-buffer nnweb-buffer) (erase-buffer) - (prog1 - (url-insert-file-contents url) - (copy-to-buffer buf (point-min) (point-max))))) + (url-insert-file-contents url) + (copy-to-buffer buf (point-min) (point-max)) + t)) (nnweb-url-retrieve-asynch url 'nnweb-callback (current-buffer) nnheader-callback-function) t))) @@ -314,7 +330,7 @@ (defun nnweb-encode-www-form-urlencoded (pairs) "Return PAIRS encoded for forms." - (mapconcat + (mapconcat (function (lambda (data) (concat (w3-form-encode-xwfu (car data)) "=" @@ -323,8 +339,8 @@ (defun nnweb-fetch-form (url pairs) (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs)) - (url-request-method 'POST) - (url-request-extra-headers + (url-request-method "POST") + (url-request-extra-headers '(("Content-type" . "application/x-www-form-urlencoded")))) (url-insert-file-contents url) (setq buffer-file-name nil)) @@ -334,7 +350,7 @@ (goto-char (point-min)) (while (re-search-forward "&\\([a-z]+\\);" nil t) (replace-match (char-to-string (or (cdr (assq (intern (match-string 1)) - w3-html-entities )) + w3-html-entities)) ?#)) t t))) @@ -363,50 +379,53 @@ (case-fold-search t) (active (or (cadr (assoc nnweb-group nnweb-group-alist)) (cons 1 0))) - Subject Score Date Newsgroup Author + Subject (Score "0") Date Newsgroup Author map url) (while more ;; Go through all the article hits on this page. (goto-char (point-min)) (nnweb-decode-entities) (goto-char (point-min)) - (while (re-search-forward "^ +[0-9]+\\." nil t) - (narrow-to-region + (while (re-search-forward "^

\n" nil t) + (narrow-to-region (point) - (cond ((re-search-forward "^ +[0-9]+\\." nil t) + (cond ((re-search-forward "^

\n" nil t) (match-beginning 0)) ((search-forward "\n\n" nil t) (point)) (t (point-max)))) (goto-char (point-min)) - (when (looking-at ".*HREF=\"\\([^\"]+\\)\"") - (setq url (match-string 1))) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (search-forward "\t" nil t) - (replace-match " ")) - (goto-char (point-min)) - (while (re-search-forward "^ +\\([^:]+\\): +\\(.*\\)$" nil t) - (set (intern (match-string 1)) (match-string 2))) + (looking-at ".*HREF=\"\\([^\"]+\\)\"\\(.*\\)") + (setq url (match-string 1)) + (let ((begin (point))) + (nnweb-remove-markup) + (goto-char begin) + (while (search-forward "\t" nil t) + (replace-match " ")) + (goto-char begin) + (end-of-line) + (setq Subject (buffer-substring begin (point))) + (if (re-search-forward + "^ Newsgroup: \\(.*\\)\n Posted on \\([0-9/]+\\) by \\(.*\\)$" nil t) + (setq Newsgroup (match-string 1) + Date (match-string 2) + Author (match-string 3)))) (widen) - (when (string-match "#[0-9]+/[0-9]+ *$" Subject) - (setq Subject (substring Subject 0 (match-beginning 0)))) + (incf i) (unless (nnweb-get-hashtb url) - (incf i) (push (list (incf (cdr active)) (make-full-mail-header - (cdr active) (concat "(" Newsgroup ") " Subject) Author Date - (concat "<" (message-unique-id) "-" (int-to-string i) - "@dejanews>") - nil 0 (string-to-int Score) nil) - url) - map))) + (cdr active) Subject Author Date + (concat "<" (nnweb-identifier url) "@dejanews>") + nil 0 (string-to-int Score) url)) + map) + (nnweb-set-hashtb (cadar map) (car map)))) ;; See whether there is a "Get next 20 hits" button here. (if (or (not (re-search-forward - "HREF=\"\\([^\"]+\\)\">Get next" nil t)) + "HREF=\"\\([^\"]+\\)\"[<>b]+Next result" nil t)) (>= i nnweb-max-hits)) (setq more nil) ;; Yup -- fetch it. @@ -415,7 +434,7 @@ (url-insert-file-contents more))) ;; Return the articles in the right order. (setq nnweb-articles - (sort map (lambda (s1 s2) (< (car s1) (car s2))))))))) + (sort (nconc nnweb-articles map) 'car-less-than-car)))))) (defun nnweb-dejanews-wash-article () (let ((case-fold-search t)) @@ -433,22 +452,45 @@ (replace-match "\\1 " t) (forward-line 1)) (when (re-search-forward "\n\n+" nil t) - (replace-match "\n" t t)))) + (replace-match "\n" t t)) + (goto-char (point-min)) + (when (search-forward "[More Headers]" nil t) + (replace-match "" t t)))) (defun nnweb-dejanews-search (search) - (nnweb-fetch-form + (nnweb-fetch-form (nnweb-definition 'address) `(("query" . ,search) ("defaultOp" . "AND") ("svcclass" . "dncurrent") ("maxhits" . "100") - ("format" . "verbose") + ("format" . "verbose2") ("threaded" . "0") - ("showsort" . "score") + ("showsort" . "date") ("agesign" . "1") ("ageweight" . "1"))) t) +(defun nnweb-dejanewsold-search (search) + (nnweb-fetch-form + (nnweb-definition 'address) + `(("query" . ,search) + ("defaultOp" . "AND") + ("svcclass" . "dnold") + ("maxhits" . "100") + ("format" . "verbose2") + ("threaded" . "0") + ("showsort" . "date") + ("agesign" . "1") + ("ageweight" . "1"))) + t) + +(defun nnweb-dejanews-identity (url) + "Return an unique identifier based on URL." + (if (string-match "recnum=\\([0-9]+\\)" url) + (match-string 1 url) + url)) + ;;; ;;; InReference ;;; @@ -474,7 +516,7 @@ ;(nnweb-decode-entities) (goto-char (point-min)) (while (re-search-forward "^ +[0-9]+\\." nil t) - (narrow-to-region + (narrow-to-region (point) (if (re-search-forward "^$" nil t) (match-beginning 0) @@ -491,21 +533,21 @@ (set (intern (match-string 1)) (match-string 2))) (widen) (search-forward "" nil t) + (incf i) (unless (nnweb-get-hashtb url) - (incf i) (push (list (incf (cdr active)) (make-full-mail-header (cdr active) (concat "(" Newsgroups ") " Subject) From Date Message-ID - nil 0 (string-to-int Score) nil) - url) - map))) + nil 0 (string-to-int Score) url)) + map) + (nnweb-set-hashtb (cadar map) (car map)))) (setq more nil)) ;; Return the articles in the right order. (setq nnweb-articles - (sort map (lambda (s1 s2) (< (car s1) (car s2))))))))) + (sort (nconc nnweb-articles map) 'car-less-than-car)))))) (defun nnweb-reference-wash-article () (let ((case-fold-search t)) @@ -547,35 +589,34 @@ (set-marker body nil)))) (defun nnweb-reference-search (search) - (prog1 - (url-insert-file-contents - (concat - (nnweb-definition 'address) - "?" - (nnweb-encode-www-form-urlencoded - `(("search" . "advanced") - ("querytext" . ,search) - ("subj" . "") - ("name" . "") - ("login" . "") - ("host" . "") - ("organization" . "") - ("groups" . "") - ("keywords" . "") - ("choice" . "Search") - ("startmonth" . "Jul") - ("startday" . "25") - ("startyear" . "1996") - ("endmonth" . "Aug") - ("endday" . "24") - ("endyear" . "1996") - ("mode" . "Quick") - ("verbosity" . "Verbose") - ("ranking" . "Relevance") - ("first" . "1") - ("last" . "25") - ("score" . "50"))))) - (setq buffer-file-name nil)) + (url-insert-file-contents + (concat + (nnweb-definition 'address) + "?" + (nnweb-encode-www-form-urlencoded + `(("search" . "advanced") + ("querytext" . ,search) + ("subj" . "") + ("name" . "") + ("login" . "") + ("host" . "") + ("organization" . "") + ("groups" . "") + ("keywords" . "") + ("choice" . "Search") + ("startmonth" . "Jul") + ("startday" . "25") + ("startyear" . "1996") + ("endmonth" . "Aug") + ("endday" . "24") + ("endyear" . "1996") + ("mode" . "Quick") + ("verbosity" . "Verbose") + ("ranking" . "Relevance") + ("first" . "1") + ("last" . "25") + ("score" . "50"))))) + (setq buffer-file-name nil) t) ;;; @@ -614,16 +655,16 @@ group (match-string 4) id (concat "<" (match-string 5) ">") from (match-string 6)) + (incf i) (unless (nnweb-get-hashtb url) - (incf i) (push (list (incf (cdr active)) (make-full-mail-header (cdr active) (concat "(" group ") " subject) from date - id nil 0 0 nil) - url) - map))) + id nil 0 0 url)) + map) + (nnweb-set-hashtb (cadar map) (car map)))) ;; See if we want more. (when (or (not nnweb-articles) (>= i nnweb-max-hits) @@ -632,7 +673,7 @@ (setq more nil))) ;; Return the articles in the right order. (setq nnweb-articles - (sort map (lambda (s1 s2) (< (car s1) (car s2)))))))))) + (sort (nconc nnweb-articles map) 'car-less-than-car))))))) (defun nnweb-altavista-wash-article () (goto-char (point-min)) @@ -652,21 +693,21 @@ (nnweb-remove-markup))) (defun nnweb-altavista-search (search &optional part) - (prog1 - (url-insert-file-contents - (concat - (nnweb-definition 'address) - "?" - (nnweb-encode-www-form-urlencoded - `(("pg" . "aq") - ("what" . "news") - ,@(when part `(("stq" . ,(int-to-string (* part 30))))) - ("fmt" . "d") - ("q" . ,search) - ("r" . "") - ("d0" . "") - ("d1" . ""))))) - (setq buffer-file-name nil))) + (url-insert-file-contents + (concat + (nnweb-definition 'address) + "?" + (nnweb-encode-www-form-urlencoded + `(("pg" . "aq") + ("what" . "news") + ,@(when part `(("stq" . ,(int-to-string (* part 30))))) + ("fmt" . "d") + ("q" . ,search) + ("r" . "") + ("d0" . "") + ("d1" . ""))))) + (setq buffer-file-name nil) + t) (provide 'nnweb)