X-Git-Url: http://cgit.sxemacs.org/?a=blobdiff_plain;f=lisp%2Fnnweb.el;h=fceb2a387aaa9e5ce4904343fe1832a0e4db535a;hb=707f51a37c63cc5df71b0c829d4648b0742ee62c;hp=d5f7f13b5fba351d47f5324863af11553ae8e033;hpb=48df4c0efd45903b82f6a651215357a8095fc17e;p=gnus diff --git a/lisp/nnweb.el b/lisp/nnweb.el index d5f7f13b5..fceb2a387 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,16 +1,17 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 -;; Free Software Foundation, Inc. + +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, +;; 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: news ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,9 +19,7 @@ ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: @@ -53,17 +52,19 @@ Valid types include `google', `dejanews', and `gmane'.") (defvar nnweb-type-definition '((google - (article . ignore) - (id . "http://groups.google.de/groups?selm=%s&output=gplain") + (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source") + (result . "http://groups.google.com/group/%s/msg/%s?dmode=source") + (article . nnweb-google-wash-article) (reference . identity) (map . nnweb-google-create-mapping) (search . nnweb-google-search) - (address . "http://groups.google.de/groups") - (base . "http://groups.google.de") + (address . "http://groups.google.com/groups") + (base . "http://groups.google.com") (identifier . nnweb-google-identity)) (dejanews ;; alias of google - (article . ignore) - (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source") + (result . "http://groups.google.com/group/%s/msg/%s?dmode=source") + (article . nnweb-google-wash-article) (reference . identity) (map . nnweb-google-create-mapping) (search . nnweb-google-search) @@ -76,7 +77,7 @@ Valid types include `google', `dejanews', and `gmane'.") (reference . identity) (map . nnweb-gmane-create-mapping) (search . nnweb-gmane-search) - (address . "http://gmane.org/") + (address . "http://search.gmane.org/nov.php") (identifier . nnweb-gmane-identity))) "Type-definition alist.") @@ -103,8 +104,7 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-retrieve-headers (articles &optional group server fetch-old) (nnweb-possibly-change-server group server) - (save-excursion - (set-buffer nntp-server-buffer) + (with-current-buffer nntp-server-buffer (erase-buffer) (let (article header) (mm-with-unibyte-current-buffer @@ -116,25 +116,20 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) (if nnweb-ephemeral-p - (setq nnweb-hashtb (gnus-make-hashtable 4095))) + (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (unless nnweb-articles + (nnweb-read-overview group))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) (nnweb-write-overview group))) -(deffoo nnweb-request-group (group &optional server dont-check) - (nnweb-possibly-change-server nil server) - (when (and group - (not (equal group nnweb-group)) - (not nnweb-ephemeral-p)) - (setq nnweb-group group - nnweb-articles nil) - (let ((info (assoc group nnweb-group-alist))) - (when info - (setq nnweb-type (nth 2 info)) - (setq nnweb-search (nth 3 info)) - (unless dont-check - (nnweb-read-overview group))))) +(deffoo nnweb-request-group (group &optional server dont-check info) + (nnweb-possibly-change-server group server) + (unless (or nnweb-ephemeral-p + dont-check + nnweb-articles) + (nnweb-read-overview group)) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) @@ -151,16 +146,14 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-close-group (group &optional server) (nnweb-possibly-change-server group server) (when (gnus-buffer-live-p nnweb-buffer) - (save-excursion - (set-buffer nnweb-buffer) + (with-current-buffer nnweb-buffer (set-buffer-modified-p nil) (kill-buffer nnweb-buffer))) t) (deffoo nnweb-request-article (article &optional group server buffer) (nnweb-possibly-change-server group server) - (save-excursion - (set-buffer (or buffer nntp-server-buffer)) + (with-current-buffer (or buffer nntp-server-buffer) (let* ((header (cadr (assq article nnweb-articles))) (url (and header (mail-header-xref header)))) (when (or (and url @@ -173,7 +166,8 @@ Valid types include `google', `dejanews', and `gmane'.") (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) (when (and fetch art) - (setq url (format fetch art)) + (setq url (format fetch + (mm-url-form-encode-xwfu art))) (mm-with-unibyte-current-buffer (mm-url-insert url)) (if (nnweb-definition 'reference t) @@ -188,17 +182,15 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-close-server (&optional server) (when (and (nnweb-server-opened server) (gnus-buffer-live-p nnweb-buffer)) - (save-excursion - (set-buffer nnweb-buffer) + (with-current-buffer nnweb-buffer (set-buffer-modified-p nil) (kill-buffer nnweb-buffer))) (nnoo-close-server 'nnweb server)) (deffoo nnweb-request-list (&optional server) (nnweb-possibly-change-server nil server) - (save-excursion - (set-buffer nntp-server-buffer) - (nnmail-generate-active nnweb-group-alist) + (with-current-buffer nntp-server-buffer + (nnmail-generate-active (list (assoc server nnweb-group-alist))) t)) (deffoo nnweb-request-update-info (group info &optional server) @@ -210,7 +202,7 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-request-create-group (group &optional server args) (nnweb-possibly-change-server nil server) (nnweb-request-delete-group group) - (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist) + (push `(,group ,(cons 1 0)) nnweb-group-alist) (nnweb-write-active) t) @@ -280,78 +272,79 @@ Valid types include `google', `dejanews', and `gmane'.") def)) (defun nnweb-possibly-change-server (&optional group server) - (nnweb-init server) (when server (unless (nnweb-server-opened server) - (nnweb-open-server server))) + (nnweb-open-server server)) + (nnweb-init server)) (unless nnweb-group-alist (nnweb-read-active)) (unless nnweb-hashtb (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group - (when (and (not nnweb-ephemeral-p) - (equal group nnweb-group)) - (nnweb-request-group group nil t)))) + (setq nnweb-group group))) (defun nnweb-init (server) "Initialize buffers and such." (unless (gnus-buffer-live-p nnweb-buffer) (setq nnweb-buffer - (save-excursion - (mm-with-unibyte - (nnheader-set-temp-buffer - (format " *nnweb %s %s %s*" - nnweb-type nnweb-search server)) - (current-buffer)))))) + (save-current-buffer + (nnheader-set-temp-buffer + (format " *nnweb %s %s %s*" + nnweb-type nnweb-search server)) + (mm-disable-multibyte) + (current-buffer))))) ;;; -;;; Deja bought by google.com +;;; groups.google.com ;;; (defun nnweb-google-wash-article () - (let ((case-fold-search t) url) + ;; We have Google's masked e-mail addresses here. :-/ + (let ((case-fold-search t) + (start-re "
[\r\n ]*")
+	(end-re "[\r\n ]*
")) (goto-char (point-min)) - (re-search-forward "^
" nil t)
-    (narrow-to-region (point-min) (point))
-    (search-backward "" nil t)
-      (replace-match "\n"))
-    (mm-url-remove-markup)
-    (goto-char (point-min))
-    (while (re-search-forward "^[ \t]*\n" nil t)
-      (replace-match ""))
-    (goto-char (point-max))
-    (insert "\n")
-    (widen)
-    (narrow-to-region (point) (point-max))
-    (search-forward "" nil t)
-    (delete-region (point) (point-max))
-    (mm-url-remove-markup)
-    (widen)))
+    (if (save-excursion
+	  (or (re-search-forward "The requested message.*could not be found."
+				 nil t)
+	      (not (and (re-search-forward start-re nil t)
+			(re-search-forward end-re nil t)))))
+	;; FIXME: Don't know how to indicate "not found".
+	;; Should this function throw an error?  --rsteib
+	(progn
+	  (gnus-message 3 "Requested article not found")
+	  (erase-buffer))
+      (delete-region (point-min)
+		     (re-search-forward start-re))
+      (goto-char (point-min))
+      (delete-region (progn
+		       (re-search-forward end-re)
+		       (match-beginning 0))
+		     (point-max))
+      (mm-url-decode-entities))))
 
 (defun nnweb-google-parse-1 (&optional Message-ID)
+  "Parse search result in current buffer."
   (let ((i 0)
 	(case-fold-search t)
 	(active (cadr (assoc nnweb-group nnweb-group-alist)))
 	Subject Score Date Newsgroups From
 	map url mid)
     (unless active
-      (push (list nnweb-group (setq active (cons 1 0))
-		  nnweb-type nnweb-search)
+      (push (list nnweb-group (setq active (cons 1 0)))
 	    nnweb-group-alist))
     ;; Go through all the article hits on this page.
     (goto-char (point-min))
-    (while (re-search-forward
-	    "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
-      (setq mid (match-string 2)
+    (while
+	(re-search-forward
+	 "a +href=\"/group/\\([^>\"]+\\)/browse_thread/[^>]+#\\([0-9a-f]+\\)"
+	 nil t)
+      (setq Newsgroups (match-string-no-properties 1)
+	    ;; Note: Starting with Google Groups 2, `mid' is a Google-internal
+	    ;; ID, not a proper Message-ID.
+	    mid (match-string-no-properties 2)
 	    url (format
-		 (nnweb-definition 'id) mid))
+		 (nnweb-definition 'result) Newsgroups mid))
       (narrow-to-region (search-forward ">" nil t)
 			(search-forward "" nil t))
       (mm-url-remove-markup)
@@ -359,26 +352,25 @@ Valid types include `google', `dejanews', and `gmane'.")
       (setq Subject (buffer-string))
       (goto-char (point-max))
       (widen)
-      (forward-line 2)
-      (when (looking-at "
]+>") - (goto-char (match-end 0))) - (if (not (looking-at "]+>")) - (skip-chars-forward " \t") - (narrow-to-region (point) - (search-forward "" nil t)) - (mm-url-remove-markup) - (mm-url-decode-entities) - (setq Newsgroups (buffer-string)) - (goto-char (point-max)) - (widen) - (skip-chars-forward "- \t")) - (when (looking-at - "\\([0-9]+\\)[/ ]\\([A-Za-z]+\\)[/ ]\\([0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - ]+\\).*Next" nil t)) + "]+href=\"\n?\\([^>\" \n\t]+\\)[^<]*]+src=[^>]+next" + nil t)) (>= i nnweb-max-hits)) (setq more nil) ;; Yup, there are more articles (setq more (concat (nnweb-definition 'base) (match-string 1))) (when more (erase-buffer) + (nnheader-message 7 "Searching google...(%d)" i) (mm-url-insert more)))) ;; Return the articles in the right order. + (nnheader-message 7 "Searching google...done") (setq nnweb-articles (sort nnweb-articles 'car-less-than-car)))))) @@ -437,12 +432,14 @@ Valid types include `google', `dejanews', and `gmane'.") "?" (mm-url-encode-www-form-urlencoded `(("q" . ,search) - ("num" . "100") + ("num" . ,(number-to-string + (min 100 nnweb-max-hits))) ("hq" . "") ("hl" . "en") ("lr" . "") ("safe" . "off") - ("sites" . "groups"))))) + ("sites" . "groups") + ("filter" . "0"))))) t) (defun nnweb-google-identity (url) @@ -456,48 +453,59 @@ Valid types include `google', `dejanews', and `gmane'.") ;;; (defun nnweb-gmane-create-mapping () "Perform the search and create a number-to-url alist." - (save-excursion - (set-buffer nnweb-buffer) - (erase-buffer) - (when (funcall (nnweb-definition 'search) nnweb-search) - (let ((more t) - (case-fold-search t) - (active (or (cadr (assoc nnweb-group nnweb-group-alist)) - (cons 1 0))) - subject group url - map) - ;; Remove stuff from the beginning of results + (with-current-buffer nnweb-buffer + (let ((case-fold-search t) + (active (or (cadr (assoc nnweb-group nnweb-group-alist)) + (cons 1 0))) + map) + (erase-buffer) + (nnheader-message 7 "Searching Gmane..." ) + (when (funcall (nnweb-definition 'search) nnweb-search) (goto-char (point-min)) - (search-forward "Search Results
    " nil t) - (delete-region (point-min) (point)) - (goto-char (point-min)) - ;; Iterate over the actual hits - (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\(.*\\)" nil t) - (setq url (concat "http://gmane.org/" (match-string 1))) - (setq subject (match-string 2)) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat "(" group ") " subject) nil nil - nil nil 0 0 url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) - ;; Return the articles in the right order. - (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car)))))) + ;; Skip the status line + (forward-line 1) + ;; Thanks to Olly Betts we now have NOV lines in our buffer! + (while (not (eobp)) + (unless (or (eolp) (looking-at "\x0d")) + (let ((header (nnheader-parse-nov))) + (let ((xref (mail-header-xref header)) + (from (mail-header-from header)) + (subject (mail-header-subject header)) + (rfc2047-encoding-type 'mime)) + (when (string-match " \\([^:]+\\)[:/]\\([0-9]+\\)" xref) + (mail-header-set-xref + header + (format "http://article.gmane.org/%s/%s/raw" + (match-string 1 xref) + (match-string 2 xref)))) + + ;; Add host part to gmane-encrypted addresses + (when (string-match "@$" from) + (mail-header-set-from header + (concat from "public.gmane.org"))) + + (mail-header-set-subject header + (rfc2047-encode-string subject)) + + (unless (nnweb-get-hashtb (mail-header-xref header)) + (mail-header-set-number header (incf (cdr active))) + (push (list (mail-header-number header) header) map) + (nnweb-set-hashtb (cadar map) (car map)))))) + (forward-line 1))) + (nnheader-message 7 "Searching Gmane...done") + (setq nnweb-articles + (sort (nconc nnweb-articles map) 'car-less-than-car))))) (defun nnweb-gmane-wash-article () (let ((case-fold-search t)) (goto-char (point-min)) - (search-forward "" nil t) - (delete-region (point-min) (point)) - (goto-char (point-min)) - (while (looking-at "^
  • \\([^ ]+\\).*
  • ") - (replace-match "\\1\\2" t) - (forward-line 1)) - (mm-url-remove-markup))) + (when (search-forward "" nil t) + (delete-region (point-min) (point)) + (goto-char (point-min)) + (while (looking-at "^
  • \\([^ ]+\\).*
  • ") + (replace-match "\\1\\2" t) + (forward-line 1)) + (mm-url-remove-markup)))) (defun nnweb-gmane-search (search) (mm-url-insert @@ -505,11 +513,15 @@ Valid types include `google', `dejanews', and `gmane'.") (nnweb-definition 'address) "?" (mm-url-encode-www-form-urlencoded - `(("query" . ,search))))) + `(("query" . ,search) + ("HITSPERPAGE" . ,(number-to-string nnweb-max-hits)) + ;;("TOPDOC" . "1000") + )))) (setq buffer-file-name nil) + (unless (featurep 'xemacs) (set-buffer-multibyte t)) + (mm-decode-coding-region (point-min) (point-max) 'utf-8) t) - (defun nnweb-gmane-identity (url) "Return a unique identifier based on URL." (if (string-match "group=\\(.+\\)" url) @@ -523,7 +535,11 @@ Valid types include `google', `dejanews', and `gmane'.") (defun nnweb-insert-html (parse) "Insert HTML based on a w3 parse tree." (if (stringp parse) - (insert (nnheader-string-as-multibyte parse)) + ;; We used to call nnheader-string-as-multibyte here, but it cannot + ;; be right, so I removed it. If a bug shows up because of this change, + ;; please do not blindly revert the change, but help me find the real + ;; cause of the bug instead. --Stef + (insert parse) (insert "<" (symbol-name (car parse)) " ") (insert (mapconcat (lambda (param) @@ -589,5 +605,4 @@ Valid types include `google', `dejanews', and `gmane'.") (provide 'nnweb) -;;; arch-tag: f59307eb-c90f-479f-b7d2-dbd8bf51b697 ;;; nnweb.el ends here