X-Git-Url: https://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Fnnweb.el;h=1cfa7a4cbc30544135451c5ad180000f97b06892;hp=e3a9625a07d3ab58f3e7f529a2c4eb34db61b8ae;hb=51d18a93d423dbdbd46dd7757605dbfa05d92976;hpb=46ce76377c25b05eda270f7349f97db032e53e4a diff --git a/lisp/nnweb.el b/lisp/nnweb.el index e3a9625a0..1cfa7a4cb 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,16 +1,17 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 -;; Free Software Foundation, Inc. + +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, +;; 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: news ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,9 +19,7 @@ ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: @@ -53,20 +52,24 @@ Valid types include `google', `dejanews', and `gmane'.") (defvar nnweb-type-definition '((google - (article . ignore) - (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source") + (result . "http://groups.google.com/group/%s/msg/%s?dmode=source") + (article . nnweb-google-wash-article) (reference . identity) (map . nnweb-google-create-mapping) (search . nnweb-google-search) (address . "http://groups.google.com/groups") + (base . "http://groups.google.com") (identifier . nnweb-google-identity)) (dejanews ;; alias of google - (article . ignore) - (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source") + (result . "http://groups.google.com/group/%s/msg/%s?dmode=source") + (article . nnweb-google-wash-article) (reference . identity) (map . nnweb-google-create-mapping) (search . nnweb-google-search) (address . "http://groups.google.com/groups") + (base . "http://groups.google.com") (identifier . nnweb-google-identity)) (gmane (article . nnweb-gmane-wash-article) @@ -74,7 +77,7 @@ Valid types include `google', `dejanews', and `gmane'.") (reference . identity) (map . nnweb-gmane-create-mapping) (search . nnweb-gmane-search) - (address . "http://gmane.org/") + (address . "http://search.gmane.org/nov.php") (identifier . nnweb-gmane-identity))) "Type-definition alist.") @@ -101,8 +104,7 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-retrieve-headers (articles &optional group server fetch-old) (nnweb-possibly-change-server group server) - (save-excursion - (set-buffer nntp-server-buffer) + (with-current-buffer nntp-server-buffer (erase-buffer) (let (article header) (mm-with-unibyte-current-buffer @@ -114,25 +116,20 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) (if nnweb-ephemeral-p - (setq nnweb-hashtb (gnus-make-hashtable 4095))) + (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (unless nnweb-articles + (nnweb-read-overview group))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) (nnweb-write-overview group))) -(deffoo nnweb-request-group (group &optional server dont-check) - (nnweb-possibly-change-server nil server) - (when (and group - (not (equal group nnweb-group)) - (not nnweb-ephemeral-p)) - (setq nnweb-group group - nnweb-articles nil) - (let ((info (assoc group nnweb-group-alist))) - (when info - (setq nnweb-type (nth 2 info)) - (setq nnweb-search (nth 3 info)) - (unless dont-check - (nnweb-read-overview group))))) +(deffoo nnweb-request-group (group &optional server dont-check info) + (nnweb-possibly-change-server group server) + (unless (or nnweb-ephemeral-p + dont-check + nnweb-articles) + (nnweb-read-overview group)) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) @@ -149,16 +146,14 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-close-group (group &optional server) (nnweb-possibly-change-server group server) (when (gnus-buffer-live-p nnweb-buffer) - (save-excursion - (set-buffer nnweb-buffer) + (with-current-buffer nnweb-buffer (set-buffer-modified-p nil) (kill-buffer nnweb-buffer))) t) (deffoo nnweb-request-article (article &optional group server buffer) (nnweb-possibly-change-server group server) - (save-excursion - (set-buffer (or buffer nntp-server-buffer)) + (with-current-buffer (or buffer nntp-server-buffer) (let* ((header (cadr (assq article nnweb-articles))) (url (and header (mail-header-xref header)))) (when (or (and url @@ -171,7 +166,8 @@ Valid types include `google', `dejanews', and `gmane'.") (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) (when (and fetch art) - (setq url (format fetch art)) + (setq url (format fetch + (mm-url-form-encode-xwfu art))) (mm-with-unibyte-current-buffer (mm-url-insert url)) (if (nnweb-definition 'reference t) @@ -186,21 +182,18 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-close-server (&optional server) (when (and (nnweb-server-opened server) (gnus-buffer-live-p nnweb-buffer)) - (save-excursion - (set-buffer nnweb-buffer) + (with-current-buffer nnweb-buffer (set-buffer-modified-p nil) (kill-buffer nnweb-buffer))) (nnoo-close-server 'nnweb server)) (deffoo nnweb-request-list (&optional server) (nnweb-possibly-change-server nil server) - (save-excursion - (set-buffer nntp-server-buffer) - (nnmail-generate-active nnweb-group-alist) + (with-current-buffer nntp-server-buffer + (nnmail-generate-active (list (assoc server nnweb-group-alist))) t)) -(deffoo nnweb-request-update-info (group info &optional server) - (nnweb-possibly-change-server group server)) +(deffoo nnweb-request-update-info (group info &optional server)) (deffoo nnweb-asynchronous-p () nil) @@ -208,7 +201,7 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-request-create-group (group &optional server args) (nnweb-possibly-change-server nil server) (nnweb-request-delete-group group) - (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist) + (push `(,group ,(cons 1 0)) nnweb-group-alist) (nnweb-write-active) t) @@ -278,78 +271,79 @@ Valid types include `google', `dejanews', and `gmane'.") def)) (defun nnweb-possibly-change-server (&optional group server) - (nnweb-init server) (when server (unless (nnweb-server-opened server) - (nnweb-open-server server))) + (nnweb-open-server server)) + (nnweb-init server)) (unless nnweb-group-alist (nnweb-read-active)) (unless nnweb-hashtb (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group - (when (and (not nnweb-ephemeral-p) - (equal group nnweb-group)) - (nnweb-request-group group nil t)))) + (setq nnweb-group group))) (defun nnweb-init (server) "Initialize buffers and such." (unless (gnus-buffer-live-p nnweb-buffer) (setq nnweb-buffer - (save-excursion - (mm-with-unibyte - (nnheader-set-temp-buffer - (format " *nnweb %s %s %s*" - nnweb-type nnweb-search server)) - (current-buffer)))))) + (save-current-buffer + (nnheader-set-temp-buffer + (format " *nnweb %s %s %s*" + nnweb-type nnweb-search server)) + (mm-disable-multibyte) + (current-buffer))))) ;;; -;;; Deja bought by google.com +;;; groups.google.com ;;; (defun nnweb-google-wash-article () - (let ((case-fold-search t) url) - (goto-char (point-min)) - (re-search-forward "^
" nil t)
-    (narrow-to-region (point-min) (point))
-    (search-backward "" nil t)
-      (replace-match "\n"))
-    (mm-url-remove-markup)
+  ;; We have Google's masked e-mail addresses here.  :-/
+  (let ((case-fold-search t)
+	(start-re "
[\r\n ]*")
+	(end-re "[\r\n ]*
")) (goto-char (point-min)) - (while (re-search-forward "^[ \t]*\n" nil t) - (replace-match "")) - (goto-char (point-max)) - (insert "\n") - (widen) - (narrow-to-region (point) (point-max)) - (search-forward "" nil t) - (delete-region (point) (point-max)) - (mm-url-remove-markup) - (widen))) + (if (save-excursion + (or (re-search-forward "The requested message.*could not be found." + nil t) + (not (and (re-search-forward start-re nil t) + (re-search-forward end-re nil t))))) + ;; FIXME: Don't know how to indicate "not found". + ;; Should this function throw an error? --rsteib + (progn + (gnus-message 3 "Requested article not found") + (erase-buffer)) + (delete-region (point-min) + (re-search-forward start-re)) + (goto-char (point-min)) + (delete-region (progn + (re-search-forward end-re) + (match-beginning 0)) + (point-max)) + (mm-url-decode-entities)))) (defun nnweb-google-parse-1 (&optional Message-ID) + "Parse search result in current buffer." (let ((i 0) (case-fold-search t) (active (cadr (assoc nnweb-group nnweb-group-alist))) Subject Score Date Newsgroups From map url mid) (unless active - (push (list nnweb-group (setq active (cons 1 0)) - nnweb-type nnweb-search) + (push (list nnweb-group (setq active (cons 1 0))) nnweb-group-alist)) ;; Go through all the article hits on this page. (goto-char (point-min)) - (while (re-search-forward - "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t) - (setq mid (match-string 2) + (while + (re-search-forward + "a +href=\"/group/\\([^>\"]+\\)/browse_thread/[^>]+#\\([0-9a-f]+\\)" + nil t) + (setq Newsgroups (match-string-no-properties 1) + ;; Note: Starting with Google Groups 2, `mid' is a Google-internal + ;; ID, not a proper Message-ID. + mid (match-string-no-properties 2) url (format - "http://groups.google.com/groups?selm=%s&output=gplain" mid)) + (nnweb-definition 'result) Newsgroups mid)) (narrow-to-region (search-forward ">" nil t) (search-forward "" nil t)) (mm-url-remove-markup) @@ -357,26 +351,25 @@ Valid types include `google', `dejanews', and `gmane'.") (setq Subject (buffer-string)) (goto-char (point-max)) (widen) - (forward-line 1) - (when (looking-at "
]+>") - (goto-char (match-end 0))) - (if (not (looking-at "]+>")) - (skip-chars-forward " \t") - (narrow-to-region (point) - (search-forward "" nil t)) - (mm-url-remove-markup) - (mm-url-decode-entities) - (setq Newsgroups (buffer-string)) - (goto-char (point-max)) - (widen) - (skip-chars-forward "- \t")) - (when (looking-at - "\\([0-9]+\\)[/ ]\\([A-Za-z]+\\)[/ ]\\([0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - ]+\\).*Next" nil t)) + "]+href=\"\n?\\([^>\" \n\t]+\\)[^<]*]+src=[^>]+next" + nil t)) (>= i nnweb-max-hits)) (setq more nil) ;; Yup, there are more articles - (setq more (concat "http://groups.google.com" (match-string 1))) + (setq more (concat (nnweb-definition 'base) (match-string 1))) (when more (erase-buffer) + (nnheader-message 7 "Searching google...(%d)" i) (mm-url-insert more)))) ;; Return the articles in the right order. + (nnheader-message 7 "Searching google...done") (setq nnweb-articles (sort nnweb-articles 'car-less-than-car)))))) @@ -435,12 +431,14 @@ Valid types include `google', `dejanews', and `gmane'.") "?" (mm-url-encode-www-form-urlencoded `(("q" . ,search) - ("num". "100") + ("num" . ,(number-to-string + (min 100 nnweb-max-hits))) ("hq" . "") - ("hl" . "") + ("hl" . "en") ("lr" . "") ("safe" . "off") - ("sites" . "groups"))))) + ("sites" . "groups") + ("filter" . "0"))))) t) (defun nnweb-google-identity (url) @@ -454,48 +452,59 @@ Valid types include `google', `dejanews', and `gmane'.") ;;; (defun nnweb-gmane-create-mapping () "Perform the search and create a number-to-url alist." - (save-excursion - (set-buffer nnweb-buffer) - (erase-buffer) - (when (funcall (nnweb-definition 'search) nnweb-search) - (let ((more t) - (case-fold-search t) - (active (or (cadr (assoc nnweb-group nnweb-group-alist)) - (cons 1 0))) - subject group url - map) - ;; Remove stuff from the beginning of results - (goto-char (point-min)) - (search-forward "Search Results
    " nil t) - (delete-region (point-min) (point)) + (with-current-buffer nnweb-buffer + (let ((case-fold-search t) + (active (or (cadr (assoc nnweb-group nnweb-group-alist)) + (cons 1 0))) + map) + (erase-buffer) + (nnheader-message 7 "Searching Gmane..." ) + (when (funcall (nnweb-definition 'search) nnweb-search) (goto-char (point-min)) - ;; Iterate over the actual hits - (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\(.*\\)" nil t) - (setq url (concat "http://gmane.org/" (match-string 1))) - (setq subject (match-string 2)) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat "(" group ") " subject) nil nil - nil nil 0 0 url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) - ;; Return the articles in the right order. - (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car)))))) + ;; Skip the status line + (forward-line 1) + ;; Thanks to Olly Betts we now have NOV lines in our buffer! + (while (not (eobp)) + (unless (or (eolp) (looking-at "\x0d")) + (let ((header (nnheader-parse-nov))) + (let ((xref (mail-header-xref header)) + (from (mail-header-from header)) + (subject (mail-header-subject header)) + (rfc2047-encoding-type 'mime)) + (when (string-match " \\([^:]+\\)[:/]\\([0-9]+\\)" xref) + (mail-header-set-xref + header + (format "http://article.gmane.org/%s/%s/raw" + (match-string 1 xref) + (match-string 2 xref)))) + + ;; Add host part to gmane-encrypted addresses + (when (string-match "@$" from) + (mail-header-set-from header + (concat from "public.gmane.org"))) + + (mail-header-set-subject header + (rfc2047-encode-string subject)) + + (unless (nnweb-get-hashtb (mail-header-xref header)) + (mail-header-set-number header (incf (cdr active))) + (push (list (mail-header-number header) header) map) + (nnweb-set-hashtb (cadar map) (car map)))))) + (forward-line 1))) + (nnheader-message 7 "Searching Gmane...done") + (setq nnweb-articles + (sort (nconc nnweb-articles map) 'car-less-than-car))))) (defun nnweb-gmane-wash-article () (let ((case-fold-search t)) (goto-char (point-min)) - (re-search-forward "" nil t) - (delete-region (point-min) (point)) - (goto-char (point-min)) - (while (looking-at "^
  • \\([^ ]+\\).*
  • ") - (replace-match "\\1\\2" t) - (forward-line 1)) - (mm-url-remove-markup))) + (when (search-forward "" nil t) + (delete-region (point-min) (point)) + (goto-char (point-min)) + (while (looking-at "^
  • \\([^ ]+\\).*
  • ") + (replace-match "\\1\\2" t) + (forward-line 1)) + (mm-url-remove-markup)))) (defun nnweb-gmane-search (search) (mm-url-insert @@ -503,11 +512,15 @@ Valid types include `google', `dejanews', and `gmane'.") (nnweb-definition 'address) "?" (mm-url-encode-www-form-urlencoded - `(("query" . ,search))))) + `(("query" . ,search) + ("HITSPERPAGE" . ,(number-to-string nnweb-max-hits)) + ;;("TOPDOC" . "1000") + )))) (setq buffer-file-name nil) + (unless (featurep 'xemacs) (set-buffer-multibyte t)) + (mm-decode-coding-region (point-min) (point-max) 'utf-8) t) - (defun nnweb-gmane-identity (url) "Return a unique identifier based on URL." (if (string-match "group=\\(.+\\)" url) @@ -521,7 +534,11 @@ Valid types include `google', `dejanews', and `gmane'.") (defun nnweb-insert-html (parse) "Insert HTML based on a w3 parse tree." (if (stringp parse) - (insert (nnheader-string-as-multibyte parse)) + ;; We used to call nnheader-string-as-multibyte here, but it cannot + ;; be right, so I removed it. If a bug shows up because of this change, + ;; please do not blindly revert the change, but help me find the real + ;; cause of the bug instead. --Stef + (insert parse) (insert "<" (symbol-name (car parse)) " ") (insert (mapconcat (lambda (param) @@ -533,7 +550,7 @@ Valid types include `google', `dejanews', and `gmane'.") (nth 1 parse) " ")) (insert ">\n") - (mapcar 'nnweb-insert-html (nth 2 parse)) + (mapc 'nnweb-insert-html (nth 2 parse)) (insert "\n"))) (defun nnweb-parse-find (type parse &optional maxdepth)