;;; nnweb.el --- retrieving articles via web search engines
-;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
-;; Free Software Foundation, Inc.
+
+;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+;; 2004, 2005 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; Keywords: news
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Commentary:
;; Note: You need to have `w3' installed for some functions to work.
+;; FIXME: Due to changes in the HTML output of Google Groups and Gmane, stuff
+;; related to web groups (gnus-group-make-web-group) doesn't work anymore.
+
+;; Fetching an article by MID (cf. gnus-refer-article-method) over Google
+;; Groups should work.
+
;;; Code:
(eval-when-compile (require 'cl))
(defvar nnweb-type-definition
'((google
- (article . ignore)
- (id . "http://groups.google.com/groups?selm=%s&output=gplain")
+ (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source")
+ (article . nnweb-google-wash-article)
(reference . identity)
(map . nnweb-google-create-mapping)
(search . nnweb-google-search)
(address . "http://groups.google.com/groups")
+ (base . "http://groups.google.com")
(identifier . nnweb-google-identity))
(dejanews ;; alias of google
(article . ignore)
(map . nnweb-google-create-mapping)
(search . nnweb-google-search)
(address . "http://groups.google.com/groups")
+ (base . "http://groups.google.com")
(identifier . nnweb-google-identity))
(gmane
(article . nnweb-gmane-wash-article)
(current-buffer))))))
;;;
-;;; Deja bought by google.com
+;;; groups.google.com
;;;
(defun nnweb-google-wash-article ()
- (let ((case-fold-search t) url)
- (goto-char (point-min))
- (re-search-forward "^<pre>" nil t)
- (narrow-to-region (point-min) (point))
- (search-backward "<table " nil t 2)
- (delete-region (point-min) (point))
- (if (re-search-forward "Search Result [0-9]+" nil t)
- (replace-match ""))
- (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t)
- (replace-match ""))
+ ;; We have Google's masked e-mail addresses here. :-/
+ (let ((case-fold-search t))
(goto-char (point-min))
- (while (search-forward "<br>" nil t)
- (replace-match "\n"))
- (mm-url-remove-markup)
+ (delete-region (point-min)
+ (1+ (re-search-forward "^<pre>" nil t)))
(goto-char (point-min))
- (while (re-search-forward "^[ \t]*\n" nil t)
- (replace-match ""))
- (goto-char (point-max))
- (insert "\n")
- (widen)
- (narrow-to-region (point) (point-max))
- (search-forward "</pre>" nil t)
- (delete-region (point) (point-max))
- (mm-url-remove-markup)
- (widen)))
+ (delete-region (- (re-search-forward "^</pre>" nil t) (length "</pre>"))
+ (point-max))
+ (mm-url-decode-entities)))
(defun nnweb-google-parse-1 (&optional Message-ID)
(let ((i 0)
"a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
(setq mid (match-string 2)
url (format
- "http://groups.google.com/groups?selm=%s&output=gplain" mid))
+ (nnweb-definition 'id) mid))
(narrow-to-region (search-forward ">" nil t)
(search-forward "</a>" nil t))
(mm-url-remove-markup)
(>= i nnweb-max-hits))
(setq more nil)
;; Yup, there are more articles
- (setq more (concat "http://groups.google.com" (match-string 1)))
+ (setq more (concat (nnweb-definition 'base) (match-string 1)))
(when more
(erase-buffer)
(mm-url-insert more))))
"?"
(mm-url-encode-www-form-urlencoded
`(("q" . ,search)
- ("num". "100")
+ ("num" . "100")
("hq" . "")
- ("hl" . "")
+ ("hl" . "en")
("lr" . "")
("safe" . "off")
("sites" . "groups")))))
(defun nnweb-gmane-wash-article ()
(let ((case-fold-search t))
(goto-char (point-min))
- (re-search-forward "<!--X-Head-of-Message-->" nil t)
+ (search-forward "<!--X-Head-of-Message-->" nil t)
(delete-region (point-min) (point))
(goto-char (point-min))
(while (looking-at "^<li><em>\\([^ ]+\\)</em>.*</li>")