;;; nnweb.el --- retrieving articles via web search engines
;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-;; 2004, 2005, 2006 Free Software Foundation, Inc.
+;; 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; Keywords: news
;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
+;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
(when (string-match "^<\\(.*\\)>$" article)
(setq art (match-string 1 article)))
(when (and fetch art)
- (setq url (format fetch art))
+ (setq url (format fetch
+ (mm-url-form-encode-xwfu art)))
(mm-with-unibyte-current-buffer
(mm-url-insert url))
(if (nnweb-definition 'reference t)
(defun nnweb-google-wash-article ()
;; We have Google's masked e-mail addresses here. :-/
(let ((case-fold-search t)
- (start-re "<pre>\n *")
- (end-re "\n *</pre>"))
+ (start-re "<pre>[\r\n ]*")
+ (end-re "[\r\n ]*</pre>"))
(goto-char (point-min))
(if (save-excursion
(or (re-search-forward "The requested message.*could not be found."
(goto-char (point-max))
(widen)
(narrow-to-region (point)
- (search-forward "</td" nil t))
+ (search-forward "</table" nil t))
(mm-url-remove-markup)
(mm-url-decode-entities)
- (search-backward " - ")
- (when (looking-at
- " - \\([a-zA-Z]+\\) \\([0-9]+\\)\\(?: \\([0-9]\\{4\\}\\)\\)?, [^\n]+by \\([^<\n]+\\)\n")
- (setq From (match-string 4)
- Date (format "%s %s 00:00:00 %s"
- (match-string 1)
- (match-string 2)
- (or (match-string 3)
- (substring (current-time-string) -4)))))
-
+ (goto-char (point-max))
+ (when
+ (re-search-backward
+ "^\\(?:\\(\\w+\\) \\([0-9]+\\)\\|\\S-+\\)\\(?: \\([0-9]\\{4\\}\\)\\)? by ?\\(.*\\)"
+ nil t)
+ (setq Date (if (match-string 1)
+ (format "%s %s 00:00:00 %s"
+ (match-string 1)
+ (match-string 2)
+ (or (match-string 3)
+ (substring (current-time-string) -4)))
+ (current-time-string)))
+ (setq From (match-string 4)))
(widen)
- (forward-line 1)
(incf i)
(unless (nnweb-get-hashtb url)
(push
(goto-char (point-min))
(incf i 100)
(if (or (not (re-search-forward
- "<a href=\"\n\\([^>\"]+\\)\"><img src=\"[^\"]+next"
+ "<a [^>]+href=\"\n?\\([^>\" \n\t]+\\)[^<]*<img[^>]+src=[^>]+next"
nil t))
(>= i nnweb-max-hits))
(setq more nil)
"?"
(mm-url-encode-www-form-urlencoded
`(("q" . ,search)
- ("num" . "100")
+ ("num" . ,(number-to-string
+ (min 100 nnweb-max-hits)))
("hq" . "")
("hl" . "en")
("lr" . "")
(forward-line 1)
;; Thanks to Olly Betts we now have NOV lines in our buffer!
(while (not (eobp))
- (unless (eolp)
+ (unless (or (eolp) (looking-at "\x0d"))
(let ((header (nnheader-parse-nov)))
(let ((xref (mail-header-xref header))
(from (mail-header-from header))
(subject (mail-header-subject header))
(rfc2047-encoding-type 'mime))
- (when (string-match " \\([^:]+\\):\\([0-9]+\\)" xref)
+ (when (string-match " \\([^:]+\\)[:/]\\([0-9]+\\)" xref)
(mail-header-set-xref
header
(format "http://article.gmane.org/%s/%s/raw"
(rfc2047-encode-string subject))
(unless (nnweb-get-hashtb (mail-header-xref header))
- (push
- (list
- (incf (cdr active))
- header)
- map)
+ (mail-header-set-number header (incf (cdr active)))
+ (push (list (mail-header-number header) header) map)
(nnweb-set-hashtb (cadar map) (car map))))))
(forward-line 1)))
(nnheader-message 7 "Searching Gmane...done")
"?"
(mm-url-encode-www-form-urlencoded
`(("query" . ,search)
- ("HITSPERPAGE" . ,(number-to-string nnweb-max-hits))))))
+ ("HITSPERPAGE" . ,(number-to-string nnweb-max-hits))
+ ;;("TOPDOC" . "1000")
+ ))))
(setq buffer-file-name nil)
(set-buffer-multibyte t)
(mm-decode-coding-region (point-min) (point-max) 'utf-8)