X-Git-Url: http://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Fnnweb.el;h=1cfa7a4cbc30544135451c5ad180000f97b06892;hp=025ac62dc29a94cecc33d4b4e1b201623b03195e;hb=06e3d74faa6b1196f0a7b877acc1bb6b6c1563a8;hpb=80b0f5c32fb35747f6449aa9d3b9bd0c40f1f12d diff --git a/lisp/nnweb.el b/lisp/nnweb.el index 025ac62dc..1cfa7a4cb 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,15 +1,17 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996-2000 Free Software Foundation, Inc. + +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, +;; 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: news ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,14 +19,11 @@ ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: -;; Note: You need to have `url' and `w3' installed for this -;; backend to work. +;; Note: You need to have `w3' installed for some functions to work. ;;; Code: @@ -36,58 +35,54 @@ (require 'gnus) (require 'nnmail) (require 'mm-util) -(eval-when-compile +(require 'mm-url) +(eval-and-compile (ignore-errors - (require 'w3) - (require 'url) - (require 'w3-forms))) -;; Report failure to find w3 at load time if appropriate. -(eval '(progn - (require 'w3) - (require 'url) - (require 'w3-forms))) + (require 'url))) +(autoload 'w3-parse-buffer "w3-parse") (nnoo-declare nnweb) (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") "Where nnweb will save its files.") -(defvoo nnweb-type 'dejanews +(defvoo nnweb-type 'google "What search engine type is being used. -Valid types include `dejanews', `dejanewsold', `reference', -and `altavista'.") +Valid types include `google', `dejanews', and `gmane'.") (defvar nnweb-type-definition - '((dejanews - (article . ignore) - (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanews-search) - (address . "http://www.deja.com/=dnc/qs.xp") - (identifier . nnweb-dejanews-identity)) - (dejanewsold - (article . ignore) - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanewsold-search) - (address . "http://www.deja.com/dnquery.xp") - (identifier . nnweb-dejanews-identity)) - (reference - (article . nnweb-reference-wash-article) - (map . nnweb-reference-create-mapping) - (search . nnweb-reference-search) - (address . "http://www.reference.com/cgi-bin/pn/go") - (identifier . identity)) - (altavista - (article . nnweb-altavista-wash-article) - (map . nnweb-altavista-create-mapping) - (search . nnweb-altavista-search) - (address . "http://www.altavista.digital.com/cgi-bin/query") - (id . "/cgi-bin/news?id@%s") - (identifier . identity))) + '((google + (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source") + (result . "http://groups.google.com/group/%s/msg/%s?dmode=source") + (article . nnweb-google-wash-article) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (base . "http://groups.google.com") + (identifier . nnweb-google-identity)) + (dejanews ;; alias of google + (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source") + (result . "http://groups.google.com/group/%s/msg/%s?dmode=source") + (article . nnweb-google-wash-article) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (base . "http://groups.google.com") + (identifier . nnweb-google-identity)) + (gmane + (article . nnweb-gmane-wash-article) + (id . "http://gmane.org/view.php?group=%s") + (reference . identity) + (map . nnweb-gmane-create-mapping) + (search . nnweb-gmane-search) + (address . "http://search.gmane.org/nov.php") + (identifier . nnweb-gmane-identity))) "Type-definition alist.") (defvoo nnweb-search nil - "Search string to feed to DejaNews.") + "Search string to feed to Google.") (defvoo nnweb-max-hits 999 "Maximum number of hits to display.") @@ -109,8 +104,7 @@ and `altavista'.") (deffoo nnweb-retrieve-headers (articles &optional group server fetch-old) (nnweb-possibly-change-server group server) - (save-excursion - (set-buffer nntp-server-buffer) + (with-current-buffer nntp-server-buffer (erase-buffer) (let (article header) (mm-with-unibyte-current-buffer @@ -121,26 +115,21 @@ and `altavista'.") (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) - (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (if nnweb-ephemeral-p + (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (unless nnweb-articles + (nnweb-read-overview group))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) (nnweb-write-overview group))) -(deffoo nnweb-request-group (group &optional server dont-check) - (nnweb-possibly-change-server nil server) - (when (and group - (not (equal group nnweb-group)) - (not nnweb-ephemeral-p)) - (let ((info (assoc group nnweb-group-alist))) - (when info - (setq nnweb-group group) - (setq nnweb-type (nth 2 info)) - (setq nnweb-search (nth 3 info)) - (unless dont-check - (nnweb-read-overview group))))) - (unless dont-check - (nnweb-request-scan group)) +(deffoo nnweb-request-group (group &optional server dont-check info) + (nnweb-possibly-change-server group server) + (unless (or nnweb-ephemeral-p + dont-check + nnweb-articles) + (nnweb-read-overview group)) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) @@ -157,64 +146,62 @@ and `altavista'.") (deffoo nnweb-close-group (group &optional server) (nnweb-possibly-change-server group server) (when (gnus-buffer-live-p nnweb-buffer) - (save-excursion - (set-buffer nnweb-buffer) + (with-current-buffer nnweb-buffer (set-buffer-modified-p nil) (kill-buffer nnweb-buffer))) t) (deffoo nnweb-request-article (article &optional group server buffer) (nnweb-possibly-change-server group server) - (save-excursion - (set-buffer (or buffer nntp-server-buffer)) + (with-current-buffer (or buffer nntp-server-buffer) (let* ((header (cadr (assq article nnweb-articles))) (url (and header (mail-header-xref header)))) (when (or (and url (mm-with-unibyte-current-buffer - (nnweb-fetch-url url))) + (mm-url-insert url))) (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) - art) + art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) - (and fetch - art - (mm-with-unibyte-current-buffer - (nnweb-fetch-url - (format fetch article))))))) + (when (and fetch art) + (setq url (format fetch + (mm-url-form-encode-xwfu art))) + (mm-with-unibyte-current-buffer + (mm-url-insert url)) + (if (nnweb-definition 'reference t) + (setq article + (funcall (nnweb-definition + 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) (cons group (and (numberp article) article)))))) (deffoo nnweb-close-server (&optional server) (when (and (nnweb-server-opened server) (gnus-buffer-live-p nnweb-buffer)) - (save-excursion - (set-buffer nnweb-buffer) + (with-current-buffer nnweb-buffer (set-buffer-modified-p nil) (kill-buffer nnweb-buffer))) (nnoo-close-server 'nnweb server)) (deffoo nnweb-request-list (&optional server) (nnweb-possibly-change-server nil server) - (save-excursion - (set-buffer nntp-server-buffer) - (nnmail-generate-active nnweb-group-alist) + (with-current-buffer nntp-server-buffer + (nnmail-generate-active (list (assoc server nnweb-group-alist))) t)) -(deffoo nnweb-request-update-info (group info &optional server) - (nnweb-possibly-change-server group server)) +(deffoo nnweb-request-update-info (group info &optional server)) (deffoo nnweb-asynchronous-p () - t) + nil) (deffoo nnweb-request-create-group (group &optional server args) (nnweb-possibly-change-server nil server) (nnweb-request-delete-group group) - (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist) + (push `(,group ,(cons 1 0)) nnweb-group-alist) (nnweb-write-active) t) @@ -284,397 +271,262 @@ and `altavista'.") def)) (defun nnweb-possibly-change-server (&optional group server) - (nnweb-init server) (when server (unless (nnweb-server-opened server) - (nnweb-open-server server))) + (nnweb-open-server server)) + (nnweb-init server)) (unless nnweb-group-alist (nnweb-read-active)) + (unless nnweb-hashtb + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group - (when (and (not nnweb-ephemeral-p) - (not (equal group nnweb-group))) - (nnweb-request-group group nil t)))) + (setq nnweb-group group))) (defun nnweb-init (server) "Initialize buffers and such." (unless (gnus-buffer-live-p nnweb-buffer) (setq nnweb-buffer - (save-excursion - (mm-with-unibyte - (nnheader-set-temp-buffer - (format " *nnweb %s %s %s*" - nnweb-type nnweb-search server)) - (current-buffer)))))) - -(defun nnweb-fetch-url (url) - (let (buf) - (save-excursion - (if (not nnheader-callback-function) - (progn - (with-temp-buffer - (mm-enable-multibyte) - (let ((coding-system-for-read 'binary) - (coding-system-for-write 'binary) - (default-process-coding-system 'binary)) - (nnweb-insert url)) - (setq buf (buffer-string))) - (erase-buffer) - (insert buf) - t) - (nnweb-url-retrieve-asynch - url 'nnweb-callback (current-buffer) nnheader-callback-function) - t)))) - -(defun nnweb-callback (buffer callback) - (when (gnus-buffer-live-p url-working-buffer) - (save-excursion - (set-buffer url-working-buffer) - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities) - (set-buffer buffer) - (goto-char (point-max)) - (insert-buffer-substring url-working-buffer)) - (funcall callback t) - (gnus-kill-buffer url-working-buffer))) - -(defun nnweb-url-retrieve-asynch (url callback &rest data) - (let ((url-request-method "GET") - (old-asynch url-be-asynchronous) - (url-request-data nil) - (url-request-extra-headers nil) - (url-working-buffer (generate-new-buffer-name " *nnweb*"))) - (setq-default url-be-asynchronous t) - (save-excursion - (set-buffer (get-buffer-create url-working-buffer)) - (setq url-current-callback-data data - url-be-asynchronous t - url-current-callback-func callback) - (url-retrieve url)) - (setq-default url-be-asynchronous old-asynch))) + (save-current-buffer + (nnheader-set-temp-buffer + (format " *nnweb %s %s %s*" + nnweb-type nnweb-search server)) + (mm-disable-multibyte) + (current-buffer))))) ;;; -;;; DejaNews functions. +;;; groups.google.com ;;; -(defun nnweb-dejanews-create-mapping () - "Perform the search and create an number-to-url alist." - (save-excursion - (set-buffer nnweb-buffer) +(defun nnweb-google-wash-article () + ;; We have Google's masked e-mail addresses here. :-/ + (let ((case-fold-search t) + (start-re "
[\r\n ]*")
+	(end-re "[\r\n ]*
")) + (goto-char (point-min)) + (if (save-excursion + (or (re-search-forward "The requested message.*could not be found." + nil t) + (not (and (re-search-forward start-re nil t) + (re-search-forward end-re nil t))))) + ;; FIXME: Don't know how to indicate "not found". + ;; Should this function throw an error? --rsteib + (progn + (gnus-message 3 "Requested article not found") + (erase-buffer)) + (delete-region (point-min) + (re-search-forward start-re)) + (goto-char (point-min)) + (delete-region (progn + (re-search-forward end-re) + (match-beginning 0)) + (point-max)) + (mm-url-decode-entities)))) + +(defun nnweb-google-parse-1 (&optional Message-ID) + "Parse search result in current buffer." + (let ((i 0) + (case-fold-search t) + (active (cadr (assoc nnweb-group nnweb-group-alist))) + Subject Score Date Newsgroups From + map url mid) + (unless active + (push (list nnweb-group (setq active (cons 1 0))) + nnweb-group-alist)) + ;; Go through all the article hits on this page. + (goto-char (point-min)) + (while + (re-search-forward + "a +href=\"/group/\\([^>\"]+\\)/browse_thread/[^>]+#\\([0-9a-f]+\\)" + nil t) + (setq Newsgroups (match-string-no-properties 1) + ;; Note: Starting with Google Groups 2, `mid' is a Google-internal + ;; ID, not a proper Message-ID. + mid (match-string-no-properties 2) + url (format + (nnweb-definition 'result) Newsgroups mid)) + (narrow-to-region (search-forward ">" nil t) + (search-forward "" nil t)) + (mm-url-remove-markup) + (mm-url-decode-entities) + (setq Subject (buffer-string)) + (goto-char (point-max)) + (widen) + (narrow-to-region (point) + (search-forward "") - nil 0 0 url)) - map) - (nnweb-set-hashtb (cadar map) (car map))))) - ;; See whether there is a "Get next 20 hits" button here. - (goto-char (point-min)) - (if (or (not (re-search-forward - "HREF=\"\\([^\"]+\\)\"[<>b]+Next result" nil t)) - (>= i nnweb-max-hits)) - (setq more nil) - ;; Yup -- fetch it. - (setq more (match-string 1)) - (erase-buffer) - (url-insert-file-contents more))) - ;; Return the articles in the right order. - (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car)))))) - -(defun nnweb-dejanews-search (search) - (nnweb-insert + (let ((more t) + (i 0)) + (while more + (setq nnweb-articles + (nconc nnweb-articles (nnweb-google-parse-1))) + ;; Check if there are more articles to fetch + (goto-char (point-min)) + (incf i 100) + (if (or (not (re-search-forward + "]+href=\"\n?\\([^>\" \n\t]+\\)[^<]*]+src=[^>]+next" + nil t)) + (>= i nnweb-max-hits)) + (setq more nil) + ;; Yup, there are more articles + (setq more (concat (nnweb-definition 'base) (match-string 1))) + (when more + (erase-buffer) + (nnheader-message 7 "Searching google...(%d)" i) + (mm-url-insert more)))) + ;; Return the articles in the right order. + (nnheader-message 7 "Searching google...done") + (setq nnweb-articles + (sort nnweb-articles 'car-less-than-car)))))) + +(defun nnweb-google-search (search) + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded - `(("ST" . "PS") - ("svcclass" . "dnyr") - ("QRY" . ,search) - ("defaultOp" . "AND") - ("DBS" . "1") - ("OP" . "dnquery.xp") - ("LNG" . "ALL") - ("maxhits" . "100") - ("threaded" . "0") - ("format" . "verbose2") - ("showsort" . "date") - ("agesign" . "1") - ("ageweight" . "1"))))) + (mm-url-encode-www-form-urlencoded + `(("q" . ,search) + ("num" . ,(number-to-string + (min 100 nnweb-max-hits))) + ("hq" . "") + ("hl" . "en") + ("lr" . "") + ("safe" . "off") + ("sites" . "groups") + ("filter" . "0"))))) t) -(defun nnweb-dejanewsold-search (search) - (nnweb-fetch-form - (nnweb-definition 'address) - `(("query" . ,search) - ("defaultOp" . "AND") - ("svcclass" . "dnold") - ("maxhits" . "100") - ("format" . "verbose2") - ("threaded" . "0") - ("showsort" . "date") - ("agesign" . "1") - ("ageweight" . "1"))) - t) - -(defun nnweb-dejanews-identity (url) +(defun nnweb-google-identity (url) "Return an unique identifier based on URL." - (if (string-match "AN=\\([0-9]+\\)" url) + (if (string-match "selm=\\([^ &>]+\\)" url) (match-string 1 url) url)) ;;; -;;; InReference +;;; gmane.org ;;; - -(defun nnweb-reference-create-mapping () - "Perform the search and create an number-to-url alist." - (save-excursion - (set-buffer nnweb-buffer) - (erase-buffer) - (when (funcall (nnweb-definition 'search) nnweb-search) - (let ((i 0) - (more t) - (case-fold-search t) - (active (or (cadr (assoc nnweb-group nnweb-group-alist)) - (cons 1 0))) - Subject Score Date Newsgroups From Message-ID - map url) - (while more - ;; Go through all the article hits on this page. - (goto-char (point-min)) - (search-forward "
" nil t) - (delete-region (point-min) (point)) - (goto-char (point-min)) - (while (re-search-forward "^ +[0-9]+\\." nil t) - (narrow-to-region - (point) - (if (re-search-forward "^$" nil t) - (match-beginning 0) - (point-max))) - (goto-char (point-min)) - (when (looking-at ".*href=\"\\([^\"]+\\)\"") - (setq url (match-string 1))) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (search-forward "\t" nil t) - (replace-match " ")) - (goto-char (point-min)) - (while (re-search-forward "^\\([^:]+\\): \\(.*\\)$" nil t) - (set (intern (match-string 1)) (match-string 2))) - (widen) - (search-forward "" nil t) - (incf i) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat "(" Newsgroups ") " Subject) From Date - Message-ID - nil 0 (string-to-int Score) url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) - (setq more nil)) - ;; Return the articles in the right order. - (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car)))))) - -(defun nnweb-reference-wash-article () - (let ((case-fold-search t)) - (goto-char (point-min)) - (re-search-forward "^
" nil t) - (delete-region (point-min) (point)) - (search-forward "
" nil t)
-    (forward-line -1)
-    (let ((body (point-marker)))
-      (search-forward "
" nil t) - (delete-region (point) (point-max)) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (looking-at " *$") - (gnus-delete-line)) - (narrow-to-region (point-min) body) - (while (and (re-search-forward "^$" nil t) - (not (eobp))) - (gnus-delete-line)) - (goto-char (point-min)) - (while (looking-at "\\(^[^ ]+:\\) *") - (replace-match "\\1 " t) - (forward-line 1)) - (goto-char (point-min)) - (when (re-search-forward "^References:" nil t) - (narrow-to-region - (point) (if (re-search-forward "^$\\|^[^:]+:" nil t) - (match-beginning 0) - (point-max))) +(defun nnweb-gmane-create-mapping () + "Perform the search and create a number-to-url alist." + (with-current-buffer nnweb-buffer + (let ((case-fold-search t) + (active (or (cadr (assoc nnweb-group nnweb-group-alist)) + (cons 1 0))) + map) + (erase-buffer) + (nnheader-message 7 "Searching Gmane..." ) + (when (funcall (nnweb-definition 'search) nnweb-search) (goto-char (point-min)) + ;; Skip the status line + (forward-line 1) + ;; Thanks to Olly Betts we now have NOV lines in our buffer! (while (not (eobp)) - (unless (looking-at "References") - (insert "\t") - (forward-line 1))) - (goto-char (point-min)) - (while (search-forward "," nil t) - (replace-match " " t t))) - (widen) - (set-marker body nil)))) - -(defun nnweb-reference-search (search) - (url-insert-file-contents - (concat - (nnweb-definition 'address) - "?" - (nnweb-encode-www-form-urlencoded - `(("search" . "advanced") - ("querytext" . ,search) - ("subj" . "") - ("name" . "") - ("login" . "") - ("host" . "") - ("organization" . "") - ("groups" . "") - ("keywords" . "") - ("choice" . "Search") - ("startmonth" . "Jul") - ("startday" . "25") - ("startyear" . "1996") - ("endmonth" . "Aug") - ("endday" . "24") - ("endyear" . "1996") - ("mode" . "Quick") - ("verbosity" . "Verbose") - ("ranking" . "Relevance") - ("first" . "1") - ("last" . "25") - ("score" . "50"))))) - (setq buffer-file-name nil) - t) - -;;; -;;; Alta Vista -;;; - -(defun nnweb-altavista-create-mapping () - "Perform the search and create an number-to-url alist." - (save-excursion - (set-buffer nnweb-buffer) - (erase-buffer) - (let ((part 0)) - (when (funcall (nnweb-definition 'search) nnweb-search part) - (let ((i 0) - (more t) - (case-fold-search t) - (active (or (cadr (assoc nnweb-group nnweb-group-alist)) - (cons 1 0))) - subject date from id group - map url) - (while more - ;; Go through all the article hits on this page. - (goto-char (point-min)) - (search-forward "
" nil t) - (delete-region (point-min) (match-beginning 0)) - (goto-char (point-min)) - (while (search-forward "
" nil t) - (replace-match "\n")) - (nnweb-decode-entities) - (goto-char (point-min)) - (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\([^>]*\\)
\\([^-]+\\)- \\([^<]+\\)<.*href=\"news:\\([^\"]+\\)\">.*\">\\(.+\\)

" - nil t) - (setq url (match-string 1) - subject (match-string 2) - date (match-string 3) - group (match-string 4) - id (concat "<" (match-string 5) ">") - from (match-string 6)) - (incf i) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat "(" group ") " subject) from date - id nil 0 0 url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) - ;; See if we want more. - (when (or (not nnweb-articles) - (>= i nnweb-max-hits) - (not (funcall (nnweb-definition 'search) - nnweb-search (incf part)))) - (setq more nil))) - ;; Return the articles in the right order. - (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car))))))) - -(defun nnweb-altavista-wash-article () - (goto-char (point-min)) + (unless (or (eolp) (looking-at "\x0d")) + (let ((header (nnheader-parse-nov))) + (let ((xref (mail-header-xref header)) + (from (mail-header-from header)) + (subject (mail-header-subject header)) + (rfc2047-encoding-type 'mime)) + (when (string-match " \\([^:]+\\)[:/]\\([0-9]+\\)" xref) + (mail-header-set-xref + header + (format "http://article.gmane.org/%s/%s/raw" + (match-string 1 xref) + (match-string 2 xref)))) + + ;; Add host part to gmane-encrypted addresses + (when (string-match "@$" from) + (mail-header-set-from header + (concat from "public.gmane.org"))) + + (mail-header-set-subject header + (rfc2047-encode-string subject)) + + (unless (nnweb-get-hashtb (mail-header-xref header)) + (mail-header-set-number header (incf (cdr active))) + (push (list (mail-header-number header) header) map) + (nnweb-set-hashtb (cadar map) (car map)))))) + (forward-line 1))) + (nnheader-message 7 "Searching Gmane...done") + (setq nnweb-articles + (sort (nconc nnweb-articles map) 'car-less-than-car))))) + +(defun nnweb-gmane-wash-article () (let ((case-fold-search t)) - (when (re-search-forward "^" nil t) - (delete-region (point-min) (match-beginning 0))) (goto-char (point-min)) - (while (looking-at "\\([^ ]+\\) + +\\(.*\\)$") - (replace-match "\\1: \\2" t) - (forward-line 1)) - (when (re-search-backward "^References:" nil t) - (narrow-to-region (point) (progn (forward-line 1) (point))) + (when (search-forward "" nil t) + (delete-region (point-min) (point)) (goto-char (point-min)) - (while (re-search-forward "[0-9]+" nil t) - (replace-match "<\\1> " t))) - (widen) - (nnweb-remove-markup))) + (while (looking-at "^

  • \\([^ ]+\\).*
  • ") + (replace-match "\\1\\2" t) + (forward-line 1)) + (mm-url-remove-markup)))) -(defun nnweb-altavista-search (search &optional part) - (url-insert-file-contents +(defun nnweb-gmane-search (search) + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded - `(("pg" . "aq") - ("what" . "news") - ,@(when part `(("stq" . ,(int-to-string (* part 30))))) - ("fmt" . "d") - ("q" . ,search) - ("r" . "") - ("d0" . "") - ("d1" . ""))))) + (mm-url-encode-www-form-urlencoded + `(("query" . ,search) + ("HITSPERPAGE" . ,(number-to-string nnweb-max-hits)) + ;;("TOPDOC" . "1000") + )))) (setq buffer-file-name nil) + (unless (featurep 'xemacs) (set-buffer-multibyte t)) + (mm-decode-coding-region (point-min) (point-max) 'utf-8) t) +(defun nnweb-gmane-identity (url) + "Return a unique identifier based on URL." + (if (string-match "group=\\(.+\\)" url) + (match-string 1 url) + url)) + ;;; ;;; General web/w3 interface utility functions ;;; @@ -682,6 +534,10 @@ and `altavista'.") (defun nnweb-insert-html (parse) "Insert HTML based on a w3 parse tree." (if (stringp parse) + ;; We used to call nnheader-string-as-multibyte here, but it cannot + ;; be right, so I removed it. If a bug shows up because of this change, + ;; please do not blindly revert the change, but help me find the real + ;; cause of the bug instead. --Stef (insert parse) (insert "<" (symbol-name (car parse)) " ") (insert (mapconcat @@ -694,77 +550,9 @@ and `altavista'.") (nth 1 parse) " ")) (insert ">\n") - (mapcar 'nnweb-insert-html (nth 2 parse)) + (mapc 'nnweb-insert-html (nth 2 parse)) (insert "\n"))) -(defun nnweb-encode-www-form-urlencoded (pairs) - "Return PAIRS encoded for forms." - (mapconcat - (function - (lambda (data) - (concat (w3-form-encode-xwfu (car data)) "=" - (w3-form-encode-xwfu (cdr data))))) - pairs "&")) - -(defun nnweb-fetch-form (url pairs) - "Fetch a form from URL with PAIRS as the data using the POST method." - (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs)) - (url-request-method "POST") - (url-request-extra-headers - '(("Content-type" . "application/x-www-form-urlencoded")))) - (url-insert-file-contents url) - (setq buffer-file-name nil)) - t) - -(defun nnweb-decode-entities () - "Decode all HTML entities." - (goto-char (point-min)) - (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) - (replace-match (char-to-string - (if (eq (aref (match-string 1) 0) ?\#) - (let ((c - (string-to-number (substring - (match-string 1) 1)))) - (if (mm-char-or-char-int-p c) c 32)) - (or (cdr (assq (intern (match-string 1)) - w3-html-entities)) - ?#))) - t t))) - -(defun nnweb-decode-entities-string (str) - (with-temp-buffer - (insert str) - (nnweb-decode-entities) - (buffer-substring (point-min) (point-max)))) - -(defun nnweb-remove-markup () - "Remove all HTML markup, leaving just plain text." - (goto-char (point-min)) - (while (search-forward "" nil t) - (point-max)))) - (goto-char (point-min)) - (while (re-search-forward "<[^>]+>" nil t) - (replace-match "" t t))) - -(defun nnweb-insert (url &optional follow-refresh) - "Insert the contents from an URL in the current buffer. -If FOLLOW-REFRESH is non-nil, redirect refresh url in META." - (let ((name buffer-file-name)) - (if follow-refresh - (save-restriction - (narrow-to-region (point) (point)) - (url-insert-file-contents url) - (goto-char (point-min)) - (when (re-search-forward - "HTTP-EQUIV=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) - (let ((url (match-string 1))) - (delete-region (point-min) (point-max)) - (nnweb-insert url t)))) - (url-insert-file-contents url)) - (setq buffer-file-name name))) - (defun nnweb-parse-find (type parse &optional maxdepth) "Find the element of TYPE in PARSE." (catch 'found