X-Git-Url: http://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Fnnrss.el;h=f93d811068d11d9193567a7c2b317f1a16742456;hp=b99b90ba3cee05edd84eb59060cba7d858af97a0;hb=a82fb840dc1a4e4f343f7f3e0faa307e841f3bc2;hpb=05d4a5c7e7a591c7b4cc55f0bfff45d0da530bd7 diff --git a/lisp/nnrss.el b/lisp/nnrss.el index b99b90ba3..f93d81106 100644 --- a/lisp/nnrss.el +++ b/lisp/nnrss.el @@ -1,31 +1,34 @@ ;;; nnrss.el --- interfacing with RSS -;; Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. +;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, +;; 2008, 2009, 2010 Free Software Foundation, Inc. ;; Author: Shenghuo Zhu ;; Keywords: RSS ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published -;; by the Free Software Foundation; either version 2, or (at your -;; option) any later version. +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. -;; GNU Emacs is distributed in the hope that it will be useful, but -;; WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;; General Public License for more details. +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: ;;; Code: +;; For Emacs < 22.2. +(eval-and-compile + (unless (fboundp 'declare-function) (defmacro declare-function (&rest r)))) + (eval-when-compile (require 'cl)) (require 'gnus) @@ -49,6 +52,15 @@ (defvoo nnrss-directory (nnheader-concat gnus-directory "rss/") "Where nnrss will save its files.") +(defvoo nnrss-ignore-article-fields '(slash:comments) + "*List of fields that should be ignored when comparing RSS articles. +Some RSS feeds update article fields during their lives, e.g. to +indicate the number of comments or the number of times the +articles have been seen. However, if there is a difference +between the local article and the distant one, the latter is +considered to be new. To avoid this and discard some fields, set +this variable to the list of fields to be ignored.") + ;; (group max rss-url) (defvoo nnrss-server-data nil) @@ -82,14 +94,30 @@ ENTRY is the record of the current headline. GROUP is the group name. ARTICLE is the article number of the current headline.") (defvar nnrss-file-coding-system mm-universal-coding-system - "Coding system used when reading and writing files.") - -(defvar nnrss-compatible-encoding-alist '((iso-8859-1 . windows-1252)) + "*Coding system used when reading and writing files. +If you run Gnus with various versions of Emacsen, the value of this +variable should be the coding system that all those Emacsen support. +Note that you have to regenerate all the nnrss groups if you change +the value. Moreover, you should be patient even if you are made to +read the same articles twice, that arises for the difference of the +versions of xml.el.") + +(defvar nnrss-compatible-encoding-alist + (delq nil (mapcar (lambda (elem) + (if (and (mm-coding-system-p (car elem)) + (mm-coding-system-p (cdr elem))) + elem)) + mm-charset-override-alist)) "Alist of encodings and those supersets. The cdr of each element is used to decode data if it is available when -the car is what the data specify as the encoding. Or, the car is used +the car is what the data specify as the encoding. Or, the car is used for decoding when the cdr that the data specify is not available.") +(defvar nnrss-wash-html-in-text-plain-parts nil + "*Non-nil means render text in text/plain parts as HTML. +The function specified by the `mm-text-html-renderer' variable will be +used to render text. If it is nil, text will simply be folded.") + (nnoo-define-basics nnrss) ;;; Interface functions @@ -106,8 +134,7 @@ for decoding when the cdr that the data specify is not available.") (setq group (nnrss-decode-group-name group)) (nnrss-possibly-change-group group server) (let (e) - (save-excursion - (set-buffer nntp-server-buffer) + (with-current-buffer nntp-server-buffer (erase-buffer) (dolist (article articles) (if (setq e (assq article nnrss-group-data)) @@ -151,7 +178,7 @@ for decoding when the cdr that the data specify is not available.") "\n"))))) 'nov) -(deffoo nnrss-request-group (group &optional server dont-check) +(deffoo nnrss-request-group (group &optional server dont-check info) (setq group (nnrss-decode-group-name group)) (nnheader-message 6 "nnrss: Requesting %s..." group) (nnrss-possibly-change-group group server) @@ -169,6 +196,9 @@ for decoding when the cdr that the data specify is not available.") (deffoo nnrss-close-group (group &optional server) t) +(defvar mm-text-html-renderer) +(defvar mm-text-html-washer-alist) + (deffoo nnrss-request-article (article &optional group server buffer) (setq group (nnrss-decode-group-name group)) (when (stringp article) @@ -191,21 +221,16 @@ for decoding when the cdr that the data specify is not available.") (if (nth 5 e) (insert "Date: " (nnrss-format-string (nth 5 e)) "\n")) (let ((header (buffer-string)) - (text (if (nth 6 e) - (mapconcat 'identity - (delete "" (split-string (nth 6 e) "\n+")) - " "))) + (text (nth 6 e)) (link (nth 2 e)) (enclosure (nth 7 e)) (comments (nth 8 e)) - ;; Enable encoding of Newsgroups header in XEmacs. - (default-enable-multibyte-characters t) (rfc2047-header-encoding-alist (if (mm-coding-system-p 'utf-8) (cons '("Newsgroups" . utf-8) rfc2047-header-encoding-alist) rfc2047-header-encoding-alist)) - rfc2047-encode-encoded-words body) + rfc2047-encode-encoded-words body fn) (when (or text link enclosure comments) (insert "\n") (insert "<#multipart type=alternative>\n" @@ -214,23 +239,46 @@ for decoding when the cdr that the data specify is not available.") (when text (insert text) (goto-char body) - ;; See `nnrss-check-group', which inserts "

". - (if (search-forward "

" nil t) + (if (and nnrss-wash-html-in-text-plain-parts + (progn + (require 'mm-view) + (setq fn (or (cdr (assq mm-text-html-renderer + mm-text-html-washer-alist)) + mm-text-html-renderer)))) + (progn + (narrow-to-region body (point-max)) + (if (functionp fn) + (funcall fn) + (apply (car fn) (cdr fn))) + (widen) + (goto-char body) + (re-search-forward "[^\t\n ]" nil t) + (beginning-of-line) + (delete-region body (point)) + (goto-char (point-max)) + (skip-chars-backward "\t\n ") + (end-of-line) + (delete-region (point) (point-max)) + (insert "\n")) + (while (re-search-forward "\n+" nil t) + (replace-match " ")) + (goto-char body) + ;; See `nnrss-check-group', which inserts "

". + (when (search-forward "

" nil t) (if (eobp) (replace-match "\n") - (replace-match "\n\n") - (let ((fill-column default-fill-column) - (window (get-buffer-window nntp-server-buffer))) - (when window - (setq fill-column - (max 1 (/ (* (window-width window) 7) 8)))) - (fill-region (point) (point-max)) - (goto-char (point-max)) - ;; XEmacs version of `fill-region' inserts newline. - (unless (bolp) - (insert "\n")))) - (goto-char (point-max)) - (insert "\n")) + (replace-match "\n\n"))) + (unless (eobp) + (let ((fill-column (default-value 'fill-column)) + (window (get-buffer-window nntp-server-buffer))) + (when window + (setq fill-column + (max 1 (/ (* (window-width window) 7) 8)))) + (fill-region (point) (point-max)) + (goto-char (point-max)) + ;; XEmacs version of `fill-region' inserts newline. + (unless (bolp) + (insert "\n"))))) (when (or link enclosure) (insert "\n"))) (when link @@ -259,7 +307,11 @@ for decoding when the cdr that the data specify is not available.") "<#/part>\n" "<#/multipart>\n")) (condition-case nil - (mml-to-mime) + ;; Allow `mml-to-mime' to generate MIME article without + ;; making inquiry to a user for unknown encoding. + (let ((mml-confirmation-set + (cons 'unknown-encoding mml-confirmation-set))) + (mml-to-mime)) (error (erase-buffer) (insert header @@ -289,11 +341,6 @@ for decoding when the cdr that the data specify is not available.") ;; we return the article number. (cons nnrss-group (car e)))))) -(deffoo nnrss-request-list (&optional server) - (nnrss-possibly-change-group nil server) - (nnrss-generate-active) - t) - (deffoo nnrss-open-server (server &optional defs connectionless) (nnrss-read-server-data server) (nnoo-change-server 'nnrss server defs) @@ -330,19 +377,30 @@ for decoding when the cdr that the data specify is not available.") (delq (assoc group nnrss-server-data) nnrss-server-data)) (nnrss-save-server-data server) (ignore-errors - (delete-file (nnrss-make-filename group server))) + (let ((file-name-coding-system nnmail-pathname-coding-system)) + (delete-file (nnrss-make-filename group server)))) t) (deffoo nnrss-request-list-newsgroups (&optional server) (nnrss-possibly-change-group nil server) - (save-excursion - (set-buffer nntp-server-buffer) + (with-current-buffer nntp-server-buffer (erase-buffer) (dolist (elem nnrss-group-alist) (if (third elem) (insert (car elem) "\t" (third elem) "\n")))) t) +(deffoo nnrss-retrieve-groups (groups &optional server) + (nnrss-possibly-change-group nil server) + (dolist (group groups) + (nnrss-check-group group server)) + (with-current-buffer nntp-server-buffer + (erase-buffer) + (dolist (group groups) + (let ((elem (assoc group nnrss-server-data))) + (insert (format "%S %s 1 y\n" group (or (cadr elem) 0))))) + 'active)) + (nnoo-define-skeleton nnrss) ;;; Internal functions @@ -368,10 +426,12 @@ otherwise return nil." nnrss-compatible-encoding-alist))))) (mm-coding-system-p 'utf-8))) +(declare-function w3-parse-buffer "ext:w3-parse" (&optional buff)) + (defun nnrss-fetch (url &optional local) "Fetch URL and put it in a the expected Lisp structure." (mm-with-unibyte-buffer - ;;some CVS versions of url.el need this to close the connection quickly + ;;some versions of url.el need this to close the connection quickly (let (cs xmlform htmlform) ;; bit o' work necessary for w3 pre-cvs and post-cvs (if local @@ -380,12 +440,18 @@ otherwise return nil." ;; FIXME: shouldn't binding `coding-system-for-read' be moved ;; to `mm-url-insert'? (let ((coding-system-for-read 'binary)) - (mm-url-insert url))) + (condition-case err + (mm-url-insert url) + (error (if (or debug-on-quit debug-on-error) + (signal (car err) (cdr err)) + (message "nnrss: Failed to fetch %s" url)))))) (nnheader-remove-cr-followed-by-lf) ;; Decode text according to the encoding attribute. (when (setq cs (nnrss-get-encoding)) - (mm-decode-coding-region (point-min) (point-max) cs) - (mm-enable-multibyte)) + (insert (prog1 + (mm-decode-coding-string (buffer-string) cs) + (erase-buffer) + (mm-enable-multibyte)))) (goto-char (point-min)) ;; Because xml-parse-region can't deal with anything that isn't @@ -417,31 +483,88 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s" (nnrss-read-group-data group server) (setq nnrss-group group))) -(defvar nnrss-extra-categories '(nnrss-snarf-moreover-categories)) - -(defun nnrss-generate-active () - (when (y-or-n-p "Fetch extra categories? ") - (mapc 'funcall nnrss-extra-categories)) - (save-excursion - (set-buffer nntp-server-buffer) - (erase-buffer) - (dolist (elem nnrss-group-alist) - (insert (prin1-to-string (car elem)) " 0 1 y\n")) - (dolist (elem nnrss-server-data) - (unless (assoc (car elem) nnrss-group-alist) - (insert (prin1-to-string (car elem)) " 0 1 y\n"))))) +(autoload 'timezone-parse-date "timezone") + +(defun nnrss-normalize-date (date) + "Return a date string of DATE in the RFC822 style. +This function handles the ISO 8601 date format described in +URL `http://www.w3.org/TR/NOTE-datetime', and also the RFC822 style +which RSS 2.0 allows." + (let (case-fold-search vector year month day time zone cts given) + (cond ((null date)) ; do nothing for this case + ;; if the date is just digits (unix time stamp): + ((string-match "^[0-9]+$" date) + (setq given (seconds-to-time (string-to-number date)))) + ;; RFC822 + ((string-match " [0-9]+ " date) + (setq vector (timezone-parse-date date) + year (string-to-number (aref vector 0))) + (when (>= year 1969) + (setq month (string-to-number (aref vector 1)) + day (string-to-number (aref vector 2))) + (unless (>= (length (setq time (aref vector 3))) 3) + (setq time "00:00:00")) + (when (and (setq zone (aref vector 4)) + (not (string-match "\\`[A-Z+-]" zone))) + (setq zone nil)))) + ;; ISO 8601 + ((string-match + (eval-when-compile + (concat + ;; 1. year + "\\(199[0-9]\\|20[0-9][0-9]\\)" + "\\(?:-" + ;; 2. month + "\\([01][0-9]\\)" + "\\(?:-" + ;; 3. day + "\\([0-3][0-9]\\)" + "\\)?\\)?\\(?:T" + ;; 4. hh:mm + "\\([012][0-9]:[0-5][0-9]\\)" + "\\(?:" + ;; 5. :ss + "\\(:[0-5][0-9]\\)" + "\\(?:\\.[0-9]+\\)?\\)?\\)?" + ;; 6+7,8,9. zone + "\\(?:\\(?:\\([+-][012][0-9]\\):\\([0-5][0-9]\\)\\)" + "\\|\\([+-][012][0-9][0-5][0-9]\\)" + "\\|\\(Z\\)\\)?")) + date) + (setq year (string-to-number (match-string 1 date)) + month (string-to-number (or (match-string 2 date) "1")) + day (string-to-number (or (match-string 3 date) "1")) + time (if (match-beginning 5) + (substring date (match-beginning 4) (match-end 5)) + (concat (or (match-string 4 date) "00:00") ":00")) + zone (cond ((match-beginning 6) + (concat (match-string 6 date) + (match-string 7 date))) + ((match-beginning 9) ;; Z + "+0000") + (t ;; nil if zone is not provided. + (match-string 8 date)))))) + (if month + (progn + (setq cts (current-time-string (encode-time 0 0 0 day month year))) + (format "%s, %02d %s %04d %s%s" + (substring cts 0 3) day (substring cts 4 7) year time + (if zone + (concat " " zone) + ""))) + (message-make-date given)))) ;;; data functions (defun nnrss-read-server-data (server) (setq nnrss-server-data nil) - (let ((file (nnrss-make-filename "nnrss" server))) + (let ((file (nnrss-make-filename "nnrss" server)) + (file-name-coding-system nnmail-pathname-coding-system)) (when (file-exists-p file) ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII ;; file names. So, we use `insert-file-contents' instead. (mm-with-multibyte-buffer - (let ((coding-system-for-read nnrss-file-coding-system) - (file-name-coding-system nnmail-pathname-coding-system)) + (let ((coding-system-for-read nnrss-file-coding-system)) (insert-file-contents file) (eval-region (point-min) (point-max))))))) @@ -464,17 +587,17 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s" (let ((pair (assoc group nnrss-server-data))) (setq nnrss-group-max (or (cadr pair) 0)) (setq nnrss-group-min (+ nnrss-group-max 1))) - (let ((file (nnrss-make-filename group server))) + (let ((file (nnrss-make-filename group server)) + (file-name-coding-system nnmail-pathname-coding-system)) (when (file-exists-p file) ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII ;; file names. So, we use `insert-file-contents' instead. (mm-with-multibyte-buffer - (let ((coding-system-for-read nnrss-file-coding-system) - (file-name-coding-system nnmail-pathname-coding-system)) + (let ((coding-system-for-read nnrss-file-coding-system)) (insert-file-contents file) (eval-region (point-min) (point-max)))) (dolist (e nnrss-group-data) - (puthash (or (nth 2 e) (nth 6 e)) t nnrss-group-hashtb) + (puthash (nth 9 e) t nnrss-group-hashtb) (when (and (car e) (> nnrss-group-min (car e))) (setq nnrss-group-min (car e))) (when (and (car e) (< nnrss-group-max (car e))) @@ -516,7 +639,11 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s" (defun nnrss-insert-w3 (url) (mm-with-unibyte-current-buffer - (mm-url-insert url))) + (condition-case err + (mm-url-insert url) + (error (if (or debug-on-quit debug-on-error) + (signal (car err) (cdr err)) + (message "nnrss: Failed to fetch %s" url)))))) (defun nnrss-decode-entities-string (string) (if string @@ -545,14 +672,25 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s" (rfc2047-encode-region (point-min) (point-max))) (goto-char (point-min)) (while (search-forward "\n" nil t) - (delete-backward-char 1)) + (delete-char -1)) (buffer-string))) ;;; Snarf functions +(defun nnrss-make-hash-index (item) + (gnus-message 9 "nnrss: Making hash index of %s" (gnus-prin1-to-string item)) + (setq item (gnus-remove-if + (lambda (field) + (when (listp field) + (memq (car field) nnrss-ignore-article-fields))) + item)) + (md5 (gnus-prin1-to-string item) + nil nil + nnrss-file-coding-system)) (defun nnrss-check-group (group server) (let (file xml subject url extra changed author date feed-subject - enclosure comments rss-ns rdf-ns content-ns dc-ns) + enclosure comments rss-ns rdf-ns content-ns dc-ns + hash-index) (if (and nnrss-use-local (file-exists-p (setq file (expand-file-name (nnrss-translate-file-chars @@ -584,24 +722,21 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s" (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml))) (when (and (listp item) (string= (concat rss-ns "item") (car item)) - (if (setq url (nnrss-decode-entities-string - (nnrss-node-text rss-ns 'link (cddr item)))) - (not (gethash url nnrss-group-hashtb)) - (setq extra (or (nnrss-node-text content-ns 'encoded item) - (nnrss-node-text rss-ns 'description item))) - (not (gethash extra nnrss-group-hashtb)))) + (progn (setq hash-index (nnrss-make-hash-index item)) + (not (gethash hash-index nnrss-group-hashtb)))) (setq subject (nnrss-node-text rss-ns 'title item)) - (setq extra (or extra - (nnrss-node-text content-ns 'encoded item) + (setq url (nnrss-decode-entities-string + (nnrss-node-text rss-ns 'link (cddr item)))) + (setq extra (or (nnrss-node-text content-ns 'encoded item) (nnrss-node-text rss-ns 'description item))) (if (setq feed-subject (nnrss-node-text dc-ns 'subject item)) (setq extra (concat feed-subject "

" extra))) (setq author (or (nnrss-node-text rss-ns 'author item) (nnrss-node-text dc-ns 'creator item) (nnrss-node-text dc-ns 'contributor item))) - (setq date (or (nnrss-node-text dc-ns 'date item) - (nnrss-node-text rss-ns 'pubDate item) - (message-make-date))) + (setq date (nnrss-normalize-date + (or (nnrss-node-text dc-ns 'date item) + (nnrss-node-text rss-ns 'pubDate item)))) (setq comments (nnrss-node-text rss-ns 'comments item)) (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item))) (let ((url (cdr (assq 'url enclosure))) @@ -634,9 +769,10 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s" date (and extra (nnrss-decode-entities-string extra)) enclosure - comments) + comments + hash-index) nnrss-group-data) - (puthash (or url extra) t nnrss-group-hashtb) + (puthash hash-index t nnrss-group-hashtb) (setq changed t)) (setq extra nil)) (when changed @@ -647,18 +783,35 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s" (push (list group nnrss-group-max) nnrss-server-data))) (nnrss-save-server-data server)))) +(declare-function gnus-group-make-rss-group "gnus-group" (&optional url)) + (defun nnrss-opml-import (opml-file) "OPML subscriptions import. Read the file and attempt to subscribe to each Feed in the file." (interactive "fImport file: ") - (mapcar - (lambda (node) (gnus-group-make-rss-group - (cdr (assq 'xmlUrl (cadr node))))) + (mapc + (lambda (node) + (let ((xmlurl (cdr (assq 'xmlUrl (cadr node))))) + (when (and xmlurl + (not (string-match "\\`[\t ]*\\'" xmlurl)) + (prog1 + (y-or-n-p (format "Subscribe to %s " xmlurl)) + (message ""))) + (condition-case err + (progn + (gnus-group-make-rss-group xmlurl) + (forward-line 1)) + (error + (message + "Failed to subscribe to %s (%s); type any key to continue: " + xmlurl + (error-message-string err)) + (let ((echo-keystrokes 0)) + (read-char))))))) (nnrss-find-el 'outline - (progn - (find-file opml-file) - (xml-parse-region (point-min) - (point-max)))))) + (mm-with-multibyte-buffer + (insert-file-contents opml-file) + (xml-parse-region (point-min) (point-max)))))) (defun nnrss-opml-export () "OPML subscription export. @@ -705,33 +858,6 @@ It is useful when `(setq nnrss-use-local t)'." (append nnheader-file-name-translation-alist '((?' . ?_))))) (nnheader-translate-file-chars name))) -(defvar nnrss-moreover-url - "http://w.moreover.com/categories/category_list_rss.html" - "The url of moreover.com categories.") - -(defun nnrss-snarf-moreover-categories () - "Snarf RSS links from moreover.com." - (interactive) - (let (category name url changed) - (with-temp-buffer - (nnrss-insert nnrss-moreover-url) - (goto-char (point-min)) - (while (re-search-forward - "\\|