;;; nnrss.el --- interfacing with RSS
-;; Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+
+;; Copyright (C) 2001, 2002, 2003, 2004, 2005,
+;; 2006, 2007 Free Software Foundation, Inc.
;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
;; Keywords: RSS
;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published
-;; by the Free Software Foundation; either version 2, or (at your
+;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful, but
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Commentary:
(defvoo nnrss-directory (nnheader-concat gnus-directory "rss/")
"Where nnrss will save its files.")
+;; Read by `nnrss-make-hash-index', which drops every element of an item
+;; whose car is a member of this list before the item is hashed.
+(defvoo nnrss-ignore-article-fields '(slash:comments)
+ "*List of fields that should be ignored when comparing RSS articles.
+Some RSS feeds update article fields during their lives, e.g. to
+indicate the number of comments or the number of times the
+articles have been seen. However, if there is a difference
+between the local article and the distant one, the latter is
+considered to be new. To avoid this and discard some fields, set
+this variable to the list of fields to be ignored.")
+
;; (group max rss-url)
(defvoo nnrss-server-data nil)
ARTICLE is the article number of the current headline.")
(defvar nnrss-file-coding-system mm-universal-coding-system
- "Coding system used when reading and writing files.")
-
-(defvar nnrss-compatible-encoding-alist '((iso-8859-1 . windows-1252))
+ "*Coding system used when reading and writing files.
+If you run Gnus with various versions of Emacsen, the value of this
+variable should be the coding system that all those Emacsen support.
+Note that you have to regenerate all the nnrss groups if you change
+the value. Moreover, you should be patient even if you are made to
+read the same articles twice, that arises for the difference of the
+versions of xml.el.")
+
+(defvar nnrss-compatible-encoding-alist
+ (delq nil (mapcar (lambda (elem)
+ (if (and (mm-coding-system-p (car elem))
+ (mm-coding-system-p (cdr elem)))
+ elem))
+ mm-charset-override-alist))
"Alist of encodings and those supersets.
The cdr of each element is used to decode data if it is available when
-the car is what the data specify as the encoding. Or, the car is used
+the car is what the data specify as the encoding. Or, the car is used
for decoding when the cdr that the data specify is not available.")
+;; Tested while the text/plain part of an article is generated: when it
+;; is nil, or no renderer can be determined, the text is refilled with
+;; `fill-region' instead of being passed to the renderer.
+(defvar nnrss-wash-html-in-text-plain-parts nil
+ "*Non-nil means render text in text/plain parts as HTML.
+The function specified by the `mm-text-html-renderer' variable will be
+used to render text. If it is nil, text will simply be folded.")
+
(nnoo-define-basics nnrss)
;;; Interface functions
(deffoo nnrss-close-group (group &optional server)
t)
+;; Valueless declarations.  Both variables are read further down only
+;; after `mm-view' has been required, so they are merely announced here
+;; (presumably to keep the byte compiler from warning about free
+;; variables -- confirm against the libraries that define them).
+(defvar mm-text-html-renderer)
+(defvar mm-text-html-washer-alist)
+
(deffoo nnrss-request-article (article &optional group server buffer)
(setq group (nnrss-decode-group-name group))
(when (stringp article)
(if (nth 5 e)
(insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
(let ((header (buffer-string))
- (text (if (nth 6 e)
- (mapconcat 'identity
- (delete "" (split-string (nth 6 e) "\n+"))
- " ")))
+ (text (nth 6 e))
(link (nth 2 e))
+ (enclosure (nth 7 e))
+ (comments (nth 8 e))
;; Enable encoding of Newsgroups header in XEmacs.
(default-enable-multibyte-characters t)
(rfc2047-header-encoding-alist
(cons '("Newsgroups" . utf-8)
rfc2047-header-encoding-alist)
rfc2047-header-encoding-alist))
- rfc2047-encode-encoded-words body)
- (when (or text link)
+ rfc2047-encode-encoded-words body fn)
+ (when (or text link enclosure comments)
(insert "\n")
(insert "<#multipart type=alternative>\n"
"<#part type=\"text/plain\">\n")
(setq body (point))
- (if text
- (progn
- (insert text "\n")
- (when link
- (insert "\n" link "\n")))
- (when link
- (insert link "\n")))
+ (when text
+ (insert text)
+ (goto-char body)
+ (if (and nnrss-wash-html-in-text-plain-parts
+ (progn
+ (require 'mm-view)
+ (setq fn (or (cdr (assq mm-text-html-renderer
+ mm-text-html-washer-alist))
+ mm-text-html-renderer))))
+ (progn
+ (narrow-to-region body (point-max))
+ (if (functionp fn)
+ (funcall fn)
+ (apply (car fn) (cdr fn)))
+ (widen)
+ (goto-char body)
+ (re-search-forward "[^\t\n ]" nil t)
+ (beginning-of-line)
+ (delete-region body (point))
+ (goto-char (point-max))
+ (skip-chars-backward "\t\n ")
+ (end-of-line)
+ (delete-region (point) (point-max))
+ (insert "\n"))
+ (while (re-search-forward "\n+" nil t)
+ (replace-match " "))
+ (goto-char body)
+ ;; See `nnrss-check-group', which inserts "<br /><br />".
+ (when (search-forward "<br /><br />" nil t)
+ (if (eobp)
+ (replace-match "\n")
+ (replace-match "\n\n")))
+ (unless (eobp)
+ (let ((fill-column default-fill-column)
+ (window (get-buffer-window nntp-server-buffer)))
+ (when window
+ (setq fill-column
+ (max 1 (/ (* (window-width window) 7) 8))))
+ (fill-region (point) (point-max))
+ (goto-char (point-max))
+ ;; XEmacs version of `fill-region' inserts newline.
+ (unless (bolp)
+ (insert "\n")))))
+ (when (or link enclosure)
+ (insert "\n")))
+ (when link
+ (insert link "\n"))
+ (when enclosure
+ (insert (car enclosure) " "
+ (nth 2 enclosure) " "
+ (nth 3 enclosure) "\n"))
+ (when comments
+ (insert comments "\n"))
(setq body (buffer-substring body (point)))
(insert "<#/part>\n"
"<#part type=\"text/html\">\n"
(insert text "\n"))
(when link
(insert "<p><a href=\"" link "\">link</a></p>\n"))
+ (when enclosure
+ (insert "<p><a href=\"" (car enclosure) "\">"
+ (cadr enclosure) "</a> " (nth 2 enclosure)
+ " " (nth 3 enclosure) "</p>\n"))
+ (when comments
+ (insert "<p><a href=\"" comments "\">comments</a></p>\n"))
(insert "</body></html>\n"
"<#/part>\n"
"<#/multipart>\n"))
(delq (assoc group nnrss-server-data) nnrss-server-data))
(nnrss-save-server-data server)
(ignore-errors
- (delete-file (nnrss-make-filename group server)))
+ (let ((file-name-coding-system nnmail-pathname-coding-system))
+ (delete-file (nnrss-make-filename group server))))
t)
(deffoo nnrss-request-list-newsgroups (&optional server)
;; FIXME: shouldn't binding `coding-system-for-read' be moved
;; to `mm-url-insert'?
(let ((coding-system-for-read 'binary))
- (mm-url-insert url)))
+ (condition-case err
+ (mm-url-insert url)
+ (error (if (or debug-on-quit debug-on-error)
+ (signal (car err) (cdr err))
+ (message "nnrss: Failed to fetch %s" url))))))
(nnheader-remove-cr-followed-by-lf)
;; Decode text according to the encoding attribute.
(when (setq cs (nnrss-get-encoding))
- (mm-decode-coding-region (point-min) (point-max) cs)
- (mm-enable-multibyte))
+ (insert (prog1
+ (mm-decode-coding-string (buffer-string) cs)
+ (erase-buffer)
+ (mm-enable-multibyte))))
(goto-char (point-min))
;; Because xml-parse-region can't deal with anything that isn't
(unless (assoc (car elem) nnrss-group-alist)
(insert (prin1-to-string (car elem)) " 0 1 y\n")))))
+;; `timezone-parse-date' is called by `nnrss-normalize-date' for
+;; RFC822-style dates; autoload it from the "timezone" library.
+(eval-and-compile (autoload 'timezone-parse-date "timezone"))
+
+(defun nnrss-normalize-date (date)
+ "Return a date string of DATE in the RFC822 style.
+This function handles the ISO 8601 date format described in
+<URL:http://www.w3.org/TR/NOTE-datetime>, and also the RFC822 style
+which RSS 2.0 allows.
+If DATE is nil, matches neither format, or is an RFC822-style date
+whose year is earlier than 1969, return the result of calling
+`message-make-date' with no argument instead."
+ ;; `case-fold-search' is bound to nil here, so the regexps below are
+ ;; matched case-sensitively.
+ (let (case-fold-search vector year month day time zone cts)
+ (cond ((null date))
+ ;; RFC822
+ ((string-match " [0-9]+ " date)
+ (setq vector (timezone-parse-date date)
+ year (string-to-number (aref vector 0)))
+ ;; For earlier years MONTH stays nil, which selects the
+ ;; `message-make-date' fallback at the end of the function.
+ (when (>= year 1969)
+ (setq month (string-to-number (aref vector 1))
+ day (string-to-number (aref vector 2)))
+ ;; A time field shorter than three characters is replaced by
+ ;; midnight.
+ (unless (>= (length (setq time (aref vector 3))) 3)
+ (setq time "00:00:00"))
+ ;; Keep the zone only if it begins with an uppercase letter
+ ;; or a sign; otherwise it is dropped.
+ (when (and (setq zone (aref vector 4))
+ (not (string-match "\\`[A-Z+-]" zone)))
+ (setq zone nil))))
+ ;; ISO 8601
+ ((string-match
+ (eval-when-compile
+ (concat
+ ;; 1. year
+ "\\(199[0-9]\\|20[0-9][0-9]\\)"
+ "\\(?:-"
+ ;; 2. month
+ "\\([01][0-9]\\)"
+ "\\(?:-"
+ ;; 3. day
+ "\\([0-3][0-9]\\)"
+ "\\)?\\)?\\(?:T"
+ ;; 4. hh:mm
+ "\\([012][0-9]:[0-5][0-9]\\)"
+ "\\(?:"
+ ;; 5. :ss
+ "\\(:[0-5][0-9]\\)"
+ "\\(?:\\.[0-9]+\\)?\\)?\\)?"
+ ;; 6+7,8,9. zone
+ "\\(?:\\(?:\\([+-][012][0-9]\\):\\([0-5][0-9]\\)\\)"
+ "\\|\\([+-][012][0-9][0-5][0-9]\\)"
+ "\\|\\(Z\\)\\)?"))
+ date)
+ ;; A missing month or day defaults to 1.
+ (setq year (string-to-number (match-string 1 date))
+ month (string-to-number (or (match-string 2 date) "1"))
+ day (string-to-number (or (match-string 3 date) "1"))
+ ;; With seconds present, hh:mm:ss is copied straight from
+ ;; DATE (a fractional part lies outside group 5 and is not
+ ;; included); otherwise ":00" is appended to hh:mm, or to
+ ;; "00:00" when no time was given.
+ time (if (match-beginning 5)
+ (substring date (match-beginning 4) (match-end 5))
+ (concat (or (match-string 4 date) "00:00") ":00"))
+ ;; A zone written as +hh:mm is rewritten as +hhmm.
+ zone (cond ((match-beginning 6)
+ (concat (match-string 6 date)
+ (match-string 7 date)))
+ ((match-beginning 9) ;; Z
+ "+0000")
+ (t ;; nil if zone is not provided.
+ (match-string 8 date))))))
+ (if month
+ (progn
+ ;; `current-time-string' is used only as a source of the
+ ;; day-of-week and month names (characters 0-3 and 4-7 of
+ ;; its result).
+ (setq cts (current-time-string (encode-time 0 0 0 day month year)))
+ (format "%s, %02d %s %04d %s%s"
+ (substring cts 0 3) day (substring cts 4 7) year time
+ (if zone
+ (concat " " zone)
+ "")))
+ (message-make-date))))
+
;;; data functions
(defun nnrss-read-server-data (server)
(setq nnrss-server-data nil)
- (let ((file (nnrss-make-filename "nnrss" server)))
+ (let ((file (nnrss-make-filename "nnrss" server))
+ (file-name-coding-system nnmail-pathname-coding-system))
(when (file-exists-p file)
;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII
;; file names. So, we use `insert-file-contents' instead.
(mm-with-multibyte-buffer
- (let ((coding-system-for-read nnrss-file-coding-system)
- (file-name-coding-system nnmail-pathname-coding-system))
+ (let ((coding-system-for-read nnrss-file-coding-system))
(insert-file-contents file)
(eval-region (point-min) (point-max)))))))
(let ((pair (assoc group nnrss-server-data)))
(setq nnrss-group-max (or (cadr pair) 0))
(setq nnrss-group-min (+ nnrss-group-max 1)))
- (let ((file (nnrss-make-filename group server)))
+ (let ((file (nnrss-make-filename group server))
+ (file-name-coding-system nnmail-pathname-coding-system))
(when (file-exists-p file)
;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII
;; file names. So, we use `insert-file-contents' instead.
(mm-with-multibyte-buffer
- (let ((coding-system-for-read nnrss-file-coding-system)
- (file-name-coding-system nnmail-pathname-coding-system))
+ (let ((coding-system-for-read nnrss-file-coding-system))
(insert-file-contents file)
(eval-region (point-min) (point-max))))
(dolist (e nnrss-group-data)
- (puthash (or (nth 2 e) (nth 6 e)) t nnrss-group-hashtb)
+ (puthash (nth 9 e) t nnrss-group-hashtb)
(when (and (car e) (> nnrss-group-min (car e)))
(setq nnrss-group-min (car e)))
(when (and (car e) (< nnrss-group-max (car e)))
(defun nnrss-insert-w3 (url)
(mm-with-unibyte-current-buffer
- (mm-url-insert url)))
+ (condition-case err
+ (mm-url-insert url)
+ (error (if (or debug-on-quit debug-on-error)
+ (signal (car err) (cdr err))
+ (message "nnrss: Failed to fetch %s" url))))))
(defun nnrss-decode-entities-string (string)
(if string
;;; Snarf functions
+(defun nnrss-make-hash-index (item)
+ "Return an MD5 hash string computed from the RSS element ITEM.
+Elements of ITEM that are lists whose car is a member of
+`nnrss-ignore-article-fields' are left out before hashing."
+ ;; `gnus-remove-if' is not defined in the visible part of this file;
+ ;; it is assumed to return ITEM without the elements for which the
+ ;; predicate returns non-nil.
+ (setq item (gnus-remove-if
+ (lambda (field)
+ (when (listp field)
+ (memq (car field) nnrss-ignore-article-fields)))
+ item))
+ ;; The printed representation of the filtered item is what gets
+ ;; hashed; `nnrss-file-coding-system' is passed to `md5' as the coding
+ ;; system argument.
+ (md5 (gnus-prin1-to-string item)
+ nil nil
+ nnrss-file-coding-system))
+
(defun nnrss-check-group (group server)
- (let (file xml subject url extra changed author
- date rss-ns rdf-ns content-ns dc-ns)
+ (let (file xml subject url extra changed author date feed-subject
+ enclosure comments rss-ns rdf-ns content-ns dc-ns
+ hash-index)
(if (and nnrss-use-local
(file-exists-p (setq file (expand-file-name
(nnrss-translate-file-chars
(dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
(when (and (listp item)
(string= (concat rss-ns "item") (car item))
- (if (setq url (nnrss-decode-entities-string
- (nnrss-node-text rss-ns 'link (cddr item))))
- (not (gethash url nnrss-group-hashtb))
- (setq extra (or (nnrss-node-text content-ns 'encoded item)
- (nnrss-node-text rss-ns 'description item)))
- (not (gethash extra nnrss-group-hashtb))))
+ (progn (setq hash-index (nnrss-make-hash-index item))
+ (not (gethash hash-index nnrss-group-hashtb))))
(setq subject (nnrss-node-text rss-ns 'title item))
- (setq extra (or extra
- (nnrss-node-text content-ns 'encoded item)
+ (setq url (nnrss-decode-entities-string
+ (nnrss-node-text rss-ns 'link (cddr item))))
+ (setq extra (or (nnrss-node-text content-ns 'encoded item)
(nnrss-node-text rss-ns 'description item)))
+ (if (setq feed-subject (nnrss-node-text dc-ns 'subject item))
+ (setq extra (concat feed-subject "<br /><br />" extra)))
(setq author (or (nnrss-node-text rss-ns 'author item)
(nnrss-node-text dc-ns 'creator item)
(nnrss-node-text dc-ns 'contributor item)))
- (setq date (or (nnrss-node-text dc-ns 'date item)
- (nnrss-node-text rss-ns 'pubDate item)
- (message-make-date)))
+ (setq date (nnrss-normalize-date
+ (or (nnrss-node-text dc-ns 'date item)
+ (nnrss-node-text rss-ns 'pubDate item))))
+ (setq comments (nnrss-node-text rss-ns 'comments item))
+ (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item)))
+ (let ((url (cdr (assq 'url enclosure)))
+ (len (cdr (assq 'length enclosure)))
+ (type (cdr (assq 'type enclosure)))
+ (name))
+ (setq len
+ (if (and len (integerp (setq len (string-to-number len))))
+ ;; actually already in `ls-lisp-format-file-size' but
+ ;; probably not worth to require it for one function
+ (do ((size (/ len 1.0) (/ size 1024.0))
+ (post-fixes (list "" "k" "M" "G" "T" "P" "E")
+ (cdr post-fixes)))
+ ((< size 1024)
+ (format "%.1f%s" size (car post-fixes))))
+ "0"))
+ (setq url (or url ""))
+ (setq name (if (string-match "/\\([^/]*\\)$" url)
+ (match-string 1 url)
+ "file"))
+ (setq type (or type ""))
+ (setq enclosure (list url name len type))))
(push
(list
(incf nnrss-group-max)
(and subject (nnrss-mime-encode-string subject))
(and author (nnrss-mime-encode-string author))
date
- (and extra (nnrss-decode-entities-string extra)))
+ (and extra (nnrss-decode-entities-string extra))
+ enclosure
+ comments
+ hash-index)
nnrss-group-data)
- (puthash (or url extra) t nnrss-group-hashtb)
+ (puthash hash-index t nnrss-group-hashtb)
(setq changed t))
(setq extra nil))
(when changed
"OPML subscriptions import.
Read the file and attempt to subscribe to each Feed in the file."
(interactive "fImport file: ")
- (mapcar
- (lambda (node) (gnus-group-make-rss-group
- (cdr (assq 'xmlUrl (cadr node)))))
+ (mapc
+ (lambda (node)
+ (let ((xmlurl (cdr (assq 'xmlUrl (cadr node)))))
+ (when (and xmlurl
+ (not (string-match "\\`[\t ]*\\'" xmlurl))
+ (prog1
+ (y-or-n-p (format "Subscribe to %s " xmlurl))
+ (message "")))
+ (condition-case err
+ (progn
+ (gnus-group-make-rss-group xmlurl)
+ (forward-line 1))
+ (error
+ (message
+ "Failed to subscribe to %s (%s); type any key to continue: "
+ xmlurl
+ (error-message-string err))
+ (let ((echo-keystrokes 0))
+ (read-char)))))))
(nnrss-find-el 'outline
- (progn
- (find-file opml-file)
- (xml-parse-region (point-min)
- (point-max))))))
+ (mm-with-multibyte-buffer
+ (insert-file-contents opml-file)
+ (xml-parse-region (point-min) (point-max))))))
(defun nnrss-opml-export ()
"OPML subscription export.
" <ownerName>" (user-full-name) "</ownerName>\n"
" </head>\n"
" <body>\n")
- (mapc (lambda (sub)
- (insert " <outline text=\"" (car sub) "\" xmlUrl=\""
- (cadr sub) "\"/>\n"))
- nnrss-group-alist)
+ (dolist (sub nnrss-group-alist)
+ (insert " <outline text=\"" (car sub)
+ "\" xmlUrl=\"" (cadr sub) "\"/>\n"))
(insert " </body>\n"
"</opml>\n"))
(pop-to-buffer "*OPML Export*")
(text (if (and node (listp node))
(nnrss-node-just-text node)
node))
- (cleaned-text (if text (gnus-replace-in-string
- text "^[\000-\037\177]+\\|^ +\\| +$" ""))))
+ (cleaned-text (if text
+ (gnus-replace-in-string
+ (gnus-replace-in-string
+ text "^[\000-\037\177]+\\|^ +\\| +$" "")
+ "\r\n" "\n"))))
(if (string-equal "" cleaned-text)
nil
cleaned-text)))
"Find the all matching elements in the data.
Careful with this on large documents!"
(when (consp data)
- (mapc (lambda (bit)
- (when (car-safe bit)
- (when (equal tag (car bit))
- ;; Old xml.el may return a list of string.
- (when (and (consp (caddr bit))
- (stringp (caaddr bit)))
- (setcar (cddr bit) (caaddr bit)))
- (setq found-list
- (append found-list
- (list bit))))
- (if (and (consp (car-safe (caddr bit)))
- (not (stringp (caddr bit))))
- (setq found-list
- (append found-list
- (nnrss-find-el
- tag (caddr bit))))
- (setq found-list
- (append found-list
- (nnrss-find-el
- tag (cddr bit)))))))
- data))
+ (dolist (bit data)
+ (when (car-safe bit)
+ (when (equal tag (car bit))
+ ;; Old xml.el may return a list of string.
+ (when (and (consp (caddr bit))
+ (stringp (caaddr bit)))
+ (setcar (cddr bit) (caaddr bit)))
+ (setq found-list
+ (append found-list
+ (list bit))))
+ (if (and (consp (car-safe (caddr bit)))
+ (not (stringp (caddr bit))))
+ (setq found-list
+ (append found-list
+ (nnrss-find-el
+ tag (caddr bit))))
+ (setq found-list
+ (append found-list
+ (nnrss-find-el
+ tag (cddr bit))))))))
found-list)
(defun nnrss-rsslink-p (el)
rss-onsite-in rdf-onsite-in xml-onsite-in
rss-offsite-end rdf-offsite-end xml-offsite-end
rss-offsite-in rdf-offsite-in xml-offsite-in)
- (mapc (lambda (href)
- (if (not (null href))
- (cond ((string-match "\\.rss$" href)
- (nnrss-match-macro
- base-uri href rss-onsite-end rss-offsite-end))
- ((string-match "\\.rdf$" href)
- (nnrss-match-macro
- base-uri href rdf-onsite-end rdf-offsite-end))
- ((string-match "\\.xml$" href)
- (nnrss-match-macro
- base-uri href xml-onsite-end xml-offsite-end))
- ((string-match "rss" href)
- (nnrss-match-macro
- base-uri href rss-onsite-in rss-offsite-in))
- ((string-match "rdf" href)
- (nnrss-match-macro
- base-uri href rdf-onsite-in rdf-offsite-in))
- ((string-match "xml" href)
- (nnrss-match-macro
- base-uri href xml-onsite-in xml-offsite-in)))))
- hrefs)
+ (dolist (href hrefs)
+ (cond ((null href))
+ ((string-match "\\.rss$" href)
+ (nnrss-match-macro
+ base-uri href rss-onsite-end rss-offsite-end))
+ ((string-match "\\.rdf$" href)
+ (nnrss-match-macro
+ base-uri href rdf-onsite-end rdf-offsite-end))
+ ((string-match "\\.xml$" href)
+ (nnrss-match-macro
+ base-uri href xml-onsite-end xml-offsite-end))
+ ((string-match "rss" href)
+ (nnrss-match-macro
+ base-uri href rss-onsite-in rss-offsite-in))
+ ((string-match "rdf" href)
+ (nnrss-match-macro
+ base-uri href rdf-onsite-in rdf-offsite-in))
+ ((string-match "xml" href)
+ (nnrss-match-macro
+ base-uri href xml-onsite-in xml-offsite-in))))
(append
rss-onsite-end rdf-onsite-end xml-onsite-end
rss-onsite-in rdf-onsite-in xml-onsite-in
(selection
(mapcar (lambda (listinfo)
(cons (cdr (assoc "sitename" listinfo))
- (string-to-int
+ (string-to-number
(cdr (assoc "feedid" listinfo)))))
feedinfo)))
(cdr (assoc