;;; nnrss.el --- interfacing with RSS
-;; Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+;; Copyright (C) 2001, 2002, 2003, 2004, 2005,
+;; 2006 Free Software Foundation, Inc.
;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
;; Keywords: RSS
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Commentary:
(require 'time-date)
(require 'rfc2231)
(require 'mm-url)
+(require 'rfc2047)
+(require 'mml)
(eval-when-compile
(ignore-errors
- (require 'xml)))
+ (require 'xml)))
(eval '(require 'xml))
(nnoo-declare nnrss)
ENTRY is the record of the current headline. GROUP is the group name.
ARTICLE is the article number of the current headline.")
+(defvar nnrss-file-coding-system mm-universal-coding-system
+ "Coding system used when reading and writing files.")
+
+(defvar nnrss-compatible-encoding-alist '((iso-8859-1 . windows-1252))
+ "Alist of encodings and those supersets.
+The cdr of each element is used to decode data if it is available when
+the car is what the data specify as the encoding. Or, the car is used
+for decoding when the cdr that the data specify is not available.")
+
+(defvar nnrss-wash-html-in-text-plain-parts nil
+ "*Non-nil means render text in text/plain parts as HTML.
+The function specified by the `mm-text-html-renderer' variable will be
+used to render text. If it is nil, text will simply be folded.")
+
(nnoo-define-basics nnrss)
;;; Interface functions
+(defsubst nnrss-format-string (string)
+ (gnus-replace-in-string string " *\n *" " "))
+
+(defun nnrss-decode-group-name (group)
+ (if (and group (mm-coding-system-p 'utf-8))
+ (setq group (mm-decode-coding-string group 'utf-8))
+ group))
+
(deffoo nnrss-retrieve-headers (articles &optional group server fetch-old)
+ (setq group (nnrss-decode-group-name group))
(nnrss-possibly-change-group group server)
(let (e)
(save-excursion
(dolist (article articles)
(if (setq e (assq article nnrss-group-data))
(insert (number-to-string (car e)) "\t" ;; number
- (if (nth 3 e)
- (nnrss-format-string (nth 3 e)) "")
- "\t" ;; subject
- (if (nth 4 e)
- (nnrss-format-string (nth 4 e))
- "(nobody)")
- "\t" ;;from
+ ;; subject
+ (or (nth 3 e) "")
+ "\t"
+ ;; from
+ (or (nth 4 e) "(nobody)")
+ "\t"
+ ;; date
(or (nth 5 e) "")
- "\t" ;; date
+ "\t"
+ ;; id
(format "<%d@%s.nnrss>" (car e) group)
- "\t" ;; id
- "\t" ;; refs
- "-1" "\t" ;; chars
- "-1" "\t" ;; lines
- "" "\t" ;; Xref
+ "\t"
+ ;; refs
+ "\t"
+ ;; chars
+ "-1" "\t"
+ ;; lines
+ "-1" "\t"
+ ;; Xref
+ "" "\t"
(if (and (nth 6 e)
(memq nnrss-description-field
nnmail-extra-headers))
'nov)
(deffoo nnrss-request-group (group &optional server dont-check)
+ (setq group (nnrss-decode-group-name group))
+ (nnheader-message 6 "nnrss: Requesting %s..." group)
(nnrss-possibly-change-group group server)
- (if dont-check
- t
- (nnrss-check-group group server)
- (nnheader-report 'nnrss "Opened group %s" group)
- (nnheader-insert
- "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max
- (prin1-to-string group)
- t)))
+ (prog1
+ (if dont-check
+ t
+ (nnrss-check-group group server)
+ (nnheader-report 'nnrss "Opened group %s" group)
+ (nnheader-insert
+ "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max
+ (prin1-to-string group)
+ t))
+ (nnheader-message 6 "nnrss: Requesting %s...done" group)))
(deffoo nnrss-close-group (group &optional server)
t)
+(eval-when-compile
+ (defvar mm-text-html-renderer)
+ (defvar mm-text-html-washer-alist))
+
(deffoo nnrss-request-article (article &optional group server buffer)
+ (setq group (nnrss-decode-group-name group))
+ (when (stringp article)
+ (setq article (if (string-match "\\`<\\([0-9]+\\)@" article)
+ (string-to-number (match-string 1 article))
+ 0)))
(nnrss-possibly-change-group group server)
(let ((e (assq article nnrss-group-data))
- (boundary "=-=-=-=-=-=-=-=-=-")
(nntp-server-buffer (or buffer nntp-server-buffer))
post err)
(when e
- (catch 'error
- (with-current-buffer nntp-server-buffer
- (erase-buffer)
- (goto-char (point-min))
- (insert "Mime-Version: 1.0\nContent-Type: multipart/alternative; boundary=\"" boundary "\"\n")
- (if group
- (insert "Newsgroups: " group "\n"))
- (if (nth 3 e)
- (insert "Subject: " (nnrss-format-string (nth 3 e)) "\n"))
- (if (nth 4 e)
- (insert "From: " (nnrss-format-string (nth 4 e)) "\n"))
- (if (nth 5 e)
- (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
- (insert "Message-ID: " (format "<%d@%s.nnrss>" (car e) group) "\n")
- (insert "\n")
- (let ((text (if (nth 6 e)
- (nnrss-string-as-multibyte (nth 6 e))))
- (link (if (nth 2 e)
- (nth 2 e))))
- (insert "\n\n--" boundary "\nContent-Type: text/plain\n\n")
- (let ((point (point)))
- (when text
- (insert text)
- (goto-char point)
- (while (re-search-forward "\n" nil t)
- (replace-match " "))
- (goto-char (point-max))
- (insert "\n\n"))
- (when link
- (insert link)))
- (insert "\n\n--" boundary "\nContent-Type: text/html\n\n")
- (let ((point (point)))
- (when text
- (insert "<html><head></head><body>\n" text "\n</body></html>")
- (goto-char point)
- (while (re-search-forward "\n" nil t)
+ (with-current-buffer nntp-server-buffer
+ (erase-buffer)
+ (if group
+ (insert "Newsgroups: " group "\n"))
+ (if (nth 3 e)
+ (insert "Subject: " (nth 3 e) "\n"))
+ (if (nth 4 e)
+ (insert "From: " (nth 4 e) "\n"))
+ (if (nth 5 e)
+ (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
+ (let ((header (buffer-string))
+ (text (nth 6 e))
+ (link (nth 2 e))
+ (enclosure (nth 7 e))
+ (comments (nth 8 e))
+ ;; Enable encoding of Newsgroups header in XEmacs.
+ (default-enable-multibyte-characters t)
+ (rfc2047-header-encoding-alist
+ (if (mm-coding-system-p 'utf-8)
+ (cons '("Newsgroups" . utf-8)
+ rfc2047-header-encoding-alist)
+ rfc2047-header-encoding-alist))
+ rfc2047-encode-encoded-words body fn)
+ (when (or text link enclosure comments)
+ (insert "\n")
+ (insert "<#multipart type=alternative>\n"
+ "<#part type=\"text/plain\">\n")
+ (setq body (point))
+ (when text
+ (insert text)
+ (goto-char body)
+ (if (and nnrss-wash-html-in-text-plain-parts
+ (progn
+ (require 'mm-view)
+ (setq fn (or (cdr (assq mm-text-html-renderer
+ mm-text-html-washer-alist))
+ mm-text-html-renderer))))
+ (progn
+ (narrow-to-region body (point-max))
+ (if (functionp fn)
+ (funcall fn)
+ (apply (car fn) (cdr fn)))
+ (widen)
+ (goto-char body)
+ (re-search-forward "[^\t\n ]" nil t)
+ (beginning-of-line)
+ (delete-region body (point))
+ (goto-char (point-max))
+ (skip-chars-backward "\t\n ")
+ (end-of-line)
+ (delete-region (point) (point-max))
+ (insert "\n"))
+ (while (re-search-forward "\n+" nil t)
(replace-match " "))
- (goto-char (point-max))
- (insert "\n\n"))
- (when link
- (insert "<p><a href=\"" link "\">link</a></p>\n"))))
- (when nnrss-content-function
- (funcall nnrss-content-function e group article)))))
+ (goto-char body)
+ ;; See `nnrss-check-group', which inserts "<br /><br />".
+ (when (search-forward "<br /><br />" nil t)
+ (if (eobp)
+ (replace-match "\n")
+ (replace-match "\n\n")))
+ (unless (eobp)
+ (let ((fill-column default-fill-column)
+ (window (get-buffer-window nntp-server-buffer)))
+ (when window
+ (setq fill-column
+ (max 1 (/ (* (window-width window) 7) 8))))
+ (fill-region (point) (point-max))
+ (goto-char (point-max))
+ ;; XEmacs version of `fill-region' inserts newline.
+ (unless (bolp)
+ (insert "\n")))))
+ (when (or link enclosure)
+ (insert "\n")))
+ (when link
+ (insert link "\n"))
+ (when enclosure
+ (insert (car enclosure) " "
+ (nth 2 enclosure) " "
+ (nth 3 enclosure) "\n"))
+ (when comments
+ (insert comments "\n"))
+ (setq body (buffer-substring body (point)))
+ (insert "<#/part>\n"
+ "<#part type=\"text/html\">\n"
+ "<html><head></head><body>\n")
+ (when text
+ (insert text "\n"))
+ (when link
+ (insert "<p><a href=\"" link "\">link</a></p>\n"))
+ (when enclosure
+ (insert "<p><a href=\"" (car enclosure) "\">"
+ (cadr enclosure) "</a> " (nth 2 enclosure)
+ " " (nth 3 enclosure) "</p>\n"))
+ (when comments
+ (insert "<p><a href=\"" comments "\">comments</a></p>\n"))
+ (insert "</body></html>\n"
+ "<#/part>\n"
+ "<#/multipart>\n"))
+ (condition-case nil
+ (mml-to-mime)
+ (error
+ (erase-buffer)
+ (insert header
+ "Content-Type: text/plain; charset=gnus-decoded\n"
+ "Content-Transfer-Encoding: 8bit\n\n"
+ body)
+ (nnheader-message
+ 3 "Warning - there might be invalid characters"))))
+ (goto-char (point-min))
+ (search-forward "\n\n")
+ (forward-line -1)
+ (insert (format "Message-ID: <%d@%s.nnrss>\n"
+ (car e)
+ (let ((rfc2047-encoding-type 'mime)
+ rfc2047-encode-max-chars)
+ (rfc2047-encode-string
+ (gnus-replace-in-string group "[\t\n ]+" "_")))))
+ (when nnrss-content-function
+ (funcall nnrss-content-function e group article))))
(cond
(err
(nnheader-report 'nnrss err))
(deffoo nnrss-request-expire-articles
(articles group &optional server force)
+ (setq group (nnrss-decode-group-name group))
(nnrss-possibly-change-group group server)
(let (e days not-expirable changed)
(dolist (art articles)
not-expirable))
(deffoo nnrss-request-delete-group (group &optional force server)
+ (setq group (nnrss-decode-group-name group))
(nnrss-possibly-change-group group server)
+ (let (elem)
+ ;; There may be two or more entries in `nnrss-group-alist' since
+ ;; this function didn't delete them formerly.
+ (while (setq elem (assoc group nnrss-group-alist))
+ (setq nnrss-group-alist (delq elem nnrss-group-alist))))
(setq nnrss-server-data
(delq (assoc group nnrss-server-data) nnrss-server-data))
(nnrss-save-server-data server)
(ignore-errors
- (delete-file (nnrss-make-filename group server)))
+ (delete-file (nnrss-make-filename group server)))
t)
(deffoo nnrss-request-list-newsgroups (&optional server)
;;; Internal functions
(eval-when-compile (defun xml-rpc-method-call (&rest args)))
+
+(defun nnrss-get-encoding ()
+ "Return an encoding attribute specified in the current xml contents.
+If `nnrss-compatible-encoding-alist' specifies the compatible encoding,
+it is used instead. If the xml contents doesn't specify the encoding,
+return `utf-8' which is the default encoding for xml if it is available,
+otherwise return nil."
+ (goto-char (point-min))
+ (if (re-search-forward
+ "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)"
+ nil t)
+ (let ((encoding (intern (downcase (or (match-string 1)
+ (match-string 2))))))
+ (or
+ (mm-coding-system-p (cdr (assq encoding
+ nnrss-compatible-encoding-alist)))
+ (mm-coding-system-p encoding)
+ (mm-coding-system-p (car (rassq encoding
+ nnrss-compatible-encoding-alist)))))
+ (mm-coding-system-p 'utf-8)))
+
(defun nnrss-fetch (url &optional local)
"Fetch URL and put it in a the expected Lisp structure."
- (with-temp-buffer
- ;some CVS versions of url.el need this to close the connection quickly
- (let* (xmlform htmlform)
+ (mm-with-unibyte-buffer
+ ;;some CVS versions of url.el need this to close the connection quickly
+ (let (cs xmlform htmlform)
;; bit o' work necessary for w3 pre-cvs and post-cvs
(if local
(let ((coding-system-for-read 'binary))
(insert-file-contents url))
- (mm-url-insert url))
-
-;; Because xml-parse-region can't deal with anything that isn't
-;; xml and w3-parse-buffer can't deal with some xml, we have to
-;; parse with xml-parse-region first and, if that fails, parse
-;; with w3-parse-buffer. Yuck. Eventually, someone should find out
-;; why w3-parse-buffer fails to parse some well-formed xml and
-;; fix it.
+ ;; FIXME: shouldn't binding `coding-system-for-read' be moved
+ ;; to `mm-url-insert'?
+ (let ((coding-system-for-read 'binary))
+ (condition-case err
+ (mm-url-insert url)
+ (error (if (or debug-on-quit debug-on-error)
+ (signal (car err) (cdr err))
+ (message "nnrss: Failed to fetch %s" url))))))
+ (nnheader-remove-cr-followed-by-lf)
+ ;; Decode text according to the encoding attribute.
+ (when (setq cs (nnrss-get-encoding))
+ (mm-decode-coding-region (point-min) (point-max) cs)
+ (mm-enable-multibyte))
+ (goto-char (point-min))
- (condition-case err
- (setq xmlform (xml-parse-region (point-min) (point-max)))
- (error (if (fboundp 'w3-parse-buffer)
- (setq htmlform (caddar (w3-parse-buffer
- (current-buffer))))
- (message "nnrss: Not valid XML and w3 parse not available (%s)"
- url))))
- (if htmlform
- htmlform
- xmlform))))
+ ;; Because xml-parse-region can't deal with anything that isn't
+ ;; xml and w3-parse-buffer can't deal with some xml, we have to
+ ;; parse with xml-parse-region first and, if that fails, parse
+ ;; with w3-parse-buffer. Yuck. Eventually, someone should find out
+ ;; why w3-parse-buffer fails to parse some well-formed xml and
+ ;; fix it.
+
+ (condition-case err1
+ (setq xmlform (xml-parse-region (point-min) (point-max)))
+ (error
+ (condition-case err2
+ (setq htmlform (caddar (w3-parse-buffer
+ (current-buffer))))
+ (error
+ (message "\
+nnrss: %s: Not valid XML %s and w3-parse doesn't work %s"
+ url err1 err2)))))
+ (if htmlform
+ htmlform
+ xmlform))))
(defun nnrss-possibly-change-group (&optional group server)
(when (and server