* message.el (message-user-organization-file): Check several
[gnus] / lisp / nnrss.el
index 0662862..f405687 100644 (file)
@@ -1,5 +1,7 @@
 ;;; nnrss.el --- interfacing with RSS
-;; Copyright (C) 2001, 2002, 2003, 2004  Free Software Foundation, Inc.
+
+;; Copyright (C) 2001, 2002, 2003, 2004, 2005,
+;;   2006 Free Software Foundation, Inc.
 
 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
 ;; Keywords: RSS
@@ -18,8 +20,8 @@
 
 ;; You should have received a copy of the GNU General Public License
 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
 
 ;;; Commentary:
 
 (require 'time-date)
 (require 'rfc2231)
 (require 'mm-url)
+(require 'rfc2047)
+(require 'mml)
 (eval-when-compile
   (ignore-errors
-    (require 'xml)))
+   (require 'xml)))
 (eval '(require 'xml))
 
 (nnoo-declare nnrss)
@@ -78,11 +82,34 @@ The arguments are (ENTRY GROUP ARTICLE).
 ENTRY is the record of the current headline.  GROUP is the group name.
 ARTICLE is the article number of the current headline.")
 
+(defvar nnrss-file-coding-system mm-universal-coding-system
+  "Coding system used when reading and writing files.")
+
+(defvar nnrss-compatible-encoding-alist '((iso-8859-1 . windows-1252))
+  "Alist of encodings and those supersets.
+The cdr of each element is used to decode data if it is available when
+the car is what the data specify as the encoding.  Or, the car is used
+for decoding when the cdr that the data specify is not available.")
+
+(defvar nnrss-wash-html-in-text-plain-parts nil
+  "*Non-nil means render text in text/plain parts as HTML.
+The function specified by the `mm-text-html-renderer' variable will be
+used to render text.  If it is nil, text will simply be folded.")
+
 (nnoo-define-basics nnrss)
 
 ;;; Interface functions
 
+(defsubst nnrss-format-string (string)
+  (gnus-replace-in-string string " *\n *" " "))
+
+(defun nnrss-decode-group-name (group)
+  (if (and group (mm-coding-system-p 'utf-8))
+      (setq group (mm-decode-coding-string group 'utf-8))
+    group))
+
 (deffoo nnrss-retrieve-headers (articles &optional group server fetch-old)
+  (setq group (nnrss-decode-group-name group))
   (nnrss-possibly-change-group group server)
   (let (e)
     (save-excursion
@@ -91,21 +118,26 @@ ARTICLE is the article number of the current headline.")
       (dolist (article articles)
        (if (setq e (assq article nnrss-group-data))
            (insert (number-to-string (car e)) "\t" ;; number
-                   (if (nth 3 e)
-                       (nnrss-format-string (nth 3 e)) "")
-                   "\t" ;; subject
-                   (if (nth 4 e)
-                       (nnrss-format-string (nth 4 e))
-                     "(nobody)")
-                   "\t" ;;from
+                   ;; subject
+                   (or (nth 3 e) "")
+                   "\t"
+                   ;; from
+                   (or (nth 4 e) "(nobody)")
+                   "\t"
+                   ;; date
                    (or (nth 5 e) "")
-                   "\t" ;; date
+                   "\t"
+                   ;; id
                    (format "<%d@%s.nnrss>" (car e) group)
-                   "\t" ;; id
-                   "\t" ;; refs
-                   "-1" "\t" ;; chars
-                   "-1" "\t" ;; lines
-                   "" "\t" ;; Xref
+                   "\t"
+                   ;; refs
+                   "\t"
+                   ;; chars
+                   "-1" "\t"
+                   ;; lines
+                   "-1" "\t"
+                   ;; Xref
+                   "" "\t"
                    (if (and (nth 6 e)
                             (memq nnrss-description-field
                                   nnmail-extra-headers))
@@ -126,69 +158,157 @@ ARTICLE is the article number of the current headline.")
   'nov)
 
 (deffoo nnrss-request-group (group &optional server dont-check)
+  (setq group (nnrss-decode-group-name group))
+  (nnheader-message 6 "nnrss: Requesting %s..." group)
   (nnrss-possibly-change-group group server)
-  (if dont-check
-      t
-    (nnrss-check-group group server)
-    (nnheader-report 'nnrss "Opened group %s" group)
-    (nnheader-insert
-     "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max
-     (prin1-to-string group)
-     t)))
+  (prog1
+      (if dont-check
+         t
+       (nnrss-check-group group server)
+       (nnheader-report 'nnrss "Opened group %s" group)
+       (nnheader-insert
+        "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max
+        (prin1-to-string group)
+        t))
+    (nnheader-message 6 "nnrss: Requesting %s...done" group)))
 
 (deffoo nnrss-close-group (group &optional server)
   t)
 
+(eval-when-compile
+  (defvar mm-text-html-renderer)
+  (defvar mm-text-html-washer-alist))
+
 (deffoo nnrss-request-article (article &optional group server buffer)
+  (setq group (nnrss-decode-group-name group))
+  (when (stringp article)
+    (setq article (if (string-match "\\`<\\([0-9]+\\)@" article)
+                     (string-to-number (match-string 1 article))
+                   0)))
   (nnrss-possibly-change-group group server)
   (let ((e (assq article nnrss-group-data))
-       (boundary "=-=-=-=-=-=-=-=-=-")
        (nntp-server-buffer (or buffer nntp-server-buffer))
        post err)
     (when e
-      (catch 'error
-       (with-current-buffer nntp-server-buffer
-         (erase-buffer)
-         (goto-char (point-min))
-         (insert "Mime-Version: 1.0\nContent-Type: multipart/alternative; boundary=\"" boundary "\"\n")
-         (if group
-             (insert "Newsgroups: " group "\n"))
-         (if (nth 3 e)
-             (insert "Subject: " (nnrss-format-string (nth 3 e)) "\n"))
-         (if (nth 4 e)
-             (insert "From: " (nnrss-format-string (nth 4 e)) "\n"))
-         (if (nth 5 e)
-             (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
-         (insert "Message-ID: " (format "<%d@%s.nnrss>" (car e) group) "\n")
-         (insert "\n")
-         (let ((text (if (nth 6 e)
-                         (nnrss-string-as-multibyte (nth 6 e))))
-               (link (if (nth 2 e)
-                         (nth 2 e))))
-           (insert "\n\n--" boundary "\nContent-Type: text/plain\n\n")
-           (let ((point (point)))
-             (when text
-               (insert text)
-               (goto-char point)
-               (while (re-search-forward "\n" nil t)
-                 (replace-match " "))
-               (goto-char (point-max))
-               (insert "\n\n"))
-             (when link
-               (insert link)))
-           (insert "\n\n--" boundary "\nContent-Type: text/html\n\n")
-           (let ((point (point)))
-             (when text
-               (insert "<html><head></head><body>\n" text "\n</body></html>")
-               (goto-char point)
-               (while (re-search-forward "\n" nil t)
+      (with-current-buffer nntp-server-buffer
+       (erase-buffer)
+       (if group
+           (insert "Newsgroups: " group "\n"))
+       (if (nth 3 e)
+           (insert "Subject: " (nth 3 e) "\n"))
+       (if (nth 4 e)
+           (insert "From: " (nth 4 e) "\n"))
+       (if (nth 5 e)
+           (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
+       (let ((header (buffer-string))
+             (text (nth 6 e))
+             (link (nth 2 e))
+             (enclosure (nth 7 e))
+             (comments (nth 8 e))
+             ;; Enable encoding of Newsgroups header in XEmacs.
+             (default-enable-multibyte-characters t)
+             (rfc2047-header-encoding-alist
+              (if (mm-coding-system-p 'utf-8)
+                  (cons '("Newsgroups" . utf-8)
+                        rfc2047-header-encoding-alist)
+                rfc2047-header-encoding-alist))
+             rfc2047-encode-encoded-words body fn)
+         (when (or text link enclosure comments)
+           (insert "\n")
+           (insert "<#multipart type=alternative>\n"
+                   "<#part type=\"text/plain\">\n")
+           (setq body (point))
+           (when text
+             (insert text)
+             (goto-char body)
+             (if (and nnrss-wash-html-in-text-plain-parts
+                      (progn
+                        (require 'mm-view)
+                        (setq fn (or (cdr (assq mm-text-html-renderer
+                                                mm-text-html-washer-alist))
+                                     mm-text-html-renderer))))
+                 (progn
+                   (narrow-to-region body (point-max))
+                   (if (functionp fn)
+                       (funcall fn)
+                     (apply (car fn) (cdr fn)))
+                   (widen)
+                   (goto-char body)
+                   (re-search-forward "[^\t\n ]" nil t)
+                   (beginning-of-line)
+                   (delete-region body (point))
+                   (goto-char (point-max))
+                   (skip-chars-backward "\t\n ")
+                   (end-of-line)
+                   (delete-region (point) (point-max))
+                   (insert "\n"))
+               (while (re-search-forward "\n+" nil t)
                  (replace-match " "))
-               (goto-char (point-max))
-               (insert "\n\n"))
-             (when link
-               (insert "<p><a href=\"" link "\">link</a></p>\n"))))
-         (when nnrss-content-function
-           (funcall nnrss-content-function e group article)))))
+               (goto-char body)
+               ;; See `nnrss-check-group', which inserts "<br /><br />".
+               (when (search-forward "<br /><br />" nil t)
+                 (if (eobp)
+                     (replace-match "\n")
+                   (replace-match "\n\n")))
+               (unless (eobp)
+                 (let ((fill-column default-fill-column)
+                       (window (get-buffer-window nntp-server-buffer)))
+                   (when window
+                     (setq fill-column
+                           (max 1 (/ (* (window-width window) 7) 8))))
+                   (fill-region (point) (point-max))
+                   (goto-char (point-max))
+                   ;; XEmacs version of `fill-region' inserts newline.
+                   (unless (bolp)
+                     (insert "\n")))))
+             (when (or link enclosure)
+               (insert "\n")))
+           (when link
+             (insert link "\n"))
+           (when enclosure
+             (insert (car enclosure) " "
+                     (nth 2 enclosure) " "
+                     (nth 3 enclosure) "\n"))
+           (when comments
+             (insert comments "\n"))
+           (setq body (buffer-substring body (point)))
+           (insert "<#/part>\n"
+                   "<#part type=\"text/html\">\n"
+                   "<html><head></head><body>\n")
+           (when text
+             (insert text "\n"))
+           (when link
+             (insert "<p><a href=\"" link "\">link</a></p>\n"))
+           (when enclosure
+             (insert "<p><a href=\"" (car enclosure) "\">"
+                     (cadr enclosure) "</a> " (nth 2 enclosure)
+                     " " (nth 3 enclosure) "</p>\n"))
+           (when comments
+             (insert "<p><a href=\"" comments "\">comments</a></p>\n"))
+           (insert "</body></html>\n"
+                   "<#/part>\n"
+                   "<#/multipart>\n"))
+         (condition-case nil
+             (mml-to-mime)
+           (error
+            (erase-buffer)
+            (insert header
+                    "Content-Type: text/plain; charset=gnus-decoded\n"
+                    "Content-Transfer-Encoding: 8bit\n\n"
+                    body)
+            (nnheader-message
+             3 "Warning - there might be invalid characters"))))
+       (goto-char (point-min))
+       (search-forward "\n\n")
+       (forward-line -1)
+       (insert (format "Message-ID: <%d@%s.nnrss>\n"
+                       (car e)
+                       (let ((rfc2047-encoding-type 'mime)
+                             rfc2047-encode-max-chars)
+                         (rfc2047-encode-string
+                          (gnus-replace-in-string group "[\t\n ]+" "_")))))
+       (when nnrss-content-function
+         (funcall nnrss-content-function e group article))))
     (cond
      (err
       (nnheader-report 'nnrss err))
@@ -211,6 +331,7 @@ ARTICLE is the article number of the current headline.")
 
 (deffoo nnrss-request-expire-articles
     (articles group &optional server force)
+  (setq group (nnrss-decode-group-name group))
   (nnrss-possibly-change-group group server)
   (let (e days not-expirable changed)
     (dolist (art articles)
@@ -228,12 +349,18 @@ ARTICLE is the article number of the current headline.")
     not-expirable))
 
 (deffoo nnrss-request-delete-group (group &optional force server)
+  (setq group (nnrss-decode-group-name group))
   (nnrss-possibly-change-group group server)
+  (let (elem)
+    ;; There may be two or more entries in `nnrss-group-alist' since
+    ;; this function didn't delete them formerly.
+    (while (setq elem (assoc group nnrss-group-alist))
+      (setq nnrss-group-alist (delq elem nnrss-group-alist))))
   (setq nnrss-server-data
        (delq (assoc group nnrss-server-data) nnrss-server-data))
   (nnrss-save-server-data server)
   (ignore-errors
-    (delete-file (nnrss-make-filename group server)))
+   (delete-file (nnrss-make-filename group server)))
   t)
 
 (deffoo nnrss-request-list-newsgroups (&optional server)
@@ -250,34 +377,71 @@ ARTICLE is the article number of the current headline.")
 
 ;;; Internal functions
 (eval-when-compile (defun xml-rpc-method-call (&rest args)))
+
+(defun nnrss-get-encoding ()
+  "Return an encoding attribute specified in the current xml contents.
+If `nnrss-compatible-encoding-alist' specifies the compatible encoding,
+it is used instead.  If the xml contents doesn't specify the encoding,
+return `utf-8' which is the default encoding for xml if it is available,
+otherwise return nil."
+  (goto-char (point-min))
+  (if (re-search-forward
+       "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)"
+       nil t)
+      (let ((encoding (intern (downcase (or (match-string 1)
+                                           (match-string 2))))))
+       (or
+        (mm-coding-system-p (cdr (assq encoding
+                                       nnrss-compatible-encoding-alist)))
+        (mm-coding-system-p encoding)
+        (mm-coding-system-p (car (rassq encoding
+                                        nnrss-compatible-encoding-alist)))))
+    (mm-coding-system-p 'utf-8)))
+
 (defun nnrss-fetch (url &optional local)
   "Fetch URL and put it in a the expected Lisp structure."
-  (with-temp-buffer
-  ;some CVS versions of url.el need this to close the connection quickly
-    (let* (xmlform htmlform)
+  (mm-with-unibyte-buffer
+    ;;some CVS versions of url.el need this to close the connection quickly
+    (let (cs xmlform htmlform)
       ;; bit o' work necessary for w3 pre-cvs and post-cvs
       (if local
          (let ((coding-system-for-read 'binary))
            (insert-file-contents url))
-       (mm-url-insert url))
-
-;; Because xml-parse-region can't deal with anything that isn't
-;; xml and w3-parse-buffer can't deal with some xml, we have to
-;; parse with xml-parse-region first and, if that fails, parse
-;; with w3-parse-buffer.  Yuck.  Eventually, someone should find out
-;; why w3-parse-buffer fails to parse some well-formed xml and
-;; fix it.
+       ;; FIXME: shouldn't binding `coding-system-for-read' be moved
+       ;; to `mm-url-insert'?
+       (let ((coding-system-for-read 'binary))
+         (condition-case err
+             (mm-url-insert url)
+           (error (if (or debug-on-quit debug-on-error)
+                      (signal (car err) (cdr err))
+                    (message "nnrss: Failed to fetch %s" url))))))
+      (nnheader-remove-cr-followed-by-lf)
+      ;; Decode text according to the encoding attribute.
+      (when (setq cs (nnrss-get-encoding))
+       (mm-decode-coding-region (point-min) (point-max) cs)
+       (mm-enable-multibyte))
+      (goto-char (point-min))
 
-    (condition-case err
-       (setq xmlform (xml-parse-region (point-min) (point-max)))
-      (error (if (fboundp 'w3-parse-buffer)
-                (setq htmlform (caddar (w3-parse-buffer
-                                        (current-buffer))))
-              (message "nnrss: Not valid XML and w3 parse not available (%s)"
-                       url))))
-    (if htmlform
-       htmlform
-      xmlform))))
+      ;; Because xml-parse-region can't deal with anything that isn't
+      ;; xml and w3-parse-buffer can't deal with some xml, we have to
+      ;; parse with xml-parse-region first and, if that fails, parse
+      ;; with w3-parse-buffer.  Yuck.  Eventually, someone should find out
+      ;; why w3-parse-buffer fails to parse some well-formed xml and
+      ;; fix it.
+
+      (condition-case err1
+         (setq xmlform (xml-parse-region (point-min) (point-max)))
+       (error
+        (condition-case err2
+            (setq htmlform (caddar (w3-parse-buffer
+                                    (current-buffer))))
+          (error
+           (message "\
+nnrss: %s: Not valid XML %s and w3-parse doesn't work %s"
+                    url err1 err2)))))
+      (if htmlform
+         htmlform
+       xmlform))))
 
 (defun nnrss-possibly-change-group (&optional group server)
   (when (and server
@@