(mm-codepage-setup): New helper function.
[gnus] / lisp / nnrss.el
index 6e89786..2bb6e5f 100644 (file)
@@ -1,5 +1,6 @@
 ;;; nnrss.el --- interfacing with RSS
-;; Copyright (C) 2001, 2002, 2003, 2004, 2005  Free Software Foundation, Inc.
+
+;; Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
 
 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
 ;; Keywords: RSS
@@ -18,8 +19,8 @@
 
 ;; You should have received a copy of the GNU General Public License
 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
 
 ;;; Commentary:
 
@@ -40,7 +41,7 @@
 (require 'mml)
 (eval-when-compile
   (ignore-errors
-    (require 'xml)))
+   (require 'xml)))
 (eval '(require 'xml))
 
 (nnoo-declare nnrss)
@@ -66,9 +67,6 @@
 
 (defvar nnrss-use-local nil)
 
-(defvar nnrss-verbose t
-  "Write messages when requesting group.")
-
 (defvar nnrss-description-field 'X-Gnus-Description
   "Field name used for DESCRIPTION.
 To use the description in headers, put this name into `nnmail-extra-headers'.")
@@ -86,6 +84,12 @@ ARTICLE is the article number of the current headline.")
 (defvar nnrss-file-coding-system mm-universal-coding-system
   "Coding system used when reading and writing files.")
 
+(defvar nnrss-compatible-encoding-alist '((iso-8859-1 . windows-1252))
+  "Alist of encodings and those supersets.
+The cdr of each element is used to decode data if it is available when
+the car is what the data specify as the encoding. Or, the car is used
+for decoding when the cdr that the data specify is not available.")
+
 (nnoo-define-basics nnrss)
 
 ;;; Interface functions
@@ -148,9 +152,8 @@ ARTICLE is the article number of the current headline.")
   'nov)
 
 (deffoo nnrss-request-group (group &optional server dont-check)
-  (if nnrss-verbose
-      (message (concat "nnrss requesting " group "...")))
   (setq group (nnrss-decode-group-name group))
+  (nnheader-message 6 "nnrss: Requesting %s..." group)
   (nnrss-possibly-change-group group server)
   (prog1
       (if dont-check
@@ -161,8 +164,7 @@ ARTICLE is the article number of the current headline.")
         "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max
         (prin1-to-string group)
         t))
-    (when nnrss-verbose
-      (message (concat "nnrss done with " group ".")))))
+    (nnheader-message 6 "nnrss: Requesting %s...done" group)))
 
 (deffoo nnrss-close-group (group &optional server)
   t)
@@ -178,63 +180,87 @@ ARTICLE is the article number of the current headline.")
        (nntp-server-buffer (or buffer nntp-server-buffer))
        post err)
     (when e
-      (catch 'error
-       (with-current-buffer nntp-server-buffer
-         (erase-buffer)
-         (if group
-             (insert "Newsgroups: " group "\n"))
-         (if (nth 3 e)
-             (insert "Subject: " (nth 3 e) "\n"))
-         (if (nth 4 e)
-             (insert "From: " (nth 4 e) "\n"))
-         (if (nth 5 e)
-             (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
-         (insert "\n")
-         (let ((text (if (nth 6 e)
-                         (mapconcat 'identity
-                                    (delete "" (split-string (nth 6 e) "\n+"))
-                                    " ")))
-               (link (nth 2 e))
-               ;; Enable encoding of Newsgroups header in XEmacs.
-               (default-enable-multibyte-characters t)
-               (rfc2047-header-encoding-alist
-                (if (mm-coding-system-p 'utf-8)
-                    (cons '("Newsgroups" . utf-8)
-                          rfc2047-header-encoding-alist)
-                  rfc2047-header-encoding-alist))
-               rfc2047-encode-encoded-words)
-           (when (or text link)
-             (insert "<#multipart type=alternative>\n"
-                     "<#part type=\"text/plain\">\n")
-             (if text
-                 (progn
-                   (insert text "\n")
-                   (when link
-                     (insert "\n" link "\n")))
-               (when link
-                 (insert link "\n")))
-             (insert "<#/part>\n"
-                     "<#part type=\"text/html\">\n"
-                     "<html><head></head><body>\n")
-             (when text
-               (insert text "\n"))
-             (when link
-               (insert "<p><a href=\"" link "\">link</a></p>\n"))
-             (insert "</body></html>\n"
-                     "<#/part>\n"
-                     "<#/multipart>\n")
-             (mml-to-mime)))
-         (goto-char (point-min))
-         (search-forward "\n\n")
-         (forward-line -1)
-         (insert (format "Message-ID: <%d@%s.nnrss>\n"
-                         (car e)
-                         (let ((rfc2047-encoding-type 'mime)
-                               rfc2047-encode-max-chars)
-                           (rfc2047-encode-string
-                            (gnus-replace-in-string group "[\t\n ]+" "_")))))
-         (when nnrss-content-function
-           (funcall nnrss-content-function e group article)))))
+      (with-current-buffer nntp-server-buffer
+       (erase-buffer)
+       (if group
+           (insert "Newsgroups: " group "\n"))
+       (if (nth 3 e)
+           (insert "Subject: " (nth 3 e) "\n"))
+       (if (nth 4 e)
+           (insert "From: " (nth 4 e) "\n"))
+       (if (nth 5 e)
+           (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
+       (let ((header (buffer-string))
+             (text (if (nth 6 e)
+                       (mapconcat 'identity
+                                  (delete "" (split-string (nth 6 e) "\n+"))
+                                  " ")))
+             (link (nth 2 e))
+             (enclosure (nth 7 e))
+             (comments (nth 8 e))
+             ;; Enable encoding of Newsgroups header in XEmacs.
+             (default-enable-multibyte-characters t)
+             (rfc2047-header-encoding-alist
+              (if (mm-coding-system-p 'utf-8)
+                  (cons '("Newsgroups" . utf-8)
+                        rfc2047-header-encoding-alist)
+                rfc2047-header-encoding-alist))
+             rfc2047-encode-encoded-words body)
+         (when (or text link enclosure comments)
+           (insert "\n")
+           (insert "<#multipart type=alternative>\n"
+                   "<#part type=\"text/plain\">\n")
+           (setq body (point))
+           (when text
+             (insert text "\n")
+             (when (or link enclosure)
+               (insert "\n")))
+           (when link
+             (insert link "\n"))
+           (when enclosure
+             (insert (car enclosure) " "
+                     (nth 2 enclosure) " "
+                     (nth 3 enclosure) "\n"))
+           (when comments
+             (insert comments "\n"))
+           (setq body (buffer-substring body (point)))
+           (insert "<#/part>\n"
+                   "<#part type=\"text/html\">\n"
+                   "<html><head></head><body>\n")
+           (when text
+             (insert text "\n"))
+           (when link
+             (insert "<p><a href=\"" link "\">link</a></p>\n"))
+           (when enclosure
+             (insert "<p><a href=\"" (car enclosure) "\">"
+                     (cadr enclosure) "</a> " (nth 2 enclosure)
+                     " " (nth 3 enclosure) "</p>\n"))
+           (when comments
+             (insert "<p><a href=\"" comments "\">comments</a></p>\n"))
+           (insert "</body></html>\n"
+                   "<#/part>\n"
+                   "<#/multipart>\n"))
+         (condition-case nil
+             (mml-to-mime)
+           (error
+            (erase-buffer)
+            (insert header
+                    "Content-Type: text/plain; charset=gnus-decoded\n"
+                    "Content-Transfer-Encoding: 8bit\n\n"
+                    body)
+            (nnheader-message
+             3 "Warning - there might be invalid characters"))))
+       (goto-char (point-min))
+       (search-forward "\n\n")
+       (forward-line -1)
+       (insert (format "Message-ID: <%d@%s.nnrss>\n"
+                       (car e)
+                       (let ((rfc2047-encoding-type 'mime)
+                             rfc2047-encode-max-chars)
+                         (rfc2047-encode-string
+                          (gnus-replace-in-string group "[\t\n ]+" "_")))))
+       (when nnrss-content-function
+         (funcall nnrss-content-function e group article))))
     (cond
      (err
       (nnheader-report 'nnrss err))
@@ -286,7 +312,7 @@ ARTICLE is the article number of the current headline.")
        (delq (assoc group nnrss-server-data) nnrss-server-data))
   (nnrss-save-server-data server)
   (ignore-errors
-    (delete-file (nnrss-make-filename group server)))
+   (delete-file (nnrss-make-filename group server)))
   t)
 
 (deffoo nnrss-request-list-newsgroups (&optional server)
@@ -305,16 +331,24 @@ ARTICLE is the article number of the current headline.")
 (eval-when-compile (defun xml-rpc-method-call (&rest args)))
 
 (defun nnrss-get-encoding ()
-  "Return an encoding attribute specified in the current xml contents."
+  "Return an encoding attribute specified in the current xml contents.
+If `nnrss-compatible-encoding-alist' specifies the compatible encoding,
+it is used instead.  If the xml contents doesn't specify the encoding,
+return `utf-8' which is the default encoding for xml if it is available,
+otherwise return nil."
   (goto-char (point-min))
-  (mm-coding-system-p
-   (if (re-search-forward
-       "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)"
-       nil t)
-       (intern-soft (downcase (or (match-string-no-properties 1)
-                                 (match-string-no-properties 2))))
-     ;; The default encoding for xml.
-     'utf-8)))
+  (if (re-search-forward
+       "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)"
+       nil t)
+      (let ((encoding (intern (downcase (or (match-string 1)
+                                           (match-string 2))))))
+       (or
+        (mm-coding-system-p (cdr (assq encoding
+                                       nnrss-compatible-encoding-alist)))
+        (mm-coding-system-p encoding)
+        (mm-coding-system-p (car (rassq encoding
+                                        nnrss-compatible-encoding-alist)))))
+    (mm-coding-system-p 'utf-8)))
 
 (defun nnrss-fetch (url &optional local)
   "Fetch URL and put it in a the expected Lisp structure."
@@ -499,8 +533,8 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s"
 ;;; Snarf functions
 
 (defun nnrss-check-group (group server)
-  (let (file xml subject url extra changed author
-            date rss-ns rdf-ns content-ns dc-ns)
+  (let (file xml subject url extra changed author date
+            enclosure comments rss-ns rdf-ns content-ns dc-ns)
     (if (and nnrss-use-local
             (file-exists-p (setq file (expand-file-name
                                        (nnrss-translate-file-chars
@@ -511,11 +545,11 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s"
                    (second (assoc group nnrss-group-alist))))
       (unless url
        (setq url
-             (cdr
-              (assoc 'href
-                     (nnrss-discover-feed
-                      (read-string
-                       (format "URL to search for %s: " group) "http://")))))
+             (cdr
+              (assoc 'href
+                     (nnrss-discover-feed
+                      (read-string
+                       (format "URL to search for %s: " group) "http://")))))
        (let ((pair (assoc group nnrss-server-data)))
          (if pair
              (setcdr (cdr pair) (list url))
@@ -548,6 +582,28 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s"
        (setq date (or (nnrss-node-text dc-ns 'date item)
                       (nnrss-node-text rss-ns 'pubDate item)
                       (message-make-date)))
+       (setq comments (nnrss-node-text rss-ns 'comments item))
+       (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item)))
+         (let ((url (cdr (assq 'url enclosure)))
+               (len (cdr (assq 'length enclosure)))
+               (type (cdr (assq 'type enclosure)))
+               (name))
+           (setq len
+                 (if (and len (integerp (setq len (string-to-number len))))
+                     ;; actually already in `ls-lisp-format-file-size' but
+                     ;; probably not worth to require it for one function
+                     (do ((size (/ len 1.0) (/ size 1024.0))
+                          (post-fixes (list "" "k" "M" "G" "T" "P" "E")
+                                      (cdr post-fixes)))
+                         ((< size 1024)
+                          (format "%.1f%s" size (car post-fixes))))
+                   "0"))
+           (setq url (or url ""))
+           (setq name (if (string-match "/\\([^/]*\\)$" url)
+                          (match-string 1 url)
+                        "file"))
+           (setq type (or type ""))
+           (setq enclosure (list url name len type))))
        (push
         (list
          (incf nnrss-group-max)
@@ -556,11 +612,13 @@ nnrss: %s: Not valid XML %s and w3-parse doesn't work %s"
          (and subject (nnrss-mime-encode-string subject))
          (and author (nnrss-mime-encode-string author))
          date
-         (and extra (nnrss-decode-entities-string extra)))
+         (and extra (nnrss-decode-entities-string extra))
+         enclosure
+         comments)
         nnrss-group-data)
        (puthash (or url extra) t nnrss-group-hashtb)
        (setq changed t))
-       (setq extra nil))
+      (setq extra nil))
     (when changed
       (nnrss-save-group-data group server)
       (let ((pair (assoc group nnrss-server-data)))
@@ -575,12 +633,12 @@ Read the file and attempt to subscribe to each Feed in the file."
   (interactive "fImport file: ")
   (mapcar
    (lambda (node) (gnus-group-make-rss-group
-                   (cdr (assq 'xmlUrl (cadr node)))))
+                  (cdr (assq 'xmlUrl (cadr node)))))
    (nnrss-find-el 'outline
-                  (progn
-                    (find-file opml-file)
-                    (xml-parse-region (point-min)
-                                      (point-max))))))
+                 (progn
+                   (find-file opml-file)
+                   (xml-parse-region (point-min)
+                                     (point-max))))))
 
 (defun nnrss-opml-export ()
   "OPML subscription export.
@@ -588,26 +646,22 @@ Export subscriptions to a buffer in OPML Format."
   (interactive)
   (with-current-buffer (get-buffer-create "*OPML Export*")
     (mm-set-buffer-file-coding-system 'utf-8)
-    (insert (concat
-            "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
-            "<!-- OPML generated by Emacs Gnus' nnrss.el -->\n"
-            "<opml version=\"1.1\">\n"
-            "  <head>\n"
-            "    <title>mySubscriptions</title>\n"
-            "    <dateCreated>" (format-time-string "%a, %d %b %Y %T %z")
-            "</dateCreated>\n"
-            "    <ownerEmail>" user-mail-address "</ownerEmail>\n"
-            "    <ownerName>" (user-full-name) "</ownerName>\n"
-            "  </head>\n"
-            "  <body>\n"))
-    (mapc (lambda (sub)
-           (insert (concat
-                    "    <outline text=\"" (car sub) "\" xmlUrl=\""
-                    (cadr sub) "\"/>\n")))
-         nnrss-group-alist)
-    (insert (concat
-            "  </body>\n"
-           "</opml>\n")))
+    (insert "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+           "<!-- OPML generated by Emacs Gnus' nnrss.el -->\n"
+           "<opml version=\"1.1\">\n"
+           "  <head>\n"
+           "    <title>mySubscriptions</title>\n"
+           "    <dateCreated>" (format-time-string "%a, %d %b %Y %T %z")
+           "</dateCreated>\n"
+           "    <ownerEmail>" user-mail-address "</ownerEmail>\n"
+           "    <ownerName>" (user-full-name) "</ownerName>\n"
+           "  </head>\n"
+           "  <body>\n")
+    (dolist (sub nnrss-group-alist)
+      (insert "    <outline text=\"" (car sub)
+             "\" xmlUrl=\"" (cadr sub) "\"/>\n"))
+    (insert "  </body>\n"
+           "</opml>\n"))
   (pop-to-buffer "*OPML Export*")
   (when (fboundp 'sgml-mode)
     (sgml-mode)))
@@ -664,8 +718,11 @@ It is useful when `(setq nnrss-use-local t)'."
         (text (if (and node (listp node))
                   (nnrss-node-just-text node)
                 node))
-        (cleaned-text (if text (gnus-replace-in-string
-                                text "^[\000-\037\177]+\\|^ +\\| +$" ""))))
+        (cleaned-text (if text
+                          (gnus-replace-in-string
+                           (gnus-replace-in-string
+                            text "^[\000-\037\177]+\\|^ +\\| +$" "")
+                           "\r\n" "\n"))))
     (if (string-equal "" cleaned-text)
        nil
       cleaned-text)))
@@ -678,28 +735,27 @@ It is useful when `(setq nnrss-use-local t)'."
 (defun nnrss-find-el (tag data &optional found-list)
   "Find the all matching elements in the data.
 Careful with this on large documents!"
-  (when (listp data)
-    (mapc (lambda (bit)
-           (when (car-safe bit)
-             (when (equal tag (car bit))
-               ;; Old xml.el may return a list of string.
-               (when (and (consp (caddr bit))
-                          (stringp (caaddr bit)))
-                 (setcar (cddr bit) (caaddr bit)))
-               (setq found-list
-                     (append found-list
-                             (list bit))))
-             (if (and (listp (car-safe (caddr bit)))
-                      (not (stringp (caddr bit))))
-                 (setq found-list
-                       (append found-list
-                               (nnrss-find-el
-                                tag (caddr bit))))
-               (setq found-list
-                     (append found-list
-                             (nnrss-find-el
-                              tag (cddr bit)))))))
-         data))
+  (when (consp data)
+    (dolist (bit data)
+      (when (car-safe bit)
+       (when (equal tag (car bit))
+         ;; Old xml.el may return a list of string.
+         (when (and (consp (caddr bit))
+                    (stringp (caaddr bit)))
+           (setcar (cddr bit) (caaddr bit)))
+         (setq found-list
+               (append found-list
+                       (list bit))))
+       (if (and (consp (car-safe (caddr bit)))
+                (not (stringp (caddr bit))))
+           (setq found-list
+                 (append found-list
+                         (nnrss-find-el
+                          tag (caddr bit))))
+         (setq found-list
+               (append found-list
+                       (nnrss-find-el
+                        tag (cddr bit))))))))
   found-list)
 
 (defun nnrss-rsslink-p (el)
@@ -725,12 +781,11 @@ DATA should be the output of `xml-parse-region' or
            (cdr (assoc 'href (cadr ahref))))
          (nnrss-find-el 'a data)))
 
-(defmacro nnrss-match-macro (base-uri item
-                                          onsite-list offsite-list)
+(defmacro nnrss-match-macro (base-uri item onsite-list offsite-list)
   `(cond ((or (string-match (concat "^" ,base-uri) ,item)
-              (not (string-match "://" ,item)))
-          (setq ,onsite-list (append ,onsite-list (list ,item))))
-         (t (setq ,offsite-list (append ,offsite-list (list ,item))))))
+             (not (string-match "://" ,item)))
+         (setq ,onsite-list (append ,onsite-list (list ,item))))
+        (t (setq ,offsite-list (append ,offsite-list (list ,item))))))
 
 (defun nnrss-order-hrefs (base-uri hrefs)
   "Given a list of hrefs, sort them using the following priorities:
@@ -746,27 +801,26 @@ whether they are `offsite' or `onsite'."
        rss-onsite-in   rdf-onsite-in   xml-onsite-in
        rss-offsite-end rdf-offsite-end xml-offsite-end
        rss-offsite-in rdf-offsite-in xml-offsite-in)
-    (mapc (lambda (href)
-           (if (not (null href))
-               (cond ((string-match "\\.rss$" href)
-                      (nnrss-match-macro
-                       base-uri href rss-onsite-end rss-offsite-end))
-                     ((string-match "\\.rdf$" href)
-                      (nnrss-match-macro
-                       base-uri href rdf-onsite-end rdf-offsite-end))
-                     ((string-match "\\.xml$" href)
-                      (nnrss-match-macro
-                       base-uri href xml-onsite-end xml-offsite-end))
-                     ((string-match "rss" href)
-                      (nnrss-match-macro
-                       base-uri href rss-onsite-in rss-offsite-in))
-                     ((string-match "rdf" href)
-                      (nnrss-match-macro
-                       base-uri href rdf-onsite-in rdf-offsite-in))
-                     ((string-match "xml" href)
-                      (nnrss-match-macro
-                       base-uri href xml-onsite-in xml-offsite-in)))))
-         hrefs)
+    (dolist (href hrefs)
+      (cond ((null href))
+           ((string-match "\\.rss$" href)
+            (nnrss-match-macro
+             base-uri href rss-onsite-end rss-offsite-end))
+           ((string-match "\\.rdf$" href)
+            (nnrss-match-macro
+             base-uri href rdf-onsite-end rdf-offsite-end))
+           ((string-match "\\.xml$" href)
+            (nnrss-match-macro
+             base-uri href xml-onsite-end xml-offsite-end))
+           ((string-match "rss" href)
+            (nnrss-match-macro
+             base-uri href rss-onsite-in rss-offsite-in))
+           ((string-match "rdf" href)
+            (nnrss-match-macro
+             base-uri href rdf-onsite-in rdf-offsite-in))
+           ((string-match "xml" href)
+            (nnrss-match-macro
+             base-uri href xml-onsite-in xml-offsite-in))))
     (append
      rss-onsite-end  rdf-onsite-end  xml-onsite-end
      rss-onsite-in   rdf-onsite-in   xml-onsite-in
@@ -803,17 +857,17 @@ whether they are `offsite' or `onsite'."
                 (hrefs (nnrss-order-hrefs
                         base-uri (nnrss-extract-hrefs parsed-page)))
                 (rss-link nil))
-         (while (and (eq rss-link nil) (not (eq hrefs nil)))
-           (let ((href-data (nnrss-fetch (car hrefs))))
-             (if (nnrss-rss-p href-data)
-                 (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/")))
-                   (setq rss-link (nnrss-rss-title-description
-                                   rss-ns href-data (car hrefs))))
-               (setq hrefs (cdr hrefs)))))
-         (if rss-link rss-link
+           (while (and (eq rss-link nil) (not (eq hrefs nil)))
+             (let ((href-data (nnrss-fetch (car hrefs))))
+               (if (nnrss-rss-p href-data)
+                   (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/")))
+                     (setq rss-link (nnrss-rss-title-description
+                                     rss-ns href-data (car hrefs))))
+                 (setq hrefs (cdr hrefs)))))
+           (if rss-link rss-link
 
 ;;    4. check syndic8
-           (nnrss-find-rss-via-syndic8 url))))))))
+             (nnrss-find-rss-via-syndic8 url))))))))
 
 (defun nnrss-find-rss-via-syndic8 (url)
   "Query syndic8 for the rss feeds it has for URL."
@@ -854,7 +908,7 @@ whether they are `offsite' or `onsite'."
                  (selection
                   (mapcar (lambda (listinfo)
                             (cons (cdr (assoc "sitename" listinfo))
-                                  (string-to-int
+                                  (string-to-number
                                    (cdr (assoc "feedid" listinfo)))))
                           feedinfo)))
              (cdr (assoc