2004-12-26 Katsumi Yamaoka <yamaoka@jpl.org>
authorKatsumi Yamaoka <yamaoka@jpl.org>
Sun, 26 Dec 2004 13:44:52 +0000 (13:44 +0000)
committerKatsumi Yamaoka <yamaoka@jpl.org>
Sun, 26 Dec 2004 13:44:52 +0000 (13:44 +0000)
* nnrss.el: Require rfc2047 and mml.
(nnrss-file-coding-system): New variable.
(nnrss-format-string): Redefine it as an inline function.
(nnrss-decode-group-name): New function.
(nnrss-string-as-multibyte): Remove.
(nnrss-retrieve-headers): Decode group name; don't use nnrss-format-string.
(nnrss-request-group): Decode group name.
(nnrss-request-article): Decode group name; allow a Message-ID as well as an
 article number; don't use nnrss-format-string; encode a Message-ID string
 which may contain non-ASCII characters; use mml-to-mime to compose a MIME
 article.
(nnrss-request-expire-articles): Decode group name.
(nnrss-request-delete-group): Decode group name.
(nnrss-fetch): Clarify error message.
(nnrss-read-server-data): Use insert-file-contents instead of load;
 bind file-name-coding-system; use multibyte buffer.
(nnrss-save-server-data): Bind coding-system-for-write to the value of
 nnrss-file-coding-system; bind file-name-coding-system; add coding cookie.
(nnrss-read-group-data): Use insert-file-contents instead of load;
 bind file-name-coding-system; use multibyte buffer.
(nnrss-save-group-data): Bind coding-system-for-write to the value of
 nnrss-file-coding-system; bind file-name-coding-system.
(nnrss-decode-entities-string): Rename from n-d-e-unibyte-string;
 make it work with non-ASCII text.
(nnrss-find-el): Make it work with old xml.el as well.

2004-12-26  Tsuyoshi AKIHO  <akiho@kawachi.zaq.ne.jp>

* nnrss.el (nnrss-get-encoding): New function.
(nnrss-fetch): Use unibyte buffer initially; bind coding-system-for-read while
 performing mm-url-insert; remove ^Ms; decode contents according to the
 encoding attribute.
(nnrss-save-group-data): Add coding cookie.
(nnrss-mime-encode-string): New function.
(nnrss-check-group): Use it to encode subject and author.

lisp/ChangeLog
lisp/nnrss.el

index 79d64bd..e5316b8 100644 (file)
@@ -1,3 +1,43 @@
+2004-12-26  Katsumi Yamaoka  <yamaoka@jpl.org>
+
+       * nnrss.el: Require rfc2047 and mml.
+       (nnrss-file-coding-system): New variable.
+       (nnrss-format-string): Redefine it as an inline function.
+       (nnrss-decode-group-name): New function.
+       (nnrss-string-as-multibyte): Remove.
+       (nnrss-retrieve-headers): Decode group name; don't use
+       nnrss-format-string.
+       (nnrss-request-group): Decode group name.
+       (nnrss-request-article): Decode group name; allow a Message-ID as
+       well as an article number; don't use nnrss-format-string; encode a
+       Message-ID string which may contain non-ASCII characters; use
+       mml-to-mime to compose a MIME article.
+       (nnrss-request-expire-articles): Decode group name.
+       (nnrss-request-delete-group): Decode group name.
+       (nnrss-fetch): Clarify error message.
+       (nnrss-read-server-data): Use insert-file-contents instead of load;
+       bind file-name-coding-system; use multibyte buffer.
+       (nnrss-save-server-data): Bind coding-system-for-write to the
+       value of nnrss-file-coding-system; bind file-name-coding-system;
+       add coding cookie.
+       (nnrss-read-group-data): Use insert-file-contents instead of load;
+       bind file-name-coding-system; use multibyte buffer.
+       (nnrss-save-group-data): Bind coding-system-for-write to the
+       value of nnrss-file-coding-system; bind file-name-coding-system.
+       (nnrss-decode-entities-string): Rename from n-d-e-unibyte-string;
+       make it work with non-ASCII text.
+       (nnrss-find-el): Make it work with old xml.el as well.
+
+2004-12-26  Tsuyoshi AKIHO  <akiho@kawachi.zaq.ne.jp>
+
+       * nnrss.el (nnrss-get-encoding): New function.
+       (nnrss-fetch): Use unibyte buffer initially; bind
+       coding-system-for-read while performing mm-url-insert; remove ^Ms;
+       decode contents according to the encoding attribute.
+       (nnrss-save-group-data): Add coding cookie.
+       (nnrss-mime-encode-string): New function.
+       (nnrss-check-group): Use it to encode subject and author.
+
 2004-12-23  Teodor Zlatanov  <tzz@lifelogs.com>
 
        * spam.el (spam-check-BBDB): don't get the symbol-value of an
index a8c4f49..878c8a7 100644 (file)
@@ -36,6 +36,8 @@
 (require 'time-date)
 (require 'rfc2231)
 (require 'mm-url)
+(require 'rfc2047)
+(require 'mml)
 (eval-when-compile
   (ignore-errors
     (require 'xml)))
@@ -78,17 +80,23 @@ The arguments are (ENTRY GROUP ARTICLE).
 ENTRY is the record of the current headline.  GROUP is the group name.
 ARTICLE is the article number of the current headline.")
 
+(defvar nnrss-file-coding-system mm-universal-coding-system
+  "Coding system used when reading and writing files.")
+
 (nnoo-define-basics nnrss)
 
 ;;; Interface functions
 
-(eval-when-compile
-  (defmacro nnrss-string-as-multibyte (string)
-    (if (featurep 'xemacs)
-       string
-      `(string-as-multibyte ,string))))
+(defsubst nnrss-format-string (string)
+  (gnus-replace-in-string string " *\n *" " "))
+
+(defun nnrss-decode-group-name (group)
+  (if (and group (mm-coding-system-p 'utf-8))
+      (setq group (mm-decode-coding-string group 'utf-8))
+    group))
 
 (deffoo nnrss-retrieve-headers (articles &optional group server fetch-old)
+  (setq group (nnrss-decode-group-name group))
   (nnrss-possibly-change-group group server)
   (let (e)
     (save-excursion
@@ -97,21 +105,26 @@ ARTICLE is the article number of the current headline.")
       (dolist (article articles)
        (if (setq e (assq article nnrss-group-data))
            (insert (number-to-string (car e)) "\t" ;; number
-                   (if (nth 3 e)
-                       (nnrss-format-string (nth 3 e)) "")
-                   "\t" ;; subject
-                   (if (nth 4 e)
-                       (nnrss-format-string (nth 4 e))
-                     "(nobody)")
-                   "\t" ;;from
+                   ;; subject
+                   (or (nth 3 e) "")
+                   "\t"
+                   ;; from
+                   (or (nth 4 e) "(nobody)")
+                   "\t"
+                   ;; date
                    (or (nth 5 e) "")
-                   "\t" ;; date
+                   "\t"
+                   ;; id
                    (format "<%d@%s.nnrss>" (car e) group)
-                   "\t" ;; id
-                   "\t" ;; refs
-                   "-1" "\t" ;; chars
-                   "-1" "\t" ;; lines
-                   "" "\t" ;; Xref
+                   "\t"
+                   ;; refs
+                   "\t"
+                   ;; chars
+                   "-1" "\t"
+                   ;; lines
+                   "-1" "\t"
+                   ;; Xref
+                   "" "\t"
                    (if (and (nth 6 e)
                             (memq nnrss-description-field
                                   nnmail-extra-headers))
@@ -132,6 +145,7 @@ ARTICLE is the article number of the current headline.")
   'nov)
 
 (deffoo nnrss-request-group (group &optional server dont-check)
+  (setq group (nnrss-decode-group-name group))
   (nnrss-possibly-change-group group server)
   (if dont-check
       t
@@ -146,53 +160,71 @@ ARTICLE is the article number of the current headline.")
   t)
 
 (deffoo nnrss-request-article (article &optional group server buffer)
+  (setq group (nnrss-decode-group-name group))
+  (when (stringp article)
+    (setq article (if (string-match "\\`<\\([0-9]+\\)@" article)
+                     (string-to-number (match-string 1 article))
+                   0)))
   (nnrss-possibly-change-group group server)
   (let ((e (assq article nnrss-group-data))
-       (boundary "=-=-=-=-=-=-=-=-=-")
        (nntp-server-buffer (or buffer nntp-server-buffer))
        post err)
     (when e
       (catch 'error
        (with-current-buffer nntp-server-buffer
          (erase-buffer)
-         (goto-char (point-min))
-         (insert "Mime-Version: 1.0\nContent-Type: multipart/alternative; boundary=\"" boundary "\"\n")
          (if group
              (insert "Newsgroups: " group "\n"))
          (if (nth 3 e)
-             (insert "Subject: " (nnrss-format-string (nth 3 e)) "\n"))
+             (insert "Subject: " (nth 3 e) "\n"))
          (if (nth 4 e)
-             (insert "From: " (nnrss-format-string (nth 4 e)) "\n"))
+             (insert "From: " (nth 4 e) "\n"))
          (if (nth 5 e)
              (insert "Date: " (nnrss-format-string (nth 5 e)) "\n"))
-         (insert "Message-ID: " (format "<%d@%s.nnrss>" (car e) group) "\n")
          (insert "\n")
          (let ((text (if (nth 6 e)
-                         (nnrss-string-as-multibyte (nth 6 e))))
-               (link (if (nth 2 e)
-                         (nth 2 e))))
-           (insert "\n\n--" boundary "\nContent-Type: text/plain\n\n")
-           (let ((point (point)))
-             (when text
-               (insert text)
-               (goto-char point)
-               (while (search-forward "\n" nil t)
-                 (replace-match " "))
-               (goto-char (point-max))
-               (insert "\n\n"))
-             (when link
-               (insert link)))
-           (insert "\n\n--" boundary "\nContent-Type: text/html\n\n")
-           (let ((point (point)))
+                         (mapconcat 'identity
+                                    (delete "" (split-string (nth 6 e) "\n+"))
+                                    " ")))
+               (link (nth 2 e))
+               ;; Enable encoding of Newsgroups header in XEmacs.
+               (default-enable-multibyte-characters t)
+               (rfc2047-header-encoding-alist
+                (if (mm-coding-system-p 'utf-8)
+                    (cons '("Newsgroups" . utf-8)
+                          rfc2047-header-encoding-alist)
+                  rfc2047-header-encoding-alist))
+               rfc2047-encode-encoded-words)
+           (when (or text link)
+             (insert "<#multipart type=alternative>\n"
+                     "<#part type=\"text/plain\">\n")
+             (if text
+                 (progn
+                   (insert text "\n")
+                   (when link
+                     (insert "\n" link "\n")))
+               (when link
+                 (insert link "\n")))
+             (insert "<#/part>\n"
+                     "<#part type=\"text/html\">\n"
+                     "<html><head></head><body>\n")
              (when text
-               (insert "<html><head></head><body>\n" text "\n</body></html>")
-               (goto-char point)
-               (while (search-forward "\n" nil t)
-                 (replace-match " "))
-               (goto-char (point-max))
-               (insert "\n\n"))
+               (insert text "\n"))
              (when link
-               (insert "<p><a href=\"" link "\">link</a></p>\n"))))
+               (insert "<p><a href=\"" link "\">link</a></p>\n"))
+             (insert "</body></html>\n"
+                     "<#/part>\n"
+                     "<#/multipart>\n")
+             (mml-to-mime)))
+         (goto-char (point-min))
+         (search-forward "\n\n")
+         (forward-line -1)
+         (insert (format "Message-ID: <%d@%s.nnrss>\n"
+                         (car e)
+                         (let ((rfc2047-encoding-type 'mime)
+                               rfc2047-encode-max-chars)
+                           (rfc2047-encode-string
+                            (gnus-replace-in-string group "[\t\n ]+" "_")))))
          (when nnrss-content-function
            (funcall nnrss-content-function e group article)))))
     (cond
@@ -217,6 +249,7 @@ ARTICLE is the article number of the current headline.")
 
 (deffoo nnrss-request-expire-articles
     (articles group &optional server force)
+  (setq group (nnrss-decode-group-name group))
   (nnrss-possibly-change-group group server)
   (let (e days not-expirable changed)
     (dolist (art articles)
@@ -234,6 +267,7 @@ ARTICLE is the article number of the current headline.")
     not-expirable))
 
 (deffoo nnrss-request-delete-group (group &optional force server)
+  (setq group (nnrss-decode-group-name group))
   (nnrss-possibly-change-group group server)
   (setq nnrss-server-data
        (delq (assoc group nnrss-server-data) nnrss-server-data))
@@ -256,16 +290,38 @@ ARTICLE is the article number of the current headline.")
 
 ;;; Internal functions
 (eval-when-compile (defun xml-rpc-method-call (&rest args)))
+
+(defun nnrss-get-encoding ()
+  "Return an encoding attribute specified in the current xml contents."
+  (goto-char (point-min))
+  (mm-coding-system-p
+   (if (re-search-forward
+       "<\\?[^>]*encoding=\\(?:\"\\([^>]+\\)\"\\|'\\([^>]+\\)'\\)"
+       nil t)
+       (intern-soft (downcase (or (match-string-no-properties 1)
+                                 (match-string-no-properties 2))))
+     ;; The default encoding for xml.
+     'utf-8)))
+
 (defun nnrss-fetch (url &optional local)
   "Fetch URL and put it in a the expected Lisp structure."
-  (with-temp-buffer
+  (mm-with-unibyte-buffer
     ;;some CVS versions of url.el need this to close the connection quickly
-    (let (xmlform htmlform)
+    (let (cs xmlform htmlform)
       ;; bit o' work necessary for w3 pre-cvs and post-cvs
       (if local
          (let ((coding-system-for-read 'binary))
            (insert-file-contents url))
-       (mm-url-insert url))
+       ;; FIXME: shouldn't binding `coding-system-for-read' be moved
+       ;; to `mm-url-insert'?
+       (let ((coding-system-for-read 'binary))
+         (mm-url-insert url)))
+      (nnheader-remove-cr-followed-by-lf)
+      ;; Decode text according to the encoding attribute.
+      (when (setq cs (nnrss-get-encoding))
+       (mm-decode-coding-region (point-min) (point-max) cs)
+       (mm-enable-multibyte))
+      (goto-char (point-min))
 
       ;; Because xml-parse-region can't deal with anything that isn't
       ;; xml and w3-parse-buffer can't deal with some xml, we have to
@@ -274,15 +330,16 @@ ARTICLE is the article number of the current headline.")
       ;; why w3-parse-buffer fails to parse some well-formed xml and
       ;; fix it.
 
-      (condition-case nil
+      (condition-case err1
          (setq xmlform (xml-parse-region (point-min) (point-max)))
        (error
-        (condition-case err
+        (condition-case err2
             (setq htmlform (caddar (w3-parse-buffer
                                     (current-buffer))))
           (error
-           (message "nnrss: %s: Not valid XML and w3-parse doesn't work: %s"
-                    url err)))))
+           (message "\
+nnrss: %s: Not valid XML %s and w3-parse doesn't work %s"
+                    url err1 err2)))))
       (if htmlform
          htmlform
        xmlform))))
@@ -315,13 +372,21 @@ ARTICLE is the article number of the current headline.")
   (setq nnrss-server-data nil)
   (let ((file (nnrss-make-filename "nnrss" server)))
     (when (file-exists-p file)
-      (let ((coding-system-for-read 'binary))
-       (load file nil nil t)))))
+      ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII
+      ;; file names.  So, we use `insert-file-contents' instead.
+      (mm-with-multibyte-buffer
+       (let ((coding-system-for-read nnrss-file-coding-system)
+             (file-name-coding-system nnmail-pathname-coding-system))
+         (insert-file-contents file)
+         (eval-region (point-min) (point-max)))))))
 
 (defun nnrss-save-server-data (server)
   (gnus-make-directory nnrss-directory)
-  (let ((coding-system-for-write 'binary))
+  (let ((coding-system-for-write nnrss-file-coding-system)
+       (file-name-coding-system nnmail-pathname-coding-system))
     (with-temp-file (nnrss-make-filename "nnrss" server)
+      (insert (format ";; -*- coding: %s; -*-\n"
+                     nnrss-file-coding-system))
       (gnus-prin1 `(setq nnrss-group-alist ',nnrss-group-alist))
       (gnus-prin1 `(setq nnrss-server-data ',nnrss-server-data)))))
 
@@ -335,8 +400,13 @@ ARTICLE is the article number of the current headline.")
     (setq nnrss-group-min (+ nnrss-group-max 1)))
   (let ((file (nnrss-make-filename group server)))
     (when (file-exists-p file)
-      (let ((coding-system-for-read 'binary))
-       (load file nil t t))
+      ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII
+      ;; file names.  So, we use `insert-file-contents' instead.
+      (mm-with-multibyte-buffer
+       (let ((coding-system-for-read nnrss-file-coding-system)
+             (file-name-coding-system nnmail-pathname-coding-system))
+         (insert-file-contents file)
+         (eval-region (point-min) (point-max))))
       (dolist (e nnrss-group-data)
        (puthash (or (nth 2 e) (nth 6 e)) t nnrss-group-hashtb)
        (when (and (car e) (> nnrss-group-min (car e)))
@@ -346,8 +416,11 @@ ARTICLE is the article number of the current headline.")
 
 (defun nnrss-save-group-data (group server)
   (gnus-make-directory nnrss-directory)
-  (let ((coding-system-for-write 'binary))
+  (let ((coding-system-for-write nnrss-file-coding-system)
+       (file-name-coding-system nnmail-pathname-coding-system))
     (with-temp-file (nnrss-make-filename group server)
+      (insert (format ";; -*- coding: %s; -*-\n"
+                     nnrss-file-coding-system))
       (gnus-prin1 `(setq nnrss-group-data ',nnrss-group-data)))))
 
 (defun nnrss-make-filename (name server)
@@ -379,15 +452,36 @@ ARTICLE is the article number of the current headline.")
   (mm-with-unibyte-current-buffer
     (mm-url-insert url)))
 
-(defun nnrss-decode-entities-unibyte-string (string)
+(defun nnrss-decode-entities-string (string)
   (if string
-      (mm-with-unibyte-buffer
+      (mm-with-multibyte-buffer
        (insert string)
        (mm-url-decode-entities-nbsp)
        (buffer-string))))
 
 (defalias 'nnrss-insert 'nnrss-insert-w3)
 
+(defun nnrss-mime-encode-string (string)
+  (mm-with-multibyte-buffer
+    (insert string)
+    (mm-url-decode-entities-nbsp)
+    (goto-char (point-min))
+    (while (re-search-forward "[\t\n ]+" nil t)
+      (replace-match " "))
+    (goto-char (point-min))
+    (skip-chars-forward " ")
+    (delete-region (point-min) (point))
+    (goto-char (point-max))
+    (skip-chars-forward " ")
+    (delete-region (point) (point-max))
+    (let ((rfc2047-encoding-type 'mime)
+         rfc2047-encode-max-chars)
+      (rfc2047-encode-region (point-min) (point-max)))
+    (goto-char (point-min))
+    (while (search-forward "\n" nil t)
+      (delete-backward-char 1))
+    (buffer-string)))
+
 ;;; Snarf functions
 
 (defun nnrss-check-group (group server)
@@ -424,7 +518,7 @@ ARTICLE is the article number of the current headline.")
     (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
       (when (and (listp item)
                 (string= (concat rss-ns "item") (car item))
-                (if (setq url (nnrss-decode-entities-unibyte-string
+                (if (setq url (nnrss-decode-entities-string
                                (nnrss-node-text rss-ns 'link (cddr item))))
                     (not (gethash url nnrss-group-hashtb))
                   (setq extra (or (nnrss-node-text content-ns 'encoded item)
@@ -445,10 +539,10 @@ ARTICLE is the article number of the current headline.")
          (incf nnrss-group-max)
          (current-time)
          url
-         (and subject (nnrss-decode-entities-unibyte-string subject))
-         (and author (nnrss-decode-entities-unibyte-string author))
+         (and subject (nnrss-mime-encode-string subject))
+         (and author (nnrss-mime-encode-string author))
          date
-         (and extra (nnrss-decode-entities-unibyte-string extra)))
+         (and extra (nnrss-decode-entities-string extra)))
         nnrss-group-data)
        (puthash (or url extra) t nnrss-group-hashtb)
        (setq changed t))
@@ -550,9 +644,6 @@ It is useful when `(setq nnrss-use-local t)'."
     (if changed
        (nnrss-save-server-data ""))))
 
-(defun nnrss-format-string (string)
-  (gnus-replace-in-string (nnrss-string-as-multibyte string) " *\n *" " "))
-
 (defun nnrss-node-text (namespace local-name element)
   (let* ((node (assq (intern (concat namespace (symbol-name local-name)))
                     element))
@@ -577,6 +668,9 @@ Careful with this on large documents!"
     (mapc (lambda (bit)
            (when (car-safe bit)
              (when (equal tag (car bit))
+               ;; Old xml.el may return a list of string.
+               (when (consp (caddr bit))
+                 (setcar (cddr bit) (caaddr bit)))
                (setq found-list
                      (append found-list
                              (list bit))))