X-Git-Url: http://cgit.sxemacs.org/?p=gnus;a=blobdiff_plain;f=lisp%2Fnnrss.el;h=ac9c5ffab2cd1a15fa1b7b16397ef4a4a05a58d3;hp=56134817b8758f48a2a2389dd4f16aef9ebd727d;hb=6c7ec91b0fc3cba68595a10981820e24d031e82a;hpb=591fe1a8582f36879a7e08c4838d4017e19d2740 diff --git a/lisp/nnrss.el b/lisp/nnrss.el index 56134817b..ac9c5ffab 100644 --- a/lisp/nnrss.el +++ b/lisp/nnrss.el @@ -1,25 +1,24 @@ ;;; nnrss.el --- interfacing with RSS -;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. + +;; Copyright (C) 2001-2015 Free Software Foundation, Inc. ;; Author: Shenghuo Zhu ;; Keywords: RSS ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published -;; by the Free Software Foundation; either version 2, or (at your -;; option) any later version. +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. -;; GNU Emacs is distributed in the hope that it will be useful, but -;; WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;; General Public License for more details. +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: @@ -36,9 +35,11 @@ (require 'time-date) (require 'rfc2231) (require 'mm-url) +(require 'rfc2047) +(require 'mml) (eval-when-compile (ignore-errors - (require 'xml))) + (require 'xml))) (eval '(require 'xml)) (nnoo-declare nnrss) @@ -46,6 +47,15 @@ (defvoo nnrss-directory (nnheader-concat gnus-directory "rss/") "Where nnrss will save its files.") +(defvoo nnrss-ignore-article-fields '(slash:comments) + "*List of fields that should be ignored when comparing RSS articles. +Some RSS feeds update article fields during their lives, e.g. to +indicate the number of comments or the number of times the +articles have been seen. However, if there is a difference +between the local article and the distant one, the latter is +considered to be new. To avoid this and discard some fields, set +this variable to the list of fields to be ignored.") + ;; (group max rss-url) (defvoo nnrss-server-data nil) @@ -62,7 +72,8 @@ (defvar nnrss-group-alist '() "List of RSS addresses.") -(defvar nnrss-use-local nil) +(defvar nnrss-use-local nil + "If non-nil nnrss will read the feeds from local files in nnrss-directory.") (defvar nnrss-description-field 'X-Gnus-Description "Field name used for DESCRIPTION. @@ -75,37 +86,70 @@ To use the description in headers, put this name into `nnmail-extra-headers'.") (defvar nnrss-content-function nil "A function which is called in `nnrss-request-article'. The arguments are (ENTRY GROUP ARTICLE). -ENTRY is the record of the current headline. GROUP is the group name. +ENTRY is the record of the current headline. GROUP is the group name. ARTICLE is the article number of the current headline.") +(defvar nnrss-file-coding-system mm-universal-coding-system + "*Coding system used when reading and writing files. +If you run Gnus with various versions of Emacsen, the value of this +variable should be the coding system that all those Emacsen support. +Note that you have to regenerate all the nnrss groups if you change +the value. Moreover, you should be patient even if you are made to +read the same articles twice, that arises for the difference of the +versions of xml.el.") + +(defvar nnrss-compatible-encoding-alist + (delq nil (mapcar (lambda (elem) + (if (and (mm-coding-system-p (car elem)) + (mm-coding-system-p (cdr elem))) + elem)) + mm-charset-override-alist)) + "Alist of encodings and those supersets. +The cdr of each element is used to decode data if it is available when +the car is what the data specify as the encoding. Or, the car is used +for decoding when the cdr that the data specify is not available.") + (nnoo-define-basics nnrss) ;;; Interface functions +(defsubst nnrss-format-string (string) + (gnus-replace-in-string string " *\n *" " ")) + +(defun nnrss-decode-group-name (group) + (if (and group (mm-coding-system-p 'utf-8)) + (setq group (mm-decode-coding-string group 'utf-8)) + group)) + (deffoo nnrss-retrieve-headers (articles &optional group server fetch-old) + (setq group (nnrss-decode-group-name group)) (nnrss-possibly-change-group group server) (let (e) - (save-excursion - (set-buffer nntp-server-buffer) + (with-current-buffer nntp-server-buffer (erase-buffer) (dolist (article articles) (if (setq e (assq article nnrss-group-data)) (insert (number-to-string (car e)) "\t" ;; number - (if (nth 3 e) - (nnrss-format-string (nth 3 e)) "") - "\t" ;; subject - (if (nth 4 e) - (nnrss-format-string (nth 4 e)) - "(nobody)") - "\t" ;;from + ;; subject + (or (nth 3 e) "") + "\t" + ;; from + (or (nth 4 e) "(nobody)") + "\t" + ;; date (or (nth 5 e) "") - "\t" ;; date + "\t" + ;; id (format "<%d@%s.nnrss>" (car e) group) - "\t" ;; id - "\t" ;; refs - "-1" "\t" ;; chars - "-1" "\t" ;; lines - "" "\t" ;; Xref + "\t" + ;; refs + "\t" + ;; chars + "-1" "\t" + ;; lines + "-1" "\t" + ;; Xref + "" "\t" (if (and (nth 6 e) (memq nnrss-description-field nnmail-extra-headers)) @@ -125,70 +169,135 @@ ARTICLE is the article number of the current headline.") "\n"))))) 'nov) -(deffoo nnrss-request-group (group &optional server dont-check) +(deffoo nnrss-request-group (group &optional server dont-check info) + (setq group (nnrss-decode-group-name group)) + (nnheader-message 6 "nnrss: Requesting %s..." group) (nnrss-possibly-change-group group server) - (if dont-check - t - (nnrss-check-group group server) - (nnheader-report 'nnrss "Opened group %s" group) - (nnheader-insert - "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max - (prin1-to-string group) - t))) + (prog1 + (if dont-check + t + (nnrss-check-group group server) + (nnheader-report 'nnrss "Opened group %s" group) + (nnheader-insert + "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max + (prin1-to-string group) + t)) + (nnheader-message 6 "nnrss: Requesting %s...done" group))) (deffoo nnrss-close-group (group &optional server) t) (deffoo nnrss-request-article (article &optional group server buffer) + (setq group (nnrss-decode-group-name group)) + (when (stringp article) + (setq article (if (string-match "\\`<\\([0-9]+\\)@" article) + (string-to-number (match-string 1 article)) + 0))) (nnrss-possibly-change-group group server) (let ((e (assq article nnrss-group-data)) - (boundary "=-=-=-=-=-=-=-=-=-") (nntp-server-buffer (or buffer nntp-server-buffer)) post err) (when e - (catch 'error - (with-current-buffer nntp-server-buffer - (erase-buffer) - (goto-char (point-min)) - (insert "Mime-Version: 1.0\nContent-Type: multipart/alternative; boundary=\"" boundary "\"\n") - (if group - (insert "Newsgroups: " group "\n")) - (if (nth 3 e) - (insert "Subject: " (nnrss-format-string (nth 3 e)) "\n")) - (if (nth 4 e) - (insert "From: " (nnrss-format-string (nth 4 e)) "\n")) - (if (nth 5 e) - (insert "Date: " (nnrss-format-string (nth 5 e)) "\n")) - (insert "Message-ID: " (format "<%d@%s.nnrss>" (car e) group) "\n") - (insert "\n") - (let ((text (if (nth 6 e) - (nnrss-string-as-multibyte (nth 6 e)))) - (link (if (nth 2 e) - (nth 2 e)))) - (insert "\n\n--" boundary "\nContent-Type: text/plain\n\n") - (let ((point (point))) - (when text - (insert text) - (goto-char point) - (while (re-search-forward "\n" nil t) - (replace-match " ")) - (goto-char (point-max)) - (insert "\n\n")) - (when link - (insert link))) - (insert "\n\n--" boundary "\nContent-Type: text/html\n\n") - (let ((point (point))) - (when text - (insert "\n" text "\n") - (goto-char point) - (while (re-search-forward "\n" nil t) - (replace-match " ")) - (goto-char (point-max)) - (insert "\n\n")) - (when link - (insert "

link

\n")))) - (when nnrss-content-function - (funcall nnrss-content-function e group article))))) + (with-current-buffer nntp-server-buffer + (erase-buffer) + (if group + (insert "Newsgroups: " group "\n")) + (if (nth 3 e) + (insert "Subject: " (nth 3 e) "\n")) + (if (nth 4 e) + (insert "From: " (nth 4 e) "\n")) + (if (nth 5 e) + (insert "Date: " (nnrss-format-string (nth 5 e)) "\n")) + (let ((header (buffer-string)) + (text (nth 6 e)) + (link (nth 2 e)) + (enclosure (nth 7 e)) + (comments (nth 8 e)) + (rfc2047-header-encoding-alist + (if (mm-coding-system-p 'utf-8) + (cons '("Newsgroups" . utf-8) + rfc2047-header-encoding-alist) + rfc2047-header-encoding-alist)) + rfc2047-encode-encoded-words body fn) + (when (or text link enclosure comments) + (insert "\n") + (insert "<#multipart type=alternative>\n" + "<#part type=\"text/plain\">\n") + (setq body (point)) + (when text + (insert text) + (goto-char body) + (while (re-search-forward "\n+" nil t) + (replace-match " ")) + (goto-char body) + ;; See `nnrss-check-group', which inserts "

". + (when (search-forward "

" nil t) + (if (eobp) + (replace-match "\n") + (replace-match "\n\n"))) + (unless (eobp) + (let ((fill-column (default-value 'fill-column)) + (window (get-buffer-window nntp-server-buffer))) + (when window + (setq fill-column + (max 1 (/ (* (window-width window) 7) 8)))) + (fill-region (point) (point-max)) + (goto-char (point-max)) + ;; XEmacs version of `fill-region' inserts newline. + (unless (bolp) + (insert "\n")))) + (when (or link enclosure) + (insert "\n"))) + (when link + (insert link "\n")) + (when enclosure + (insert (car enclosure) " " + (nth 2 enclosure) " " + (nth 3 enclosure) "\n")) + (when comments + (insert comments "\n")) + (setq body (buffer-substring body (point))) + (insert "<#/part>\n" + "<#part type=\"text/html\">\n" + "\n") + (when text + (insert text "\n")) + (when link + (insert "

link

\n")) + (when enclosure + (insert "

" + (cadr enclosure) " " (nth 2 enclosure) + " " (nth 3 enclosure) "

\n")) + (when comments + (insert "

comments

\n")) + (insert "\n" + "<#/part>\n" + "<#/multipart>\n")) + (condition-case nil + ;; Allow `mml-to-mime' to generate MIME article without + ;; making inquiry to a user for unknown encoding. + (let ((mml-confirmation-set + (cons 'unknown-encoding mml-confirmation-set))) + (mml-to-mime)) + (error + (erase-buffer) + (insert header + "Content-Type: text/plain; charset=gnus-decoded\n" + "Content-Transfer-Encoding: 8bit\n\n" + body) + (nnheader-message + 3 "Warning - there might be invalid characters")))) + (goto-char (point-min)) + (search-forward "\n\n") + (forward-line -1) + (insert (format "Message-ID: <%d@%s.nnrss>\n" + (car e) + (let ((rfc2047-encoding-type 'mime) + rfc2047-encode-max-chars) + (rfc2047-encode-string + (gnus-replace-in-string group "[\t\n ]+" "_"))))) + (when nnrss-content-function + (funcall nnrss-content-function e group article)))) (cond (err (nnheader-report 'nnrss err)) @@ -199,11 +308,6 @@ ARTICLE is the article number of the current headline.") ;; we return the article number. (cons nnrss-group (car e)))))) -(deffoo nnrss-request-list (&optional server) - (nnrss-possibly-change-group nil server) - (nnrss-generate-active) - t) - (deffoo nnrss-open-server (server &optional defs connectionless) (nnrss-read-server-data server) (nnoo-change-server 'nnrss server defs) @@ -211,6 +315,7 @@ ARTICLE is the article number of the current headline.") (deffoo nnrss-request-expire-articles (articles group &optional server force) + (setq group (nnrss-decode-group-name group)) (nnrss-possibly-change-group group server) (let (e days not-expirable changed) (dolist (art articles) @@ -228,56 +333,107 @@ ARTICLE is the article number of the current headline.") not-expirable)) (deffoo nnrss-request-delete-group (group &optional force server) + (setq group (nnrss-decode-group-name group)) (nnrss-possibly-change-group group server) + (let (elem) + ;; There may be two or more entries in `nnrss-group-alist' since + ;; this function didn't delete them formerly. + (while (setq elem (assoc group nnrss-group-alist)) + (setq nnrss-group-alist (delq elem nnrss-group-alist)))) (setq nnrss-server-data (delq (assoc group nnrss-server-data) nnrss-server-data)) (nnrss-save-server-data server) (ignore-errors - (delete-file (nnrss-make-filename group server))) + (let ((file-name-coding-system nnmail-pathname-coding-system)) + (delete-file (nnrss-make-filename group server)))) t) (deffoo nnrss-request-list-newsgroups (&optional server) (nnrss-possibly-change-group nil server) - (save-excursion - (set-buffer nntp-server-buffer) + (with-current-buffer nntp-server-buffer (erase-buffer) (dolist (elem nnrss-group-alist) (if (third elem) (insert (car elem) "\t" (third elem) "\n")))) t) +(deffoo nnrss-retrieve-groups (groups &optional server) + (dolist (group groups) + (setq group (nnrss-decode-group-name group)) + (nnrss-possibly-change-group group server) + (nnrss-check-group group server)) + (with-current-buffer nntp-server-buffer + (erase-buffer) + (dolist (group groups) + (let ((elem (assoc (gnus-group-decoded-name group) nnrss-server-data))) + (insert (format "%S %s 1 y\n" group (or (cadr elem) 0))))) + 'active)) + (nnoo-define-skeleton nnrss) ;;; Internal functions (eval-when-compile (defun xml-rpc-method-call (&rest args))) + +(defun nnrss-get-encoding () + "Return an encoding attribute specified in the current xml contents. +If `nnrss-compatible-encoding-alist' specifies the compatible encoding, +it is used instead. If the xml contents doesn't specify the encoding, +return `utf-8' which is the default encoding for xml if it is available, +otherwise return nil." + (goto-char (point-min)) + (if (re-search-forward + "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)" + nil t) + (let ((encoding (intern (downcase (or (match-string 1) + (match-string 2)))))) + (or + (mm-coding-system-p (cdr (assq encoding + nnrss-compatible-encoding-alist))) + (mm-coding-system-p encoding) + (mm-coding-system-p (car (rassq encoding + nnrss-compatible-encoding-alist))))) + (mm-coding-system-p 'utf-8))) + +(declare-function libxml-parse-html-region "xml.c" + (start end &optional base-url)) (defun nnrss-fetch (url &optional local) - "Fetch the url and put it in a the expected lisp structure." - (with-temp-buffer - ;some CVS versions of url.el need this to close the connection quickly - (let* (xmlform htmlform) + "Fetch URL and put it in a the expected Lisp structure." + (mm-with-unibyte-buffer + ;;some versions of url.el need this to close the connection quickly + (let (cs xmlform htmlform) ;; bit o' work necessary for w3 pre-cvs and post-cvs (if local (let ((coding-system-for-read 'binary)) (insert-file-contents url)) - (mm-url-insert url)) - -;; Because xml-parse-region can't deal with anything that isn't -;; xml and w3-parse-buffer can't deal with some xml, we have to -;; parse with xml-parse-region first and, if that fails, parse -;; with w3-parse-buffer. Yuck. Eventually, someone should find out -;; why w3-parse-buffer fails to parse some well-formed xml and -;; fix it. + ;; FIXME: shouldn't binding `coding-system-for-read' be moved + ;; to `mm-url-insert'? + (let ((coding-system-for-read 'binary)) + (condition-case err + (mm-url-insert url) + (error (if (or debug-on-quit debug-on-error) + (signal (car err) (cdr err)) + (message "nnrss: Failed to fetch %s" url)))))) + (nnheader-remove-cr-followed-by-lf) + ;; Decode text according to the encoding attribute. + (when (setq cs (nnrss-get-encoding)) + (insert (prog1 + (mm-decode-coding-string (buffer-string) cs) + (erase-buffer) + (mm-enable-multibyte)))) + (goto-char (point-min)) - (condition-case err - (setq xmlform (xml-parse-region (point-min) (point-max))) - (error (if (fboundp 'w3-parse-buffer) - (setq htmlform (caddar (w3-parse-buffer - (current-buffer)))) - (message "nnrss: Not valid XML and w3 parse not available (%s)" - url)))) - (if htmlform - htmlform - xmlform)))) + (condition-case err1 + (setq xmlform (xml-parse-region (point-min) (point-max))) + (error + (condition-case err2 + (setq htmlform (libxml-parse-html-region (point-min) (point-max))) + (error + (message "\ +nnrss: %s: Not valid XML %s and libxml-parse-html-region doesn't work %s" + url err1 err2))))) + (if htmlform + htmlform + xmlform)))) (defun nnrss-possibly-change-group (&optional group server) (when (and server @@ -287,35 +443,95 @@ ARTICLE is the article number of the current headline.") (nnrss-read-group-data group server) (setq nnrss-group group))) -(defvar nnrss-extra-categories '(nnrss-snarf-moreover-categories)) - -(defun nnrss-generate-active () - (if (y-or-n-p "fetch extra categories? ") - (dolist (func nnrss-extra-categories) - (funcall func))) - (save-excursion - (set-buffer nntp-server-buffer) - (erase-buffer) - (dolist (elem nnrss-group-alist) - (insert (prin1-to-string (car elem)) " 0 1 y\n")) - (dolist (elem nnrss-server-data) - (unless (assoc (car elem) nnrss-group-alist) - (insert (prin1-to-string (car elem)) " 0 1 y\n"))))) +(autoload 'timezone-parse-date "timezone") + +(defun nnrss-normalize-date (date) + "Return a date string of DATE in the RFC822 style. +This function handles the ISO 8601 date format described in +URL `http://www.w3.org/TR/NOTE-datetime', and also the RFC822 style +which RSS 2.0 allows." + (let (case-fold-search vector year month day time zone cts given) + (cond ((null date)) ; do nothing for this case + ;; if the date is just digits (unix time stamp): + ((string-match "^[0-9]+$" date) + (setq given (seconds-to-time (string-to-number date)))) + ;; RFC822 + ((string-match " [0-9]+ " date) + (setq vector (timezone-parse-date date) + year (string-to-number (aref vector 0))) + (when (>= year 1969) + (setq month (string-to-number (aref vector 1)) + day (string-to-number (aref vector 2))) + (unless (>= (length (setq time (aref vector 3))) 3) + (setq time "00:00:00")) + (when (and (setq zone (aref vector 4)) + (not (string-match "\\`[A-Z+-]" zone))) + (setq zone nil)))) + ;; ISO 8601 + ((string-match + (eval-when-compile + (concat + ;; 1. year + "\\(199[0-9]\\|20[0-9][0-9]\\)" + "\\(?:-" + ;; 2. month + "\\([01][0-9]\\)" + "\\(?:-" + ;; 3. day + "\\([0-3][0-9]\\)" + "\\)?\\)?\\(?:T" + ;; 4. hh:mm + "\\([012][0-9]:[0-5][0-9]\\)" + "\\(?:" + ;; 5. :ss + "\\(:[0-5][0-9]\\)" + "\\(?:\\.[0-9]+\\)?\\)?\\)?" + ;; 6+7,8,9. zone + "\\(?:\\(?:\\([+-][012][0-9]\\):\\([0-5][0-9]\\)\\)" + "\\|\\([+-][012][0-9][0-5][0-9]\\)" + "\\|\\(Z\\)\\)?")) + date) + (setq year (string-to-number (match-string 1 date)) + month (string-to-number (or (match-string 2 date) "1")) + day (string-to-number (or (match-string 3 date) "1")) + time (if (match-beginning 5) + (substring date (match-beginning 4) (match-end 5)) + (concat (or (match-string 4 date) "00:00") ":00")) + zone (cond ((match-beginning 6) + (concat (match-string 6 date) + (match-string 7 date))) + ((match-beginning 9) ;; Z + "+0000") + (t ;; nil if zone is not provided. + (match-string 8 date)))))) + (if month + (progn + (setq cts (current-time-string (encode-time 0 0 0 day month year))) + (format "%s, %02d %s %04d %s%s" + (substring cts 0 3) day (substring cts 4 7) year time + (if zone + (concat " " zone) + ""))) + (message-make-date given)))) ;;; data functions (defun nnrss-read-server-data (server) (setq nnrss-server-data nil) - (let ((file (nnrss-make-filename "nnrss" server))) + (let ((file (nnrss-make-filename "nnrss" server)) + (file-name-coding-system nnmail-pathname-coding-system)) (when (file-exists-p file) - (let ((coding-system-for-read 'binary)) - (load file nil nil t))))) + (load file nil t t)))) (defun nnrss-save-server-data (server) (gnus-make-directory nnrss-directory) - (let ((coding-system-for-write 'binary)) + (let ((coding-system-for-write nnrss-file-coding-system) + (file-name-coding-system nnmail-pathname-coding-system)) (with-temp-file (nnrss-make-filename "nnrss" server) + (insert (format ";; -*- coding: %s; -*-\n" + nnrss-file-coding-system)) (gnus-prin1 `(setq nnrss-group-alist ',nnrss-group-alist)) + (insert "\n") (gnus-prin1 `(setq nnrss-server-data ',nnrss-server-data))))) (defun nnrss-read-group-data (group server) @@ -326,12 +542,12 @@ ARTICLE is the article number of the current headline.") (let ((pair (assoc group nnrss-server-data))) (setq nnrss-group-max (or (cadr pair) 0)) (setq nnrss-group-min (+ nnrss-group-max 1))) - (let ((file (nnrss-make-filename group server))) + (let ((file (nnrss-make-filename group server)) + (file-name-coding-system nnmail-pathname-coding-system)) (when (file-exists-p file) - (let ((coding-system-for-read 'binary)) - (load file nil t t)) + (load file nil t t) (dolist (e nnrss-group-data) - (puthash (nth 2 e) e nnrss-group-hashtb) + (puthash (nth 9 e) t nnrss-group-hashtb) (when (and (car e) (> nnrss-group-min (car e))) (setq nnrss-group-min (car e))) (when (and (car e) (< nnrss-group-max (car e))) @@ -339,9 +555,12 @@ ARTICLE is the article number of the current headline.") (defun nnrss-save-group-data (group server) (gnus-make-directory nnrss-directory) - (let ((coding-system-for-write 'binary)) + (let ((coding-system-for-write nnrss-file-coding-system) + (file-name-coding-system nnmail-pathname-coding-system)) (with-temp-file (nnrss-make-filename group server) - (gnus-prin1 `(setq nnrss-group-data ',nnrss-group-data ))))) + (insert (format ";; -*- coding: %s; -*-\n" + nnrss-file-coding-system)) + (gnus-prin1 `(setq nnrss-group-data ',nnrss-group-data))))) (defun nnrss-make-filename (name server) (expand-file-name @@ -368,28 +587,58 @@ ARTICLE is the article number of the current headline.") (defun nnrss-no-cache (url) "") -(defun nnrss-insert-w3 (url) +(defun nnrss-insert (url) (mm-with-unibyte-current-buffer - (mm-url-insert url))) + (condition-case err + (mm-url-insert url) + (error (if (or debug-on-quit debug-on-error) + (signal (car err) (cdr err)) + (message "nnrss: Failed to fetch %s" url)))))) -(defun nnrss-decode-entities-unibyte-string (string) +(defun nnrss-decode-entities-string (string) (if string - (mm-with-unibyte-buffer + (mm-with-multibyte-buffer (insert string) (mm-url-decode-entities-nbsp) (buffer-string)))) -(defalias 'nnrss-insert 'nnrss-insert-w3) - -(if (featurep 'xemacs) - (defalias 'nnrss-string-as-multibyte 'identity) - (defalias 'nnrss-string-as-multibyte 'string-as-multibyte)) +(defun nnrss-mime-encode-string (string) + (mm-with-multibyte-buffer + (insert string) + (mm-url-decode-entities-nbsp) + (goto-char (point-min)) + (while (re-search-forward "[\t\n ]+" nil t) + (replace-match " ")) + (goto-char (point-min)) + (skip-chars-forward " ") + (delete-region (point-min) (point)) + (goto-char (point-max)) + (skip-chars-forward " ") + (delete-region (point) (point-max)) + (let ((rfc2047-encoding-type 'mime) + rfc2047-encode-max-chars) + (rfc2047-encode-region (point-min) (point-max))) + (goto-char (point-min)) + (while (search-forward "\n" nil t) + (delete-char -1)) + (buffer-string))) ;;; Snarf functions +(defun nnrss-make-hash-index (item) + (gnus-message 9 "nnrss: Making hash index of %s" (gnus-prin1-to-string item)) + (setq item (gnus-remove-if + (lambda (field) + (when (listp field) + (memq (car field) nnrss-ignore-article-fields))) + item)) + (md5 (gnus-prin1-to-string item) + nil nil + nnrss-file-coding-system)) (defun nnrss-check-group (group server) - (let (file xml subject url extra changed author - date rss-ns rdf-ns content-ns dc-ns) + (let (file xml subject url extra changed author date feed-subject + enclosure comments rss-ns rdf-ns content-ns dc-ns + hash-index) (if (and nnrss-use-local (file-exists-p (setq file (expand-file-name (nnrss-translate-file-chars @@ -400,51 +649,77 @@ ARTICLE is the article number of the current headline.") (second (assoc group nnrss-group-alist)))) (unless url (setq url - (cdr - (assoc 'href - (nnrss-discover-feed - (read-string - (format "URL to search for %s: " group) "http://"))))) + (cdr + (assoc 'href + (nnrss-discover-feed + (read-string + (format "URL to search for %s: " group) "http://"))))) (let ((pair (assoc group nnrss-server-data))) (if pair (setcdr (cdr pair) (list url)) (push (list group nnrss-group-max url) nnrss-server-data))) (setq changed t)) (setq xml (nnrss-fetch url))) - ;; See - ;; http://feeds.archive.org/validator/docs/howto/declare_namespaces.html - ;; for more RSS namespaces. (setq dc-ns (nnrss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/") rdf-ns (nnrss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#") rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/") content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/")) (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml))) (when (and (listp item) - (eq (intern (concat rss-ns "item")) (car item)) - (setq url (nnrss-decode-entities-unibyte-string - (nnrss-node-text rss-ns 'link (cddr item)))) - (not (gethash url nnrss-group-hashtb))) + (string= (concat rss-ns "item") (car item)) + (progn (setq hash-index (nnrss-make-hash-index item)) + (not (gethash hash-index nnrss-group-hashtb)))) (setq subject (nnrss-node-text rss-ns 'title item)) + (setq url (nnrss-decode-entities-string + (nnrss-node-text rss-ns 'link (cddr item)))) (setq extra (or (nnrss-node-text content-ns 'encoded item) (nnrss-node-text rss-ns 'description item))) + (if (setq feed-subject (nnrss-node-text dc-ns 'subject item)) + (setq extra (concat feed-subject "

" extra))) (setq author (or (nnrss-node-text rss-ns 'author item) (nnrss-node-text dc-ns 'creator item) (nnrss-node-text dc-ns 'contributor item))) - (setq date (or (nnrss-node-text dc-ns 'date item) - (nnrss-node-text rss-ns 'pubDate item) - (message-make-date))) + (setq date (nnrss-normalize-date + (or (nnrss-node-text dc-ns 'date item) + (nnrss-node-text rss-ns 'pubDate item)))) + (setq comments (nnrss-node-text rss-ns 'comments item)) + (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item))) + (let ((url (cdr (assq 'url enclosure))) + (len (cdr (assq 'length enclosure))) + (type (cdr (assq 'type enclosure))) + (name)) + (setq len + (if (and len (integerp (setq len (string-to-number len)))) + ;; actually already in `ls-lisp-format-file-size' but + ;; probably not worth to require it for one function + (do ((size (/ len 1.0) (/ size 1024.0)) + (post-fixes (list "" "k" "M" "G" "T" "P" "E") + (cdr post-fixes))) + ((< size 1024) + (format "%.1f%s" size (car post-fixes)))) + "0")) + (setq url (or url "")) + (setq name (if (string-match "/\\([^/]*\\)$" url) + (match-string 1 url) + "file")) + (setq type (or type "")) + (setq enclosure (list url name len type)))) (push (list (incf nnrss-group-max) (current-time) url - (and subject (nnrss-decode-entities-unibyte-string subject)) - (and author (nnrss-decode-entities-unibyte-string author)) + (and subject (nnrss-mime-encode-string subject)) + (and author (nnrss-mime-encode-string author)) date - (and extra (nnrss-decode-entities-unibyte-string extra))) + (and extra (nnrss-decode-entities-string extra)) + enclosure + comments + hash-index) nnrss-group-data) - (puthash url (car nnrss-group-data) nnrss-group-hashtb) - (setq changed t))) + (puthash hash-index t nnrss-group-hashtb) + (setq changed t)) + (setq extra nil)) (when changed (nnrss-save-group-data group server) (let ((pair (assoc group nnrss-server-data))) @@ -453,6 +728,62 @@ ARTICLE is the article number of the current headline.") (push (list group nnrss-group-max) nnrss-server-data))) (nnrss-save-server-data server)))) +(declare-function gnus-group-make-rss-group "gnus-group" (&optional url)) + +(defun nnrss-opml-import (opml-file) + "OPML subscriptions import. +Read the file and attempt to subscribe to each Feed in the file." + (interactive "fImport file: ") + (mapc + (lambda (node) + (let ((xmlurl (cdr (assq 'xmlUrl (cadr node))))) + (when (and xmlurl + (not (string-match "\\`[\t ]*\\'" xmlurl)) + (prog1 + (y-or-n-p (format "Subscribe to %s " xmlurl)) + (message ""))) + (condition-case err + (progn + (gnus-group-make-rss-group xmlurl) + (forward-line 1)) + (error + (message + "Failed to subscribe to %s (%s); type any key to continue: " + xmlurl + (error-message-string err)) + (let ((echo-keystrokes 0)) + (read-char))))))) + (nnrss-find-el 'outline + (mm-with-multibyte-buffer + (insert-file-contents opml-file) + (xml-parse-region (point-min) (point-max)))))) + +(defun nnrss-opml-export () + "OPML subscription export. +Export subscriptions to a buffer in OPML Format." + (interactive) + (with-current-buffer (get-buffer-create "*OPML Export*") + (mm-set-buffer-file-coding-system 'utf-8) + (insert "\n" + "\n" + "\n" + " \n" + " mySubscriptions\n" + " " (format-time-string "%a, %d %b %Y %T %z") + "\n" + " " user-mail-address "\n" + " " (user-full-name) "\n" + " \n" + " \n") + (dolist (sub nnrss-group-alist) + (insert " \n")) + (insert " \n" + "\n")) + (pop-to-buffer "*OPML Export*") + (when (fboundp 'sgml-mode) + (sgml-mode))) + (defun nnrss-generate-download-script () "Generate a download script in the current buffer. It is useful when `(setq nnrss-use-local t)'." @@ -472,44 +803,17 @@ It is useful when `(setq nnrss-use-local t)'." (append nnheader-file-name-translation-alist '((?' . ?_))))) (nnheader-translate-file-chars name))) -(defvar nnrss-moreover-url - "http://w.moreover.com/categories/category_list_rss.html" - "The url of moreover.com categories.") - -(defun nnrss-snarf-moreover-categories () - "Snarf RSS links from moreover.com." - (interactive) - (let (category name url changed) - (with-temp-buffer - (nnrss-insert nnrss-moreover-url) - (goto-char (point-min)) - (while (re-search-forward - "\\| elements that are links to RSS from the parsed data." - (delq nil (mapcar + (delq nil (mapcar (lambda (el) (if (nnrss-rsslink-p el) el)) (nnrss-find-el 'link data)))) (defun nnrss-extract-hrefs (data) - "Recursively extract hrefs from a page's source. DATA should be -the output of xml-parse-region or w3-parse-buffer." + "Recursively extract hrefs from a page's source. +DATA should be the output of `xml-parse-region'." (mapcar (lambda (ahref) (cdr (assoc 'href (cadr ahref)))) (nnrss-find-el 'a data))) -(defmacro nnrss-match-macro (base-uri item - onsite-list offsite-list) +(defmacro nnrss-match-macro (base-uri item onsite-list offsite-list) `(cond ((or (string-match (concat "^" ,base-uri) ,item) - (not (string-match "://" ,item))) - (setq ,onsite-list (append ,onsite-list (list ,item)))) - (t (setq ,offsite-list (append ,offsite-list (list ,item)))))) + (not (string-match "://" ,item))) + (setq ,onsite-list (append ,onsite-list (list ,item)))) + (t (setq ,offsite-list (append ,offsite-list (list ,item)))))) (defun nnrss-order-hrefs (base-uri hrefs) "Given a list of hrefs, sort them using the following priorities: @@ -585,77 +891,76 @@ whether they are `offsite' or `onsite'." rss-onsite-in rdf-onsite-in xml-onsite-in rss-offsite-end rdf-offsite-end xml-offsite-end rss-offsite-in rdf-offsite-in xml-offsite-in) - (mapcar (lambda (href) - (if (not (null href)) - (cond ((string-match "\\.rss$" href) - (nnrss-match-macro - base-uri href rss-onsite-end rss-offsite-end)) - ((string-match "\\.rdf$" href) - (nnrss-match-macro - base-uri href rdf-onsite-end rdf-offsite-end)) - ((string-match "\\.xml$" href) - (nnrss-match-macro - base-uri href xml-onsite-end xml-offsite-end)) - ((string-match "rss" href) - (nnrss-match-macro - base-uri href rss-onsite-in rss-offsite-in)) - ((string-match "rdf" href) - (nnrss-match-macro - base-uri href rdf-onsite-in rdf-offsite-in)) - ((string-match "xml" href) - (nnrss-match-macro - base-uri href xml-onsite-in xml-offsite-in))))) - hrefs) - (append + (dolist (href hrefs) + (cond ((null href)) + ((string-match "\\.rss$" href) + (nnrss-match-macro + base-uri href rss-onsite-end rss-offsite-end)) + ((string-match "\\.rdf$" href) + (nnrss-match-macro + base-uri href rdf-onsite-end rdf-offsite-end)) + ((string-match "\\.xml$" href) + (nnrss-match-macro + base-uri href xml-onsite-end xml-offsite-end)) + ((string-match "rss" href) + (nnrss-match-macro + base-uri href rss-onsite-in rss-offsite-in)) + ((string-match "rdf" href) + (nnrss-match-macro + base-uri href rdf-onsite-in rdf-offsite-in)) + ((string-match "xml" href) + (nnrss-match-macro + base-uri href xml-onsite-in xml-offsite-in)))) + (append rss-onsite-end rdf-onsite-end xml-onsite-end rss-onsite-in rdf-onsite-in xml-onsite-in rss-offsite-end rdf-offsite-end xml-offsite-end rss-offsite-in rdf-offsite-in xml-offsite-in))) (defun nnrss-discover-feed (url) - "Given a page, find an RSS feed using Mark Pilgrim's -`ultra-liberal rss locator' (http://diveintomark.org/2002/08/15.html)." - + "Given a page, find an RSS feed. +Use Mark Pilgrim's `ultra-liberal rss locator'." (let ((parsed-page (nnrss-fetch url))) - -;; 1. if this url is the rss, use it. + ;; 1. if this url is the rss, use it. (if (nnrss-rss-p parsed-page) (let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://purl.org/rss/1.0/"))) (nnrss-rss-title-description rss-ns parsed-page url)) -;; 2. look for the (length urllist) 1)) (cdar urllist) (let ((completion-ignore-case t) - (selection + (selection (mapcar (lambda (listinfo) - (cons (cdr (assoc "sitename" listinfo)) - (string-to-int + (cons (cdr (assoc "sitename" listinfo)) + (string-to-number (cdr (assoc "feedid" listinfo))))) feedinfo))) - (cdr (assoc - (completing-read - "Multiple feeds found. Select one: " - selection nil t) urllist))))))))) + (cdr (assoc + (gnus-completing-read + "Multiple feeds found. Select one" + selection t) urllist))))))))) (defun nnrss-rss-p (data) - "Test if data is an RSS feed. Simply ensures that the first -element is rss or rdf." + "Test if DATA is an RSS feed. +Simply ensures that the first element is rss or rdf." (or (eq (caar data) 'rss) (eq (caar data) 'rdf:RDF))) @@ -724,18 +1029,16 @@ element is rss or rdf." that gives the URI for which you want to retrieve the namespace prefix), return the prefix." (let* ((prefix (car (rassoc uri (cadar el)))) - (nslist (if prefix + (nslist (if prefix (split-string (symbol-name prefix) ":"))) (ns (cond ((eq (length nslist) 1) ; no prefix given "") ((eq (length nslist) 2) ; extract prefix (cadr nslist))))) - (if (and ns (not (eq ns ""))) + (if (and ns (not (string= ns ""))) (concat ns ":") ns))) (provide 'nnrss) - ;;; nnrss.el ends here -