;;; nnir.el --- search mail with various search engines -*- coding: iso-8859-1 -*-
-;; Copyright (C) 1998 Kai Großjohann
+
+;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
;; Author: Kai Großjohann <grossjohann@ls6.cs.uni-dortmund.de>
-;; Keywords: news, mail, searching, ir, glimpse, wais, hyrex
+;; Swish-e and Swish++ backends by:
+;; Christoph Conrad <christoph.conrad@gmx.de>.
+;; IMAP backend by: Simon Josefsson <jas@pdc.kth.se>.
+;; IMAP search by: Torsten Hilbrich <torsten.hilbrich <at> gmx.net>
+;; IMAP search improved by Daniel Pittman <daniel@rimspace.net>.
+;; nnmaildir support for Swish++ and Namazu backends by:
+;; Justus Piater <Justus <at> Piater.name>
+
+;; FIXME: This file should be move to ../lisp/ after all copyright assignments
+;; are on file. As of 2008-04-13, we don't have an assignment/disclaimer from
+;; Torsten Hilbrich, but he's willing to sign. I've sent hmm the form.
+;; -- rsteib
+
+;; TODO: Documentation in the Gnus manual
+
+;; From: Reiner Steib
+;; Subject: Re: Including nnir.el
+;; Newsgroups: gmane.emacs.gnus.general
+;; Message-ID: <v9d5dnp6aq.fsf@marauder.physik.uni-ulm.de>
+;; Date: 2006-06-05 22:49:01 GMT
+;;
+;; On Sun, Jun 04 2006, Sascha Wilde wrote:
+;;
+;; > The one thing most hackers like to forget: Documentation. By now the
+;; > documentation is only in the comments at the head of the source, I
+;; > would use it as basis to cook up some minimal texinfo docs.
+;; >
+;; > Where in the existing gnus manual would this fit best?
+
+;; Maybe (info "(gnus)Combined Groups") for a general description.
+;; `gnus-group-make-nnir-group' might be described in (info
+;; "(gnus)Foreign Groups") as well.
-;; This file is not part of GNU Emacs.
+;; Keywords: news mail searching ir
+
+;; This file is part of GNU Emacs.
;; This is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
+;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; with your ideas.
;; What does it do? Well, it allows you to index your mail using some
-;; search engine (freeWAIS-sf, Glimpse and others -- see later),
+;; search engine (freeWAIS-sf, swish-e and others -- see later),
;; then type `G G' in the Group buffer and issue a query to the search
;; engine. You will then get a buffer which shows all articles
;; matching the query, sorted by Retrieval Status Value (score).
;; others doesn't support nnfolder.
;; * It can only search the mail backend's which are supported by one
;; search engine, because of different query languages.
-;; * There are restrictions to the Glimpse setup.
;; * There are restrictions to the Wais setup.
;; * There are restrictions to the imap setup.
;; * gnus-summary-nnir-goto-thread: Fetches whole group first, before
;; this prefix. It defaults to `$HOME/Mail/' (note the trailing
;; slash).
-;; 2. Glimpse
-;;
-;; The code expects you to have one Glimpse index which contains all
-;; your mail files. The Lisp setup involves setting the
-;; `nnir-glimpse-*' variables. The most difficult to understand
-;; variable is probably `nnir-glimpse-remove-prefix', it corresponds
-;; to `nnir-wais-remove-prefix', see above. The `nnir-glimpse-home'
-;; variable should be set to the value of the `-H' option which allows
-;; one to search this Glimpse index. I have indexed my whole home
-;; directory with Glimpse, so I assume a default of `$HOME'.
-
-;; 3. Namazu
+;; 2. Namazu
;;
;; The Namazu backend requires you to have one directory containing all
;; index files, this is controlled by the `nnir-namazu-index-directory'
;; For maximum searching efficiency I have a cron job set to run this
;; command every four hours.
-;; 4. HyREX
+;; 3. HyREX
;;
;; The HyREX backend requires you to have one directory from where all
;; your relative paths are to, if you use them. This directory must be
;; To function the `nnir-hyrex-remove-prefix' variable must also be
;; correct, see the documentation for `nnir-wais-remove-prefix' above.
+;; 4. find-grep
+;;
+;; The find-grep engine simply runs find(1) to locate eligible
+;; articles and searches them with grep(1). This, of course, is much
+;; slower than using a proper search engine but OTOH doesn't require
+;; maintenance of an index and is still faster than using any built-in
+;; means for searching. The method specification of the server to
+;; search must include a directory for this engine to work (E.g.,
+;; `nnml-directory'). The tools must be POSIX compliant. GNU Find
+;; prior to version 4.2.12 (4.2.26 on Linux due to incorrect ARG_MAX
+;; handling) does not work.
+;; ,----
+;; | ;; find-grep configuration for searching the Gnus Cache
+;; |
+;; | (nnml "cache"
+;; | (nnml-get-new-mail nil)
+;; | (nnir-search-engine find-grep)
+;; | (nnml-directory "~/News/cache/")
+;; | (nnml-active-file "~/News/cache/active"))
+;; `----
+
;; Developer information:
;; I have tried to make the code expandable. Basically, it is divided
;; the second element should be the article number, and the third
;; element should be the Retrieval Status Value (RSV) as returned from
;; the search engine. An RSV is the score assigned to the document by
-;; the search engine. For Boolean search engines like Glimpse, the
+;; the search engine. For Boolean search engines, the
;; RSV is always 1000 (or 1 or 100, or whatever you like).
;; The sorting order of the articles in the summary buffer created by
;; Todo, or future ideas:
-;; * Make it so that Glimpse can also be called without `-F'.
-;;
;; * It should be possible to restrict search to certain groups.
;;
;; * There is currently no error checking.
;; * Support compressed mail files. Probably, just stripping off the
;; `.gz' or `.Z' file name extension is sufficient.
;;
-;; * Support a find/grep combination.
-;;
;; * At least for imap, the query is performed twice.
;;
;;; Setup Code:
-(require 'cl)
(require 'nnoo)
(require 'gnus-group)
(require 'gnus-sum)
+(require 'message)
+(require 'gnus-util)
(eval-and-compile
- (require 'gnus-util))
-(eval-when-compile
- (require 'nnimap)
- (autoload 'read-kbd-macro "edmacro" nil t))
+ (require 'cl))
(nnoo-declare nnir)
(nnoo-define-basics nnir)
;;; Developer Extension Variable:
(defvar nnir-engines
- `((glimpse nnir-run-glimpse
- ((group . "Group spec: ")))
- (wais nnir-run-waissearch
+ `((wais nnir-run-waissearch
())
- (excite nnir-run-excite-search
- ())
(imap nnir-run-imap
((criteria
"Search in: " ; Prompt
(namazu nnir-run-namazu
())
(hyrex nnir-run-hyrex
- ((group . "Group spec: "))))
+ ((group . "Group spec: ")))
+ (find-grep nnir-run-find-grep
+ ((grep-options . "Grep options: "))))
"Alist of supported search engines.
Each element in the alist is a three-element list (ENGINE FUNCTION ARGS).
ENGINE is a symbol designating the searching engine. FUNCTION is also
;;; User Customizable Variables:
(defgroup nnir nil
- "Search nnmh and nnml groups in Gnus with Glimpse, freeWAIS-sf, or EWS."
+ "Search nnmh and nnml groups in Gnus with swish-e, freeWAIS-sf, or EWS."
:group 'gnus)
;; Mail backend.
:type '(sexp)
:group 'nnir)
-;; Glimpse engine.
-
-(defcustom nnir-glimpse-program "glimpse"
- "*Name of Glimpse executable."
- :type '(string)
- :group 'nnir)
-
-(defcustom nnir-glimpse-home (getenv "HOME")
- "*Value of `-H' glimpse option.
-`~' and environment variables must be expanded, see the functions
-`expand-file-name' and `substitute-in-file-name'."
- :type '(directory)
- :group 'nnir)
-
-(defcustom nnir-glimpse-remove-prefix (concat (getenv "HOME") "/Mail/")
- "*The prefix to remove from each file name returned by Glimpse
-in order to get a group name (albeit with / instead of .). This is a
-regular expression.
-
-For example, suppose that Glimpse returns file names such as
-\"/home/john/Mail/mail/misc/42\". For this example, use the following
-setting: (setq nnir-glimpse-remove-prefix \"/home/john/Mail/\")
-Note the trailing slash. Removing this prefix gives \"mail/misc/42\".
-`nnir' knows to remove the \"/42\" and to replace \"/\" with \".\" to
-arrive at the correct group name, \"mail.misc\"."
- :type '(regexp)
- :group 'nnir)
-
-(defcustom nnir-glimpse-additional-switches '("-i")
- "*A list of strings, to be given as additional arguments to glimpse.
-The switches `-H', `-W', `-l' and `-y' are always used -- calling
-glimpse without them does not make sense in our situation.
-Suggested elements to put here are `-i' and `-w'.
-
-Note that this should be a list. Ie, do NOT use the following:
- (setq nnir-glimpse-additional-switches \"-i -w\") ; wrong!
-Instead, use this:
- (setq nnir-glimpse-additional-switches '(\"-i\" \"-w\"))"
- :type '(repeat (string))
- :group 'nnir)
-
;; freeWAIS-sf.
(defcustom nnir-wais-program "waissearch"
in order to get a group name (albeit with / instead of .). This is a
regular expression.
-This variable is similar to `nnir-glimpse-remove-prefix', only for Wais,
-not Glimpse."
- :type '(regexp)
- :group 'nnir)
-
-;; EWS (Excite for Web Servers) engine.
-
-(defcustom nnir-excite-aquery-program "aquery.pl"
- "*Name of the EWS query program. Should be `aquery.pl' or a path to same."
- :type '(string)
- :group 'nnir)
-
-(defcustom nnir-excite-collection "Mail"
- "*Name of the EWS collection to search."
- :type '(string)
- :group 'nnir)
-
-(defcustom nnir-excite-remove-prefix (concat (getenv "HOME") "/Mail/")
- "*The prefix to remove from each file name returned by EWS
-in order to get a group name (albeit with / instead of .). This is a
-regular expression.
-
-This variable is very similar to `nnir-glimpse-remove-prefix', except
-that it is for EWS, not Glimpse."
+For example, suppose that Wais returns file names such as
+\"/home/john/Mail/mail/misc/42\". For this example, use the following
+setting: (setq nnir-wais-remove-prefix \"/home/john/Mail/\")
+Note the trailing slash. Removing this prefix gives \"mail/misc/42\".
+`nnir' knows to remove the \"/42\" and to replace \"/\" with \".\" to
+arrive at the correct group name, \"mail.misc\"."
:type '(regexp)
:group 'nnir)
-;; Swish++. Next three variables Copyright (C) 2000, 2001 Christoph
-;; Conrad <christoph.conrad@gmx.de>.
-;; Swish++ home page: http://homepage.mac.com/pauljlucas/software/swish/
-
(defcustom nnir-swish++-configuration-file
(expand-file-name "~/Mail/swish++.conf")
"*Configuration file for swish++."
in order to get a group name (albeit with / instead of .). This is a
regular expression.
-This variable is very similar to `nnir-glimpse-remove-prefix', except
-that it is for swish++, not Glimpse."
+This variable is very similar to `nnir-wais-remove-prefix', except
+that it is for swish++, not Wais."
:type '(regexp)
:group 'nnir)
;; New version: http://www.boe.es/swish-e
;; Variables `nnir-swish-e-index-file', `nnir-swish-e-program' and
;; `nnir-swish-e-additional-switches'
-;; Copyright (C) 2000 Christoph Conrad <christoph.conrad@gmx.de>.
(make-obsolete-variable 'nnir-swish-e-index-file
'nnir-swish-e-index-files)
in order to get a group name (albeit with / instead of .). This is a
regular expression.
-This variable is very similar to `nnir-glimpse-remove-prefix', except
-that it is for swish-e, not Glimpse.
+This variable is very similar to `nnir-wais-remove-prefix', except
+that it is for swish-e, not Wais.
This could be a server parameter."
:type '(regexp)
"*The prefix to remove from each file name returned by Namazu
in order to get a group name (albeit with / instead of .).
-This variable is very similar to `nnir-glimpse-remove-prefix', except
-that it is for Namazu, not Glimpse."
+This variable is very similar to `nnir-wais-remove-prefix', except
+that it is for Namazu, not Wais."
:type '(directory)
:group 'nnir)
(if extra-parms
(setq parms (nnir-read-parms query))
(setq parms (list (cons 'query query))))
+ (add-to-list 'parms (cons 'unique-id (message-unique-id)) t)
(gnus-group-read-ephemeral-group
(concat "nnir:" (prin1-to-string parms)) '(nnir "") t
(cons (current-buffer)
gnus-current-window-configuration)
nil)))
-;; Emacs 19 compatibility?
-(or (fboundp 'kbd) (defalias 'kbd 'read-kbd-macro))
-
(defun nnir-group-mode-hook ()
- (define-key gnus-group-mode-map
- (if (fboundp 'read-kbd-macro)
- (kbd "G G")
- "GG") ; XEmacs 19 compat
+ (define-key gnus-group-mode-map (kbd "G G")
'gnus-group-make-nnir-group))
(add-hook 'gnus-group-mode-hook 'nnir-group-mode-hook)
+;; Why is this needed? Is this for compatibility with old/new gnusae? Using
+;; gnus-group-server instead works for me. -- Justus Piater
(defmacro nnir-group-server (group)
- "Return the server for a foreign newsgroup GROUP.
+ "Return the server for a newsgroup GROUP.
The returned format is as `gnus-server-to-method' needs it. See
`gnus-group-real-prefix' and `gnus-group-real-name'."
`(let ((gname ,group))
(if (string-match "^\\([^:]+\\):" gname)
- (setq gname (match-string 1 gname))
- nil)
- (if (string-match "^\\([^+]+\\)\\+\\(.+\\)$" gname)
- (format "%s:%s" (match-string 1 gname) (match-string 2 gname))
- (concat gname ":"))))
+ (progn
+ (setq gname (match-string 1 gname))
+ (if (string-match "^\\([^+]+\\)\\+\\(.+\\)$" gname)
+ (format "%s:%s" (match-string 1 gname) (match-string 2 gname))
+ (concat gname ":")))
+ (format "%s:%s" (car gnus-select-method) (cadr gnus-select-method)))))
;; Summary mode commands.
(deffoo nnir-retrieve-headers (articles &optional group server fetch-old)
(save-excursion
(let ((artlist (copy-sequence articles))
- (idx 1)
- (art nil)
- (artitem nil)
- (artgroup nil) (artno nil)
- (artrsv nil)
- (artfullgroup nil)
- (novitem nil)
- (novdata nil)
- (foo nil)
- server)
+ art artitem artgroup artno artrsv artfullgroup
+ novitem novdata foo server)
(while (not (null artlist))
(setq art (car artlist))
(or (numberp art)
;; NOV data and prepend to `novdata'
(set-buffer nntp-server-buffer)
(nnir-possibly-change-server server)
- (case (setq foo (gnus-retrieve-headers (list artno) artfullgroup nil))
- (nov
- (goto-char (point-min))
- (setq novitem (nnheader-parse-nov))
- (unless novitem
- (pop-to-buffer nntp-server-buffer)
- (error
- "nnheader-parse-nov returned nil for article %s in group %s"
- artno artfullgroup)))
- (headers
- (goto-char (point-min))
- (setq novitem (nnheader-parse-head))
- (unless novitem
- (pop-to-buffer nntp-server-buffer)
- (error
- "nnheader-parse-head returned nil for article %s in group %s"
- artno artfullgroup)))
- (t (nnheader-report 'nnir "Don't support header type %s." foo)))
- ;; replace article number in original group with article number
+ (let ((gnus-override-method
+ (gnus-server-to-method server)))
+ (case (setq foo (gnus-retrieve-headers (list artno) artfullgroup nil))
+ (nov
+ (goto-char (point-min))
+ (setq novitem (nnheader-parse-nov))
+ (unless novitem
+ (pop-to-buffer nntp-server-buffer)
+ (error
+ "nnheader-parse-nov returned nil for article %s in group %s"
+ artno artfullgroup)))
+ (headers
+ (goto-char (point-min))
+ (setq novitem (nnheader-parse-head))
+ (unless novitem
+ (pop-to-buffer nntp-server-buffer)
+ (error
+ "nnheader-parse-head returned nil for article %s in group %s"
+ artno artfullgroup)))
+ (t (error "Unknown header type %s while requesting article %s of group %s"
+ foo artno artfullgroup))))
+ ;; replace article number in original group with article number
;; in nnir group
- (mail-header-set-number novitem idx)
+ (mail-header-set-number novitem art)
(mail-header-set-from novitem
(mail-header-from novitem))
(mail-header-set-subject
(mail-header-subject novitem)))
;;-(mail-header-set-extra novitem nil)
(push novitem novdata)
- (setq artlist (cdr artlist))
- (setq idx (1+ idx)))
+ (setq artlist (cdr artlist)))
(setq novdata (nreverse novdata))
(set-buffer nntp-server-buffer) (erase-buffer)
- (mapcar 'nnheader-insert-nov novdata)
+ (mapc 'nnheader-insert-nov novdata)
'nov)))
(deffoo nnir-request-article (article
&optional group server to-buffer)
- (save-excursion
- (let* ((artitem (nnir-artlist-article nnir-artlist
- article))
- (artfullgroup (nnir-artitem-group artitem))
- (artno (nnir-artitem-number artitem))
- ;; Bug?
- ;; Why must we bind nntp-server-buffer here? It won't
- ;; work if `buf' is used, say. (Of course, the set-buffer
- ;; line below must then be updated, too.)
- (nntp-server-buffer (or to-buffer nntp-server-buffer)))
- (set-buffer nntp-server-buffer)
- (erase-buffer)
- (message "Requesting article %d from group %s"
- artno artfullgroup)
- (gnus-request-article artno artfullgroup nntp-server-buffer)
- (cons artfullgroup artno))))
+ (if (stringp article)
+ (nnheader-report
+ 'nnir
+ "nnir-retrieve-headers doesn't grok message ids: %s"
+ article)
+ (save-excursion
+ (let* ((artitem (nnir-artlist-article nnir-artlist
+ article))
+ (artfullgroup (nnir-artitem-group artitem))
+ (artno (nnir-artitem-number artitem))
+ ;; Bug?
+ ;; Why must we bind nntp-server-buffer here? It won't
+ ;; work if `buf' is used, say. (Of course, the set-buffer
+ ;; line below must then be updated, too.)
+ (nntp-server-buffer (or to-buffer nntp-server-buffer)))
+ (set-buffer nntp-server-buffer)
+ (erase-buffer)
+ (message "Requesting article %d from group %s"
+ artno artfullgroup)
+ (gnus-request-article artno artfullgroup nntp-server-buffer)
+ (cons artfullgroup artno)))))
(nnoo-define-skeleton nnir)
-;;; Search Engine Interfaces:
-;; Glimpse interface.
-(defun nnir-run-glimpse (query server &optional group)
- "Run given query against glimpse. Returns a vector of (group name, file name)
-pairs (also vectors, actually)."
- (save-excursion
- (let ((artlist nil)
- (groupspec (cdr (assq 'group query)))
- (qstring (cdr (assq 'query query)))
- (prefix (nnir-read-server-parm 'nnir-glimpse-remove-prefix server))
- artno dirnam)
- (when (and group groupspec)
- (error (concat "It does not make sense to use a group spec"
- " with process-marked groups.")))
- (when group
- (setq groupspec (gnus-group-real-name group)))
- (set-buffer (get-buffer-create nnir-tmp-buffer))
- (erase-buffer)
- (if groupspec
- (message "Doing glimpse query %s on %s..." query groupspec)
- (message "Doing glimpse query %s..." query))
- (let* ((cp-list
- `( ,nnir-glimpse-program
- nil ; input from /dev/null
- t ; output
- nil ; don't redisplay
- "-H" ,(nnir-read-server-parm 'nnir-glimpse-home server) ; search home dir
- "-W" ; match pattern in file
- "-l" "-y" ; misc options
- ,@(nnir-read-server-parm 'nnir-glimpse-additional-switches server)
- "-F" ,prefix ; restrict output to mail
- ,qstring ; the query, in glimpse format
- ))
- (exitstatus
- (progn
- (message "%s args: %s" nnir-glimpse-program
- (mapconcat 'identity (cddddr cp-list) " "))
- (apply 'call-process cp-list))))
- (unless (or (null exitstatus)
- (zerop exitstatus))
- (nnheader-report 'nnir "Couldn't run glimpse: %s" exitstatus)
- ;; Glimpse failure reason is in this buffer, show it if
- ;; the user wants it.
- (when (> gnus-verbose 6)
- (display-buffer nnir-tmp-buffer))))
- (when groupspec
- (keep-lines groupspec))
- (if groupspec
- (message "Doing glimpse query %s on %s...done" query groupspec)
- (message "Doing glimpse query %s...done" query))
- (sit-for 0)
- ;; remove superfluous stuff from glimpse output
- (goto-char (point-min))
- (delete-non-matching-lines "/[0-9]+$")
- ;;(delete-matching-lines "\\.overview~?$")
- (goto-char (point-min))
- (while (re-search-forward (concat "^" prefix "\\(.+\\)" "/\\([0-9]\\)+$") nil t)
- ;; replace / with . in group names
- (setq dirnam (substitute ?. ?/ (match-string 1))
- artno (match-string 2))
- (push (vector (nnir-group-full-name dirnam server)
- (string-to-int artno)) artlist))
-
- (sort* artlist
- (function (lambda (x y)
- (if (string-lessp (nnir-artitem-group x)
- (nnir-artitem-group y))
- t
- (< (nnir-artitem-number x)
- (nnir-artitem-number y))))))
- )))
+(defmacro nnir-add-result (dirnam artno score prefix server artlist)
+ "Ask `nnir-compose-result' to construct a result vector,
+and if it is non-nil, add it to artlist."
+ `(let ((result (nnir-compose-result dirnam artno score prefix server)))
+ (when (not (null result))
+ (push result artlist))))
+
+(autoload 'nnmaildir-base-name-to-article-number "nnmaildir")
+
+;; Helper function currently used by the Swish++ and Namazu backends;
+;; perhaps useful for other backends as well
+(defun nnir-compose-result (dirnam article score prefix server)
+ "Extract the group from dirnam, and create a result vector
+ready to be added to the list of search results."
+
+ ;; remove nnir-*-remove-prefix from beginning of dirnam filename
+ (when (string-match (concat "^" prefix) dirnam)
+ (setq dirnam (replace-match "" t t dirnam)))
+
+ (when (file-readable-p (concat prefix dirnam article))
+ ;; remove trailing slash and, for nnmaildir, cur/new/tmp
+ (setq dirnam
+ (substring dirnam 0 (if (string= server "nnmaildir:") -5 -1)))
+
+ ;; Set group to dirnam without any leading dots or slashes,
+ ;; and with all subsequent slashes replaced by dots
+ (let ((group (gnus-replace-in-string
+ (gnus-replace-in-string dirnam "^[./\\]" "" t)
+ "[/\\]" "." t)))
+
+ (vector (nnir-group-full-name group server)
+ (if (string= server "nnmaildir:")
+ (nnmaildir-base-name-to-article-number
+ (substring article 0 (string-match ":" article))
+ group nil)
+ (string-to-number article))
+ (string-to-number score)))))
+
+;;; Search Engine Interfaces:
;; freeWAIS-sf interface.
(defun nnir-run-waissearch (query server &optional group)
(save-excursion
(let ((qstring (cdr (assq 'query query)))
(prefix (nnir-read-server-parm 'nnir-wais-remove-prefix server))
- (artlist nil)
- (score nil) (artno nil) (dirnam nil) (group nil))
+ artlist score artno dirnam group)
(set-buffer (get-buffer-create nnir-tmp-buffer))
(erase-buffer)
(message "Doing WAIS query %s..." query)
dirnam prefix))
(setq group (substitute ?. ?/ (replace-match "" t t dirnam)))
(push (vector (nnir-group-full-name group server)
- (string-to-int artno)
- (string-to-int score))
+ (string-to-number artno)
+ (string-to-number score))
artlist))
(message "Massaging waissearch output...done")
(apply 'vector
(> (nnir-artitem-rsv x)
(nnir-artitem-rsv y)))))))))
-;; EWS (Excite for Web Servers) interface
-(defun nnir-run-excite-search (query server &optional group)
- "Run a given query against EWS. Returns vector of (group name, file name)
-pairs (also vectors, actually)."
- (when group
- (error "Searching specific groups not implemented for EWS."))
- (save-excursion
- (let ((qstring (cdr (assq 'query query)))
- (prefix (nnir-read-server-parm 'nnir-excite-remove-prefix server))
- artlist group article-num article)
- (setq nnir-current-query query)
- (set-buffer (get-buffer-create nnir-tmp-buffer))
- (erase-buffer)
- (message "Doing EWS query %s..." qstring)
- (call-process nnir-excite-aquery-program
- nil ; input from /dev/null
- t ; output to current buffer
- nil ; don't redisplay
- (nnir-read-server-parm 'nnir-excite-collection server)
- (if (string= (substring qstring 0 1) "(")
- qstring
- (format "(concept %s)" qstring)))
- (message "Gathering query output...")
-
- (goto-char (point-min))
- (while (re-search-forward
- "^[0-9]+\\s-[0-9]+\\s-[0-9]+\\s-\\(\\S-*\\)" nil t)
- (setq article (match-string 1))
- (unless (string-match
- (concat "^" (regexp-quote prefix)
- "\\(.*\\)/\\([0-9]+\\)") article)
- (nnheader-report 'nnir "Dir name %s doesn't contain prefix %s"
- article prefix))
- (setq group (substitute ?. ?/ (match-string 1 article)))
- (setq group (nnir-group-full-name group server))
- (setq article-num (match-string 2 article))
- (setq artlist (vconcat artlist (vector (vector group
- (string-to-int article-num)
- 1000)))))
- (message "Gathering query output...done")
- artlist)))
-
-;; IMAP interface. The following function is Copyright (C) 1998 Simon
-;; Josefsson <jas@pdc.kth.se>.
+;; IMAP interface.
;; todo:
;; nnir invokes this two (2) times???!
;; we should not use nnimap at all but open our own server connection
;; send queries as literals
;; handle errors
+(autoload 'nnimap-open-server "nnimap")
+(autoload 'imap-mailbox-select "imap")
+(autoload 'imap-search "imap")
+(autoload 'imap-quote-specials "imap")
+
(defun nnir-run-imap (query srv &optional group-option)
- (require 'imap)
- (require 'nnimap)
+ "Run a search against an IMAP back-end server.
+This uses a custom query language parser; see `nnir-imap-make-query' for
+details on the language and supported extensions"
(save-excursion
(let ((qstring (cdr (assq 'query query)))
(server (cadr (gnus-server-to-method srv)))
(let ((arts 0)
(mbx (gnus-group-real-name group)))
(when (imap-mailbox-select mbx nil buf)
- (mapcar
+ (mapc
(lambda (artnum)
(push (vector group artnum 1) artlist)
(setq arts (1+ arts)))
- (imap-search (concat criteria " \"" qstring "\"") buf))
+ (imap-search (nnir-imap-make-query criteria qstring) buf))
(message "Searching %s... %d matches" mbx arts)))
(message "Searching %s...done" group))
(quit nil))
(reverse artlist))))
-;; Swish++ interface. The following function is Copyright (C) 2000,
-;; 2001 Christoph Conrad <christoph.conrad@gmx.de>.
+(defun nnir-imap-make-query (criteria qstring)
+ "Parse the query string and criteria into an appropriate IMAP search
+expression, returning the string query to make.
+
+This implements a little language designed to return the expected results
+to an arbitrary query string to the end user.
+
+The search is always case-insensitive, as defined by RFC2060, and supports
+the following features (inspired by the Google search input language):
+
+Automatic \"and\" queries
+ If you specify multiple words then they will be treated as an \"and\"
+ expression intended to match all components.
+
+Phrase searches
+ If you wrap your query in double-quotes then it will be treated as a
+ literal string.
+
+Negative terms
+ If you precede a term with \"-\" then it will negate that.
+
+\"OR\" queries
+ If you include an upper-case \"OR\" in your search it will cause the
+ term before it and the term after it to be treated as alternatives.
+
+In future the following will be added to the language:
+ * support for date matches
+ * support for location of text matching within the query
+ * from/to/etc headers
+ * additional search terms
+ * flag based searching
+ * anything else that the RFC supports, basically."
+ ;; Walk through the query and turn it into an IMAP query string.
+ (nnir-imap-query-to-imap criteria (nnir-imap-parse-query qstring)))
+
+
+(defun nnir-imap-query-to-imap (criteria query)
+ "Turn a s-expression format query into IMAP."
+ (mapconcat
+ ;; Turn the expressions into IMAP text
+ (lambda (item)
+ (nnir-imap-expr-to-imap criteria item))
+ ;; The query, already in s-expr format.
+ query
+ ;; Append a space between each expression
+ " "))
+
+
+(defun nnir-imap-expr-to-imap (criteria expr)
+ "Convert EXPR into an IMAP search expression on CRITERIA"
+ ;; What sort of expression is this, eh?
+ (cond
+ ;; Simple string term
+ ((stringp expr)
+ (format "%s \"%s\"" criteria (imap-quote-specials expr)))
+ ;; Trivial term: and
+ ((eq expr 'and) nil)
+ ;; Composite term: or expression
+ ((eq (car-safe expr) 'or)
+ (format "OR %s %s"
+ (nnir-imap-expr-to-imap criteria (second expr))
+ (nnir-imap-expr-to-imap criteria (third expr))))
+ ;; Composite term: just the fax, mam
+ ((eq (car-safe expr) 'not)
+ (format "NOT (%s)" (nnir-imap-query-to-imap criteria (rest expr))))
+ ;; Composite term: just expand it all.
+ ((and (not (null expr)) (listp expr))
+ (format "(%s)" (nnir-imap-query-to-imap criteria expr)))
+ ;; Complex value, give up for now.
+ (t (error "Unhandled input: %S" expr))))
+
+
+(defun nnir-imap-parse-query (string)
+ "Turn STRING into an s-expression based query based on the IMAP
+query language as defined in `nnir-imap-make-query'.
+
+This involves turning individual tokens into higher level terms
+that the search language can then understand and use."
+ (with-temp-buffer
+ ;; Set up the parsing environment.
+ (insert string)
+ (goto-char (point-min))
+ ;; Now, collect the output terms and return them.
+ (let (out)
+ (while (not (nnir-imap-end-of-input))
+ (push (nnir-imap-next-expr) out))
+ (reverse out))))
+
+
+(defun nnir-imap-next-expr (&optional count)
+ "Return the next expression from the current buffer."
+ (let ((term (nnir-imap-next-term count))
+ (next (nnir-imap-peek-symbol)))
+ ;; Are we looking at an 'or' expression?
+ (cond
+ ;; Handle 'expr or expr'
+ ((eq next 'or)
+ (list 'or term (nnir-imap-next-expr 2)))
+ ;; Anything else
+ (t term))))
+
+
+(defun nnir-imap-next-term (&optional count)
+ "Return the next TERM from the current buffer."
+ (let ((term (nnir-imap-next-symbol count)))
+ ;; What sort of term is this?
+ (cond
+ ;; and -- just ignore it
+ ((eq term 'and) 'and)
+ ;; negated term
+ ((eq term 'not) (list 'not (nnir-imap-next-expr)))
+ ;; generic term
+ (t term))))
+
+
+(defun nnir-imap-peek-symbol ()
+ "Return the next symbol from the current buffer, but don't consume it."
+ (save-excursion
+ (nnir-imap-next-symbol)))
+
+(defun nnir-imap-next-symbol (&optional count)
+ "Return the next symbol from the current buffer, or nil if we are
+at the end of the buffer. If supplied COUNT skips some symbols before
+returning the one at the supplied position."
+ (when (and (numberp count) (> count 1))
+ (nnir-imap-next-symbol (1- count)))
+ (let ((case-fold-search t))
+ ;; end of input stream?
+ (unless (nnir-imap-end-of-input)
+ ;; No, return the next symbol from the stream.
+ (cond
+ ;; negated expression -- return it and advance one char.
+ ((looking-at "-") (forward-char 1) 'not)
+ ;; quoted string
+ ((looking-at "\"") (nnir-imap-delimited-string "\""))
+ ;; list expression -- we parse the content and return this as a list.
+ ((looking-at "(")
+ (nnir-imap-parse-query (nnir-imap-delimited-string ")")))
+ ;; keyword input -- return a symbol version
+ ((looking-at "\\band\\b") (forward-char 3) 'and)
+ ((looking-at "\\bor\\b") (forward-char 2) 'or)
+ ((looking-at "\\bnot\\b") (forward-char 3) 'not)
+ ;; Simple, boring keyword
+ (t (let ((start (point))
+ (end (if (search-forward-regexp "[[:blank:]]" nil t)
+ (prog1
+ (match-beginning 0)
+ ;; unskip if we hit a non-blank terminal character.
+ (when (string-match "[^[:blank:]]" (match-string 0))
+ (backward-char 1)))
+ (goto-char (point-max)))))
+ (buffer-substring start end)))))))
+
+(defun nnir-imap-delimited-string (delimiter)
+ "Return a delimited string from the current buffer."
+ (let ((start (point)) end)
+ (forward-char 1) ; skip the first delimiter.
+ (while (not end)
+ (unless (search-forward delimiter nil t)
+ (error "Unmatched delimited input with %s in query" delimiter))
+ (let ((here (point)))
+ (unless (equal (buffer-substring (- here 2) (- here 1)) "\\")
+ (setq end (point)))))
+ (buffer-substring (1+ start) (1- end))))
+
+(defun nnir-imap-end-of-input ()
+ "Are we at the end of input?"
+ (skip-chars-forward "[[:blank:]]")
+ (looking-at "$"))
+
+
+;; Swish++ interface.
;; -cc- Todo
;; Search by
;; - group
;; - file size
;; - group
(defun nnir-run-swish++ (query server &optional group)
- "Run given query against swish++.
+ "Run QUERY against swish++.
Returns a vector of (group name, file name) pairs (also vectors,
actually).
-Tested with swish++ 4.7 on GNU/Linux and with with swish++ 5.0b2 on
+Tested with swish++ 4.7 on GNU/Linux and with swish++ 5.0b2 on
Windows NT 4.0."
(when group
(let ( (qstring (cdr (assq 'query query)))
(groupspec (cdr (assq 'group query)))
(prefix (nnir-read-server-parm 'nnir-swish++-remove-prefix server))
- (artlist nil)
- (score nil) (artno nil) (dirnam nil) (group nil) )
+ artlist
+ ;; nnml-use-compressed-files might be any string, but probably this
+ ;; is sufficient. Note that we can't only use the value of
+ ;; nnml-use-compressed-files because old articles might have been
+ ;; saved with a different value.
+ (article-pattern (if (string= server "nnmaildir:")
+ ":[0-9]+"
+ "^[0-9]+\\(\\.[a-z0-9]+\\)?$"))
+ score artno dirnam group filenam )
(when (equal "" qstring)
(error "swish++: You didn't enter anything."))
(while (re-search-forward
"\\(^[0-9]+\\) \\([^ ]+\\) [0-9]+ \\(.*\\)$" nil t)
(setq score (match-string 1)
- artno (file-name-nondirectory (match-string 2))
- dirnam (file-name-directory (match-string 2)))
+ filenam (match-string 2)
+ artno (file-name-nondirectory filenam)
+ dirnam (file-name-directory filenam))
;; don't match directories
- (when (string-match "^[0-9]+\\(\\.gz\\)?$" artno)
+ (when (string-match article-pattern artno)
(when (not (null dirnam))
;; maybe limit results to matching groups.
(when (or (not groupspec)
(string-match groupspec dirnam))
-
- ;; remove nnir-swish++-remove-prefix from beginning of dirname
- (when (string-match (concat "^" prefix)
- dirnam)
- (setq dirnam (replace-match "" t t dirnam)))
-
- (setq dirnam (substring dirnam 0 -1))
- ;; eliminate all ".", "/", "\" from beginning. Always matches.
- (string-match "^[./\\]*\\(.*\\)$" dirnam)
- ;; "/" -> "."
- (setq group (substitute ?. ?/ (match-string 1 dirnam)))
- ;; "\\" -> "."
- (setq group (substitute ?. ?\\ group))
-
- (push (vector (nnir-group-full-name group server)
- (string-to-int artno)
- (string-to-int score))
- artlist)))))
+ (nnir-add-result dirnam artno score prefix server artlist)))))
(message "Massaging swish++ output...done")
(> (nnir-artitem-rsv x)
(nnir-artitem-rsv y)))))))))
-;; Swish-E interface. The following function is Copyright (C) 2000,
-;; 2001 by Christoph Conrad <christoph.conrad@gmx.de>.
+;; Swish-E interface.
(defun nnir-run-swish-e (query server &optional group)
"Run given query against swish-e.
Returns a vector of (group name, file name) pairs (also vectors,
(prefix
(or (nnir-read-server-parm 'nnir-swish-e-remove-prefix server)
(error "Missing parameter `nnir-swish-e-remove-prefix'")))
- (artlist nil)
- (score nil) (artno nil) (dirnam nil) (group nil) )
+ artlist score artno dirnam group )
(when (equal "" qstring)
(error "swish-e: You didn't enter anything."))
(setq group (substitute ?. ?\\ group))
(push (vector (nnir-group-full-name group server)
- (string-to-int artno)
- (string-to-int score))
+ (string-to-number artno)
+ (string-to-number score))
artlist))))
(message "Massaging swish-e output...done")
(when (string-match prefix dirnam)
(setq dirnam (replace-match "" t t dirnam)))
(push (vector (nnir-group-full-name (substitute ?. ?/ dirnam) server)
- (string-to-int artno)
- (string-to-int score))
+ (string-to-number artno)
+ (string-to-number score))
artlist))
(message "Massaging hyrex-search output...done.")
(apply 'vector
(when group
(error "The Namazu backend cannot search specific groups"))
(save-excursion
- (let (
- (artlist nil)
- (qstring (cdr (assq 'query query)))
+ (let ((article-pattern (if (string= server "nnmaildir:")
+ ":[0-9]+"
+ "^[0-9]+$"))
+ artlist
+ (qstring (cdr (assq 'query query)))
(prefix (nnir-read-server-parm 'nnir-namazu-remove-prefix server))
- (score nil)
- (group nil)
- (article nil)
- (process-environment (copy-sequence process-environment))
- )
+ score group article
+ (process-environment (copy-sequence process-environment)))
(setenv "LC_MESSAGES" "C")
(set-buffer (get-buffer-create nnir-tmp-buffer))
(erase-buffer)
article (file-name-nondirectory (match-string 4)))
;; make sure article and group is sane
- (when (and (string-match "^[0-9]+$" article)
+ (when (and (string-match article-pattern article)
(not (null group)))
- (when (string-match (concat "^" prefix) group)
- (setq group (replace-match "" t t group)))
-
- ;; remove trailing slash from groupname
- (setq group (substring group 0 -1))
-
- ;; stuff results into artlist vector
- (push (vector (nnir-group-full-name (substitute ?. ?/ group) server)
- (string-to-int article)
- (string-to-int score)) artlist)))
+ (nnir-add-result group article score prefix server artlist)))
;; sort artlist by score
(apply 'vector
(> (nnir-artitem-rsv x)
(nnir-artitem-rsv y)))))))))
+(defun nnir-run-find-grep (query server &optional group)
+ "Run find and grep to obtain matching articles."
+ (let* ((method (gnus-server-to-method server))
+ (sym (intern
+ (concat (symbol-name (car method)) "-directory")))
+ (directory (cadr (assoc sym (cddr method))))
+ (regexp (cdr (assoc 'query query)))
+ (grep-options (cdr (assoc 'grep-options query)))
+ artlist)
+ (unless directory
+ (error "No directory found in method specification of server %s"
+ server))
+ (message "Searching %s using find-grep..." (or group server))
+ (save-window-excursion
+ (set-buffer (get-buffer-create nnir-tmp-buffer))
+ (erase-buffer)
+ (if (> gnus-verbose 6)
+ (pop-to-buffer (current-buffer)))
+ (cd directory) ; Using relative paths simplifies postprocessing.
+ (let ((group
+ (if (not group)
+ "."
+ ;; Try accessing the group literally as well as
+ ;; interpreting dots as directory separators so the
+ ;; engine works with plain nnml as well as the Gnus
+ ;; Cache.
+ (find-if 'file-directory-p
+ (let ((group (gnus-group-real-name group)))
+ (list group (gnus-replace-in-string group "\\." "/" t)))))))
+ (unless group
+ (error "Cannot locate directory for group"))
+ (save-excursion
+ (apply
+ 'call-process "find" nil t
+ "find" group "-type" "f" "-name" "[0-9]*" "-exec"
+ "grep"
+ `("-l" ,@(and grep-options (split-string grep-options "\\s-" t))
+ "-e" ,regexp "{}" "+"))))
+
+ ;; Translate relative paths to group names.
+ (while (not (eobp))
+ (let* ((path (split-string
+ (buffer-substring (point) (line-end-position)) "/" t))
+ (art (string-to-number (car (last path)))))
+ (while (string= "." (car path))
+ (setq path (cdr path)))
+ (let ((group (mapconcat 'identity (subseq path 0 -1) ".")))
+ (push (vector (nnir-group-full-name group server) art 0)
+ artlist))
+ (forward-line 1)))
+ (message "Searching %s using find-grep...done" (or group server))
+ artlist)))
+
;;; Util Code:
(defun nnir-read-parms (query)
((and nnir-mail-backend
(gnus-server-equal method nnir-mail-backend))
(symbol-value key))
- ((null nnir-mail-backend)
- (symbol-value key))
(t nil))))
;; (if method
;; (if (assq key (cddr method))
(defun nnir-artlist-artitem-rsv (artlist n)
"Returns from ARTLIST the Retrieval Status Value of the Nth artitem
-(counting from 1)."
+\(counting from 1)."
(nnir-artitem-rsv (nnir-artlist-article artlist n)))
;; unused?
;; from each artitem, extract group component
(setq with-dups (mapcar 'nnir-artitem-group artlist))
;; remove duplicates from above
- (mapcar (function (lambda (x) (add-to-list 'res x)))
+ (mapc (function (lambda (x) (add-to-list 'res x)))
with-dups)
res))