1 ;;; registry.el --- Track and remember data items by various fields
3 ;; Copyright (C) 2011 Teodor Zlatanov
5 ;; Author: Teodor Zlatanov <tzz@lifelogs.com>
8 ;; This program is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation, either version 3 of the License, or
11 ;; (at your option) any later version.
13 ;; This program is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
23 ;; This library provides a general-purpose EIEIO-based registry
24 ;; database with persistence, initialized with these fields:
26 ;; version: a float, 0.1 currently (don't change it)
28 ;; max-hard: an integer, default 5000000
30 ;; max-soft: an integer, default 50000
32 ;; precious: a list of symbols
34 ;; tracked: a list of symbols
;; tracker: a hashtable tuned for 100 symbols to track (you should
;; only access this with `registry-lookup-secondary' and
;; `registry-lookup-secondary-value')
40 ;; data: a hashtable with default size 10K and resize threshold 2.0
41 ;; (this reflects the expected usage so override it if you know better)
43 ;; ...plus methods to do all the work: `registry-search',
44 ;; `registry-lookup', `registry-lookup-secondary',
45 ;; `registry-lookup-secondary-value', `registry-insert',
46 ;; `registry-delete', `registry-prune', `registry-size' which see
48 ;; and with the following properties:
50 ;; Every piece of data has a unique ID and some general-purpose fields
51 ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.
53 ;; ((F1 D1) (F2 D2) (F3 a b c))
55 ;; Note that whether a field has one or many pieces of data, the data
56 ;; is always a list of values.
;; The user decides which fields are "precious", F2 for example.  At
;; PRUNE TIME (when `registry-prune' is called), the registry will
;; trim any entries without the F2 field until the size is :max-soft
;; or less.  No entries with the F2 field will be removed at PRUNE
;; TIME.
;; When an entry is inserted, the registry will reject new entries
;; if they bring it over the max-hard limit, even if they have the F2
;; field.
68 ;; The user decides which fields are "tracked", F1 for example. Any
69 ;; new entry is then indexed by all the tracked fields so it can be
70 ;; quickly looked up that way. The data is always a list (see example
71 ;; above) and each list element is indexed.
73 ;; Precious and tracked field names must be symbols. All other
74 ;; fields can be any other Emacs Lisp types.
78 (eval-when-compile (require 'ert))
79 (eval-when-compile (require 'cl))
;; Load EIEIO, first from the regular `load-path' and, failing that,
;; from the copy bundled next to the Gnus library.  Signal an error
;; when neither attempt succeeds.
(eval-and-compile
  (or (ignore-errors (progn
                       (require 'eieio)
                       (require 'eieio-base)))
      ;; gnus-fallback-lib/ from gnus/lisp/gnus-fallback-lib
      (ignore-errors
        (let ((load-path (cons (expand-file-name
                                "gnus-fallback-lib/eieio"
                                (file-name-directory (locate-library "gnus")))
                               load-path)))
          (require 'eieio)
          (require 'eieio-base)))
      (error
       "eieio not found in `load-path' or gnus-fallback-lib/ directory.")))
(defclass registry-db (eieio-persistent)
  ((version :initarg :version
            :initform 0.1
            :type float
            :custom float
            :documentation "The registry version.")
   (max-hard :initarg :max-hard
             :initform 5000000
             :type integer
             :custom integer
             :documentation "Never accept more than this many elements.")
   (max-soft :initarg :max-soft
             :initform 50000
             :type integer
             :custom integer
             :documentation "Prune as much as possible to get to this size.")
   (tracked :initarg :tracked
            :initform nil
            :type t
            :documentation "The tracked (indexed) fields, a list of symbols.")
   (precious :initarg :precious
             :initform nil
             :type t
             :documentation "The precious fields, a list of symbols.")
   ;; Maps each tracked field symbol to its own secondary-index hash
   ;; table (see `registry-lookup-secondary').
   (tracker :initarg :tracker
            :initform (make-hash-table :size 100 :rehash-size 2.0)
            :type hash-table
            :documentation "The field tracking hashtable.")
   ;; Maps entry keys (compared with `equal') to entry alists.
   (data :initarg :data
         :initform (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal)
         :type hash-table
         :documentation "The data hashtable."))
  "A persistent registry of entries, indexed by key and by tracked fields.")
128 ;; (defmethod initialize-instance :after ((this registry-db) slots)
129 ;; "Set value of data slot of THIS after initialization."
130 ;; (with-slots (data tracker max-hard max-soft tracked precious version) this
131 ;; (setq data (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal)
132 ;; tracker (make-hash-table :size 100 :rehash-size 2.0)
(defmethod registry-lookup ((db registry-db) keys)
  "Look up each of KEYS in the registry-db DB.
Return an alist with one element per key that has an entry.  Each
element is a two-element list of the key followed by the entry,
not a cons cell.  Keys without an entry are left out."
  (let ((data (oref db :data)))
    (delq nil
          (mapcar
           (lambda (k)
             ;; Fetch once; a missing key yields nil and is dropped
             ;; by the surrounding `delq'.
             (let ((entry (gethash k data)))
               (when entry
                 (list k entry))))
           keys))))
(defmethod registry-lookup-secondary ((db registry-db) tracksym
                                      &optional create)
  "Return the secondary index hashtable for TRACKSYM in the registry-db DB.
When CREATE is not nil, create the secondary index hashtable if needed.
Return nil when there is no index for TRACKSYM and CREATE is nil."
  (let ((tracker (oref db :tracker)))
    (or (gethash tracksym tracker)
        (when create
          ;; `puthash' returns the value it stored, i.e. the new index.
          (puthash tracksym
                   (make-hash-table :size 800 :rehash-size 2.0 :test 'equal)
                   tracker)))))
(defmethod registry-lookup-secondary-value ((db registry-db) tracksym val
                                            &optional set)
  "Return what is indexed under TRACKSYM with value VAL in the registry-db DB.
When SET is not nil, first store SET as the indexed value for VAL,
creating the secondary index for TRACKSYM if needed (use t for an
empty list).  Return nil when TRACKSYM has no index and SET is nil."
  ;; either we're asked for creation or there should be an existing index
  (when (or set (registry-lookup-secondary db tracksym))
    ;; set the entry if requested,
    (when set
      (puthash val (if (eq t set) '() set)
               (registry-lookup-secondary db tracksym t)))
    ;; ...then read back whatever is stored now.
    (gethash val (registry-lookup-secondary db tracksym))))
(defun registry--match (mode entry check-list)
  "Return non-nil if ENTRY matches some element of CHECK-LIST under MODE.
ENTRY is an alist whose elements are lists of a field followed by
its values.  CHECK-LIST is a list whose elements are lists of a
field followed by candidates.
When MODE is :member, a candidate matches if it is a `member' of
the values ENTRY holds for that field.
When MODE is :regex, a candidate is a regular expression matched
against the printed representations of the field's values joined
with NUL characters.
Any other MODE never matches."
  ;; for all members
  (when check-list
    (let ((key (nth 0 (nth 0 check-list)))
          (vals (cdr-safe (nth 0 check-list)))
          found)
      ;; Try each candidate for this field until one matches.
      (while (and key vals (not found))
        (setq found (cond
                     ((eq mode :member)
                      (member (car-safe vals) (cdr-safe (assoc key entry))))
                     ((eq mode :regex)
                      (string-match (car vals)
                                    (mapconcat
                                     'prin1-to-string
                                     (cdr-safe (assoc key entry))
                                     "\0"))))
              vals (cdr-safe vals)))
      ;; No match on this field: move on to the rest of CHECK-LIST.
      (or found
          (registry--match mode entry (cdr-safe check-list))))))
(defmethod registry-search ((db registry-db) &rest spec)
  "Search the registry-db DB for entries matching the plist SPEC.
Return the list of keys whose entries match.
Calling with :all t (any non-nil value) will match all.
Calling with :member '((a 1 2)) will match entry '((a 3 1)).
Calling with :regex '((a \"h.llo\")) will match entry '((a \"hullo\" \"bye\")).
The test order is to check :all first, then :member, then :regex."
  (when db
    (let ((all (plist-get spec :all))
          (member (plist-get spec :member))
          (regex (plist-get spec :regex)))
      (loop for k being the hash-keys of (oref db :data) using (hash-values v)
            when (or
                  ;; :all non-nil returns all
                  all
                  ;; member matching
                  (and member (registry--match :member v member))
                  ;; regex matching
                  (and regex (registry--match :regex v regex)))
            collect k))))
(defmethod registry-delete ((db registry-db) keys assert &rest spec)
  "Delete KEYS from the registry-db DB.
If KEYS is nil, use SPEC to do a search.
Updates the secondary ('tracked') indices as well.
With ASSERT non-nil, errors out if the key does not exist already.
Return the list of keys that were processed."
  (let* ((data (oref db :data))
         (keys (or keys
                   (apply 'registry-search db spec)))
         (tracked (oref db :tracked)))

    (dolist (key keys)
      (let ((entry (gethash key data)))
        (when assert
          (assert entry nil
                  "Key %s does not exists in database" key))
        ;; clean entry from the secondary indices
        (dolist (tr tracked)
          ;; is this tracked symbol indexed?
          (when (registry-lookup-secondary db tr)
            ;; for every value in the entry under that key...
            (dolist (val (cdr-safe (assq tr entry)))
              (let* ((value-keys (registry-lookup-secondary-value db tr val)))
                (when (member key value-keys)
                  ;; override the previous value
                  (registry-lookup-secondary-value
                   db tr val
                   ;; with the indexed keys MINUS the current key
                   ;; (we pass t when the list is empty)
                   (or (delete key value-keys) t)))))))
        ;; finally drop the entry itself
        (remhash key data)))
    keys))
(defmethod registry-insert ((db registry-db) key entry)
  "Insert ENTRY under KEY into the registry-db DB.
Updates the secondary ('tracked') indices as well.
Errors out if the key exists already or if the registry already
holds :max-hard entries.  Return ENTRY."

  (assert (not (gethash key (oref db :data))) nil
          "Key already exists in database")

  (assert (< (registry-size db)
             (oref db :max-hard))
          nil
          "max-hard size limit reached")

  ;; store the entry
  (puthash key entry (oref db :data))

  ;; store the secondary indices
  (dolist (tr (oref db :tracked))
    ;; for every value in the entry under that key...
    (dolist (val (cdr-safe (assq tr entry)))
      (let* ((value-keys (registry-lookup-secondary-value db tr val)))
        ;; add KEY to the keys already indexed under this value
        (pushnew key value-keys :test 'equal)
        (registry-lookup-secondary-value db tr val value-keys))))
  entry)
(defmethod registry-size ((db registry-db))
  "Return the number of entries held by the registry-db DB.
This is the key count of the :data slot."
  (let ((table (oref db :data)))
    (hash-table-count table)))
(defmethod registry-prune ((db registry-db))
  "Prune the registry-db DB towards its :max-soft size.
Removes only entries that carry no data under any of the :precious
fields, and no more of them than needed to get down to :max-soft.
Return the list of keys that were deleted."
  (let* ((precious (oref db :precious))
         ;; A field protects its entry only when it is named in the
         ;; precious list AND actually carries data.  (Taking the cdr
         ;; of the `memq' result instead would be nil for the last
         ;; symbol in the precious list, leaving it unprotected.)
         (precious-p (lambda (entry-key)
                       (and (memq (car-safe entry-key) precious)
                            (cdr-safe entry-key))))
         (data (oref db :data))
         (limit (oref db :max-soft))
         (size (registry-size db))
         (candidates (loop for k being the hash-keys of data
                           using (hash-values v)
                           when (notany precious-p v)
                           collect k))
         (candidates-count (length candidates)))

    ;; while we have more candidates than we need to remove...
    ;; (when SIZE is already within LIMIT this empties the list)
    (while (and (> candidates-count (- size limit)) candidates)
      (decf candidates-count)
      (setq candidates (cdr candidates)))

    (registry-delete db candidates nil)))
(ert-deftest registry-instantiation-test ()
  "A registry-db can be created from nothing but a name."
  (let ((instance (registry-db "Testing")))
    (should instance)))
(ert-deftest registry-match-test ()
  "Check `registry--match' in :regex and :member modes."
  (let ((sample '((hello "goodbye" "bye") (blank))))
    ;; Each table lists check-lists that must (or must not) match.
    (message "Testing :regex matching")
    (dolist (hit '(((hello "nye" "bye"))
                   ((hello "good"))))
      (should (registry--match :regex sample hit)))
    (dolist (miss '(((hello "nye"))
                    ((hello))))
      (should-not (registry--match :regex sample miss)))

    (message "Testing :member matching")
    (dolist (hit '(((hello "bye"))
                   ((hello "goodbye"))))
      (should (registry--match :member sample hit)))
    (dolist (miss '(((hello "good"))
                    ((hello "nye"))
                    ((hello))))
      (should-not (registry--match :member sample miss))))
  (message "Done with matching testing."))
(defun registry-make-testable-db (n &optional name file)
  "Return a registry-db holding N test entries keyed 0 to N-1.
NAME is the object name (default \"Testing\") and FILE the
persistence file (default \"unused\").  The registry is created
with :max-hard N, so it is full once populated."
  (let* ((db (registry-db
              (or name "Testing")
              :file (or file "unused")
              :max-hard n
              :max-soft 0               ; keep nothing not precious
              :precious '(extra more-extra)
              :tracked '(sender subject groups))))
    (dotimes (i n)
      (registry-insert db i `((sender "me")
                              (subject "about you")
                              (more-extra) ; empty data key should be pruned
                              ;; entries 0 through 5 will NOT have this
                              ;; extra data
                              ,@(when (< 5 i) (list (list 'extra "more data")))
                              (groups ,(number-to-string i)))))
    db))
(ert-deftest registry-usage-test ()
  "Exercise insert, lookup, search, delete and prune on a test registry."
  (let* ((n 100)
         (db (registry-make-testable-db n)))
    (message "size %d" n)
    (should (= n (registry-size db)))
    (message "max-hard test")
    (should-error (registry-insert db "new" '()))
    (message "Individual lookup")
    (should (= 58 (caadr (registry-lookup db '(1 58 99)))))
    (message "Grouped individual lookup")
    (should (= 3 (length (registry-lookup db '(1 58 99)))))
    (message "Search")
    (should (= n (length (registry-search db :all t))))
    (should (= n (length (registry-search db :member '((sender "me"))))))
    (message "Secondary index search")
    (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
    (should (equal '(74) (registry-lookup-secondary-value db 'groups "74")))
    (message "Delete")
    (should (registry-delete db '(1) t))
    ;; one entry is gone, so every later count is checked against N-1
    (decf n)
    (message "Search after delete")
    (should (= n (length (registry-search db :all t))))
    (message "Secondary search after delete")
    (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
    (message "Pruning")
    (let* ((tokeep (registry-search db :member '((extra "more data"))))
           (count (- n (length tokeep)))
           (pruned (registry-prune db))
           (prune-count (length pruned)))
      (message "Expecting to prune %d entries and pruned %d"
               count prune-count)
      (should (and (= count 5)
                   (= count prune-count))))
    (message "Done with usage testing.")))
(ert-deftest registry-persistence-test ()
  "Save a test registry to a temporary file and read it back."
  (let* ((n 100)
         (tempfile (make-temp-file "registry-persistence-"))
         (name "persistence tester")
         (db (registry-make-testable-db n name tempfile))
         size back)
    ;; Remove the temporary file even when an assertion fails.
    (unwind-protect
        (progn
          (message "Saving to %s" tempfile)
          (eieio-persistent-save db)
          (setq size (nth 7 (file-attributes tempfile)))
          (message "Saved to %s: size %d" tempfile size)
          (should (< 0 size))
          (with-temp-buffer
            (insert-file-contents-literally tempfile)
            (should (looking-at (concat ";; Object "
                                        name
                                        "\n;; EIEIO PERSISTENT OBJECT"))))
          (message "Reading object back")
          (setq back (eieio-persistent-read tempfile))
          (should back)
          (message "Read object back: %d keys, expected %d==%d"
                   (registry-size back) n (registry-size db))
          (should (= (registry-size back) n))
          (should (= (registry-size back) (registry-size db))))
      (when (file-exists-p tempfile)
        (delete-file tempfile))))
  (message "Done with persistence testing."))
399 ;;; registry.el ends here