1 ;; ffi-magic.el --- SXEmacs interface to libmagic -*- mode: emacs-lisp -*-
3 ;; Copyright (C) 2008 - 2020 Steve Youngs
5 ;; Author: Steve Youngs <steve@sxemacs.org>
6 ;; Maintainer: Steve Youngs <steve@sxemacs.org>
7 ;; Created: <2008-04-02>
8 ;; Homepage: https://www.sxemacs.org
9 ;; Keywords: ffi, file, magic, extension
11 ;; This file is part of SXEmacs.
13 ;; Redistribution and use in source and binary forms, with or without
14 ;; modification, are permitted provided that the following conditions
17 ;; 1. Redistributions of source code must retain the above copyright
18 ;; notice, this list of conditions and the following disclaimer.
20 ;; 2. Redistributions in binary form must reproduce the above copyright
21 ;; notice, this list of conditions and the following disclaimer in the
22 ;; documentation and/or other materials provided with the distribution.
24 ;; 3. Neither the name of the author nor the names of any contributors
25 ;; may be used to endorse or promote products derived from this
26 ;; software without specific prior written permission.
28 ;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
29 ;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
30 ;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
31 ;; DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 ;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 ;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35 ;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
36 ;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
37 ;; OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
38 ;; IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 ;; (magic:file (expand-file-name "about.el" lisp-directory))
43 ;; => "Lisp/Scheme program, ISO-8859 text"
45 ;; That's the vanilla use, however there's more!
46 ;; See `C-h f magic:file RET'.
48 ;; If you'd like to use magic for coding system detection for
49 ;; #'find-file, put...
51 ;; (magic:find-file-magic-alist-enable)
53 ;; in your init.el. You _might_ not want that though because you'd
54 ;; be surprised at how little it takes for libmagic to believe a
55 ;; file is binary. A single character can trigger it. To see for
56 ;; yourself, visit a file in SXEmacs that you know file(1) reports
57 ;; as normal text and do: `C-u M-: (format "%c" #o177)', save
58 ;; it, and go see what file(1) reports now.
60 ;; Steps are now taken to guard against this sort of thing so it
61 ;; should be quite safe to enable this.
66 ;; #'magic:file-audio-p
67 ;; #'magic:file-video-p
68 ;; #'magic:file-image-p
69 ;; #'magic:file-text-p
72 ;;; Todo: <<< ffi-magic notes
74 ;; o Optionally output MIME type strings like "text/plain",
75 ;; "applicaton/octet-stream"
76 ;; Ah, yep, can do that now, as of 2020-02-23
78 ;; o magic_getparam(), magic_setparam(), magic_list(), magic_compile(),
79 ;; magic_check() are all to be added.
86 ;; Can't do anything without this
89 (defvar ffi-magic-shared nil
90 "Shared context with preloaded magic file, to speed up things.")
92 ;;;###autoload(put 'ffi-magic-persistent-flags 'risky-local-variable t)
93 (defvar ffi-magic-persistent-flags '(:error)
94 "A list of libmagic option flags to always set.
96 A good thing to keep on this list is ':error', the default. It will
97 ensure that any errors, from either libmagic or the shell, will be
98 captured in, and thus reportable via, `magic:error'.
100 If you were to get an error from libmagic \(or the shell\) and you
101 _didn't_ have the ':error' flag set, one of two things would happen
102 depending on the version of libmagic you have. Either it'd be lost to
103 stderr, or it'd hit stdout but wouldn't be an error from our POV. In
104 both cases `magic:error' would return nil.
106 See `ffi-magic-options-list' for what can be included here.")
109 (unless (ffi-find-named-type 'magic_t)
110 (define-ffi-type magic_t pointer))
112 (unless (ffi-find-named-type 'magic-options)
113 (define-ffi-enum magic-options
114 (:none #x0000000) ; No flags
115 (:debug #x0000001) ; Turn on debugging
116 (:symlink #x0000002) ; Follow symlinks
117 (:compress #x0000004) ; Check inside compressed files
118 (:devices #x0000008) ; Look at contents of devices
119 (:mime-type #x0000010) ; Return the MIME type
120 (:continue #x0000020) ; Return all matches
121 (:check #x0000040) ; Print warnings to stderr
122 (:preserve-atime #x0000080) ; Restore access time on exit
123 (:raw #x0000100) ; Don't translate unprintable chars
124 (:error #x0000200) ; Handle ENOENT etc as real errors
125 (:mime-encoding #x0000400) ; Return MIME encoding
126 (:mime #x0000410) ; :mime-type + :mime-encoding
127 (:apple #x0000800) ; Return Apple creator and type
128 (:extension #x1000000) ; Return a / separated list of
130 (:compress-transp #x2000000) ; Check inside compressed files but
131 ; not report compression
132 (:nodesc #x1001210) ; :extension + :mime + :apple
133 (:no-check-compress #x0001000) ; Don't check for compressed files
134 (:no-check-tar #x0002000) ; Don't check for tar files
135 (:no-check-soft #x0004000) ; Don't check magic entries
136 (:no-check-apptype #x0008000) ; Don't check application type
137 (:no-check-elf #x0010000) ; Don't check for elf details
138 (:no-check-text #x0020000) ; Don't check for text files
139 (:no-check-cdf #x0040000) ; Don't check for cdf files
140 (:no-check-csv #x0080000) ; Don't check for CSV files
141 (:no-check-tokens #x0100000) ; Don't check tokens
142 (:no-check-encoding #x0200000) ; Don't check for text encodings
143 (:no-check-json #x0400000))) ; Don't check for JSON files
145 (defvar ffi-magic-options-list
146 (mapfam #'car :mode 'keyw (ffi-enum-values 'magic-options))
147 "List of possible option flags.
149 These flags are used to influence the results of querying the magic
150 db. See below for a few little \"quirks\".
155 :debug Turn on debugging
156 :symlink Follow symlinks
157 :compress Check inside compressed files
158 :devices Look at contents of devices
159 :mime-type Return the MIME type
160 :continue Return all matches
161 :check Print warnings to stderr
162 :preserve-atime Restore access time on exit
163 :raw Don't translate unprintable chars
164 :error Handle ENOENT etc as real errors
165 :mime-encoding Return MIME encoding
166 :mime Alias for :mime-type + :mime-encoding
167 :apple Return Apple creator and type
168 :extension Return a / separated list of
170 :compress-transp Check inside compressed files but
171 not report compression
172 :nodesc :extension + :mime + :apple
173 :no-check-compress Don't check for compressed files
174 :no-check-tar Don't check for tar files
175 :no-check-soft Don't check magic entries
176 :no-check-apptype Don't check application type
177 :no-check-elf Don't check for elf details
178 :no-check-text Don't check for text files
179 :no-check-cdf Don't check for cdf files
180 :no-check-csv Don't check for CSV files
181 :no-check-tokens Don't check tokens
182 :no-check-encoding Don't check for text encodings
183 :no-check-json Don't check for JSON files
185 A couple of points to note:
187 If you want to pretend that you didn't access the files you just
188 accessed you can use ':preserve-atime'. Be aware that it only works
189 on systems that support utime(3) or utimes(2).
191 Using ':none' effectively turns off all flags so setting this along
192 with any other flag is pointless. One possible good reason to use
193 ':none' is if you want to override `ffi-magic-persistent-flags'. Be
194 aware though, that in our eyes, none means none. If you use ':none'
195 that is what you'll get... no flags.
197 The ':mime' flag being an alias for ':mime-type :mime-encoding'
198 means that if you use it, you don't need either of the other two.
200 When using ':debug' it is also good to add ':error' and ':check' as
201 well. Oh, and you've just lost all of your output if you didn't
202 remember to redirect stderr.
204 Trying to do things like use ':no-check-encoding' with ':mime-encoding'
205 at the same time is... well... I'm sure you can see the problem.
207 Using ':apple' only makes sense for apple mac created files. If
208 you're not using a Mac, you probably don't have any. Sure, you can
209 still use this flag on any file, arch, or iron, but non-mac files will
210 return a ever-so-enlightening \"UNKNUNKN\". Have fun with that.
212 ':continue' will output literal \\012 instead of linefeed so you need
213 to add ':raw' to get actual linefeeds.
215 I have absolutely no idea what ':nodesc' might return \(I don't
216 have a Mac, and haven't ever encountered any of these \"extensions\"
217 libmagic talks of\), so... YMMV and all that.")
219 (define-ffi-function magic-open (flag)
220 "Call libmagic's magic_open()."
221 '(function magic_t int)
224 (define-ffi-function magic-close (magic)
225 "Call libmagic's magic_close()."
226 '(function void magic_t)
229 (define-ffi-function magic-load (magic magicfile)
230 "Call libmagic's magic_load()."
231 '(function int magic_t c-string)
234 (define-ffi-function magic-list (magic magicfile)
235 "Call libmagic's magic_list()."
236 '(function int magic_t c-string)
239 (define-ffi-function magic-file (magic file)
240 "Call libmagic's magic_file()."
241 '(function safe-string magic_t c-string)
244 (define-ffi-function magic-error (magic)
245 "Call libmagic's magic_error()."
246 '(function safe-string magic_t)
249 (define-ffi-function magic-errno (magic)
250 "Call libmagic's magic_errno()."
251 '(function int magic_t)
254 (define-ffi-function magic-getflags (magic)
255 "Call libmagic's magic_setflags()."
256 '(function int magic_t)
259 (define-ffi-function magic-setflags (magic flag)
260 "Call libmagic's magic_setflags()."
261 '(function int magic_t int)
264 (define-ffi-function magic-version ()
265 "Call libmagic's magic_version()."
269 (defconst magic:version (magic-version)
272 (defun magic:version (&optional test ver)
273 "Return libmagic version number \(integer\).
275 With optional argument TEST return t if libmagic version is
276 \(=, <, <=, >, >=\) VER, depending on what TEST.
278 TEST can be one of the following symbols:
282 'lte' Less than or equal to
284 'gte' Greater than or equal to
286 VER can be either an int or a float. The reason is that file\(1\)
287 reports its version as \"5.38\", for example, but the underlying
288 libmagic of the same version reports as \"538\". And because I'm
289 such a nice guy and always thinking of you, I cater for both."
293 (= (/ magic:version 100.0) ver)
294 (= magic:version ver)))
296 (< (/ magic:version 100.0) ver)
297 (< magic:version ver)))
298 (lte (if (floatp ver)
299 (<= (/ magic:version 100.0) ver)
300 (<= magic:version ver)))
302 (> (/ magic:version 100.0) ver)
303 (> magic:version ver)))
304 (gte (if (floatp ver)
305 (>= (/ magic:version 100.0) ver)
306 (>= magic:version ver)))
307 (otherwise (error 'invalid-argument test)))
310 (define-ffi-function magic-descriptor (magic fd)
311 "Call libmagic's magic_descriptor()."
312 '(function safe-string magic_t int)
315 (defun magic:getflags (magic)
316 (magic-getflags magic))
318 ;;;###autoload(put 'ffi-magic-no-safety 'risky-local-variable t)
319 (defvar ffi-magic-no-safety nil
320 "Set to non-nil if you DO NOT want your option flags sanitised.")
322 (defun ffi-magic-sanitise-flags (flags)
323 "Do our best to not let the user shoot themselves in the foot.
325 Argument FLAGS is the list of options given to `magic:file' to influence
328 There are a few cases where using some of those flags is bad or
329 downright crazy. This function tries to help you by taking a few
330 rudimentary precautions:
332 Don't have _any_ flags if you want :none
333 Don't have any duplicates
334 Don't have any superfluous flags \(if you have :mime, you don't need
335 :mime-type or :mime-encoding\)
336 Don't include :mime-encoding if :no-check-encoding is present
337 Don't have any unknown flags.
338 Don't let you :debug without giving you :error and :check.
339 Don't over cook :continue, serve it :raw.
341 However, if you really really want to, you can bypass this sanitation
342 completely by setting `ffi-magic-no-safety' non-nil. "
343 (let ((cleanflags (copy-sequence flags)))
344 (unless ffi-magic-no-safety
345 ;; Did you say ":none"? OK, no flags for you!
346 (and (memq ':none cleanflags) (setq cleanflags nil))
349 (setq cleanflags (remove-duplicates cleanflags))
350 ;; Too much MIME on our hands
351 (when (memq ':mime cleanflags)
352 (delq ':mime-type cleanflags)
353 (delq ':mime-encoding cleanflags))
354 ;; Want encoding, don't wanna check for encoding. How's that
355 ;; supposed to work, Sparky?
356 (when (memq ':no-check-encoding cleanflags)
357 (delq ':mime-encoding cleanflags))
358 ;; I've never heard of that flag
361 (and (not (memq elt ffi-magic-options-list))
362 (delq elt cleanflags)))
364 ;; The :debug tastes better when served with :error and :check
365 (when (memq ':debug cleanflags)
366 (add-to-list 'cleanflags ':error)
367 (add-to-list 'cleanflags ':check))
368 ;; :continue is best served :raw
369 (when (memq ':continue cleanflags)
370 (add-to-list 'cleanflags ':raw)))
371 ;; Warn about any changes
372 (unless (eq (length cleanflags) (length flags))
373 (lwarn 'magic-flags 'warning
374 "*** Your libmagic option flags have been altered! ***
376 Flags you asked for: %s
379 See `display-warning-suppressed-classes' to suppress this warning.
380 See `ffi-magic-sanitise-flags' and `ffi-magic-options-list' to see why
381 you got it in the first place."
383 ;; Return the new and improved squeaky-clean flags
386 (defun ffi-magic-flag-value (flag)
387 "Return numeric value of FLAG."
388 (cdr (assq flag (ffi-enum-values 'magic-options))))
390 (defun magic:setflags (magic &rest flags)
392 (flags (ffi-magic-sanitise-flags (car flags))))
393 ;; Remove any previously set flags
394 (magic-setflags magic 0)
395 ;; Set flags if any survived sanitation
399 (setq f (+ f (ffi-magic-flag-value flg))))
401 (magic-setflags magic f))))
403 (globally-declare-fboundp 'completing-read-multiple)
404 (globally-declare-boundp 'crm-separator)
406 (defun magic:file (file &rest flags)
407 "Return as a string information about FILE using libmagic.
409 FLAGS are optional keys for determining the type of output required.
410 Interactively, they can be set via prefix arg.
412 A second prefix arg will insert the result into the current buffer at
415 The supported flags are:
418 :debug Turn on debugging
419 :symlink Follow symlinks
420 :compress Check inside compressed files
421 :devices Look at contents of devices
422 :mime-type Return the MIME type
423 :continue Return all matches
424 :check Print warnings to stderr
425 :preserve-atime Restore access time on exit
426 :raw Don't translate unprintable chars
427 :error Handle ENOENT etc as real errors
428 :mime-encoding Return MIME encoding
429 :mime Alias for :mime-type + :mime-encoding
430 :apple Return Apple creator and type
431 :extension Return a / separated list of
433 :compress-transp Check inside compressed files but
434 not report compression
435 :nodesc :extension + :mime + :apple
436 :no-check-compress Don't check for compressed files
437 :no-check-tar Don't check for tar files
438 :no-check-soft Don't check magic entries
439 :no-check-apptype Don't check application type
440 :no-check-elf Don't check for elf details
441 :no-check-text Don't check for text files
442 :no-check-cdf Don't check for cdf files
443 :no-check-csv Don't check for CSV files
444 :no-check-tokens Don't check tokens
445 :no-check-encoding Don't check for text encodings
446 :no-check-json Don't check for JSON files
450 \(magic:file \(expand-file-name \"about.el\" lisp-directory\)\)
451 => \"Lisp/Scheme program, ISO-8859 text\"
453 \(magic:file \(expand-file-name \"about.el\" lisp-directory\)
457 \(magic:file \(expand-file-name \"about.el\" lisp-directory\)
461 \(magic:file \(expand-file-name \"about.el\" lisp-directory\)
464 => \"text/x-lisp; charset=iso-8859-1\"
466 \(magic:file \"~/tmp/foo\"\)
467 => \"symbolic link to `a-dummy-file.txt'\"
469 \(magic:file \"~/tmp/foo\" :symlink\)
470 => \"UTF-8 Unicode text\""
473 (read-file-name "File name: " default-directory nil t)
474 (when current-prefix-arg
478 (completing-read-multiple
479 (format "Magic options ('%s' separated): " crm-separator)
481 (mapfam #'symbol-name ffi-magic-options-list))))
482 (if (string-equal (car flags) "")
484 (setq flags (car (mapfam #'intern flags)))))
485 (error 'unimplemented 'crm
486 ;; Bogus, yes, but crm isn't currently listed as a
487 ;; provide in the edit-utils pkg
488 (package-get-package-provider 'avoid))))))
489 (unless ffi-magic-shared
490 (setq ffi-magic-shared (magic-open 0))
491 (magic-load ffi-magic-shared (ffi-null-pointer)))
492 (when ffi-magic-persistent-flags
493 (setq flags (append ffi-magic-persistent-flags flags))
495 (and flags (magic:setflags ffi-magic-shared flags))
497 (let ((ftype (magic-file ffi-magic-shared
498 (expand-file-name file))))
500 (if (eq (car current-prefix-arg) 16)
504 (let ((emsg (magic:error ffi-magic-shared)))
505 (and emsg (error 'ffi-magic-error emsg)))))
507 (define-obsolete-function-alias #'magic:file-type #'magic:file)
509 (defun magic:cleanup ()
510 "Ensure that the magic file is closed on SXEmacs exit.
511 Called from `kill-emacs-hook'."
512 (when ffi-magic-shared
513 (magic-close ffi-magic-shared)
514 (setq ffi-magic-shared nil)))
516 (add-hook 'kill-emacs-hook #'magic:cleanup)
518 (define-error 'ffi-magic-error "%S" 'file-error)
520 (defun magic:error (magic)
521 "Return string of errors/warnings from libmagic.
523 Argument MAGIC is the magic db, normally `ffi-magic-shared'.
525 This is designed to be used as an argument to `error', `warn', and the
526 like. There is a corresponding error datum symbol, `ffi-magic-error'.
530 \(or \(magic:file \"/path/to/file\" :error\)
531 \(error 'ffi-magic-error \(magic:error ffi-magic-shared\)\)\)
533 Note: For `magic:error' to return anything other than nil the libmagic
534 option flag, `:error' must be set. See `ffi-magic-persistent-flags'."
535 (let ((estr (magic-error magic))
536 (enum (magic-errno magic))
537 (sh shell-file-name))
538 (if (not (zerop enum))
539 (format "[%s RC:%d]: %s" sh enum estr)
540 (and estr (format "[libmagic]: %s" estr)))))
543 (defun magic:file-audio-p (file)
544 "Return non-nil if FILE is an audio file.
546 As reported by libmagic. If so, the audio type is returned."
547 (let ((type (magic:file file :mime-type)))
548 (when (string-match #r"^audio/\(.*$\)" type)
549 (substring type (match-beginning 1)))))
552 (defun magic:file-video-p (file)
553 "Return non-nil if FILE is a video file.
555 As reported by libmagic. If so, the video type is returned."
556 (let ((type (magic:file file :mime-type)))
557 (when (string-match #r"^video/\(.*$\)" type)
558 (substring type (match-beginning 1)))))
561 (defun magic:file-image-p (file)
562 "Return non-nil if FILE is an image file.
564 As reported by libmagic. If so, the image type is returned."
565 (let ((type (magic:file file :mime-type)))
566 (when (string-match #r"^image/\(.*$\)" type)
567 (substring type (match-beginning 1)))))
570 (defun magic:file-text-p (file)
571 "Return non-nil if FILE is a text file.
573 As reported by libmagic. If so, the text type is returned."
574 (let ((type (magic:file file :mime-type)))
575 (when (string-match #r"^text/\(.*$\)" type)
576 (substring type (match-beginning 1)))))
578 (defun magic:file-coding-system-p (file)
579 "Return non-nil if FILE is encoded in a known coding system.
581 This will return nil if `coding-system-for-read' had been explicitly
582 set by something else prior. For example, if the user had called
583 `find-file' with a prefix arg."
584 (unless coding-system-for-read
585 (let ((cs (magic:file file :mime-encoding)))
587 (magic:file-text-p file)
588 (find-coding-system (intern cs))))))
590 (defun magic:find-file-noselect (file)
591 (let* ((codesys (intern (magic:file file :mime-encoding)))
592 (coding-system-for-read codesys)
593 (buf (create-file-buffer file))
595 (with-current-buffer buf
596 (insert-file-contents file t)
597 (hack-local-variables)
598 (when (and coding (not (eq coding codesys)))
599 (let ((coding-system-for-read coding))
600 (set-buffer-file-coding-system coding)
601 (insert-file-contents file t nil nil t)))
602 (after-find-file nil t))
606 (defun magic:find-file-magic-alist-enable ()
607 "Enables libmagic backed coding system detection."
608 (add-to-list 'find-file-magic-files-alist
609 (cons 'magic:file-coding-system-p
610 'magic:find-file-noselect) t))
613 ;;; ffi-magic.el ends here