1 ;;; un-data.el --- basic data for UNICODE.
3 ;; Copyright (C) 1997-1999 Miyashita Hisashi
5 ;; Keywords: mule, multilingual,
6 ;; character set, coding-system, ISO10646, Unicode
8 ;; This file is part of Mule-UCS
10 ;; Mule-UCS is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
15 ;; Mule-UCS is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
26 ;; This module defines basic data for Unicode.
28 ;; Because XEmacs and Emacs are incompatible in how they handle number
29 ;; literals, numeric constants are written with the `cn' macro, which
30 ;; translates a string in C notation (e.g. "0xFEFF") into a number.
33 (require 'trans-util))
;; The two bytes that make up 0xFEFF (the value of `unicode-signature'):
;; 0xFF is its low-order byte and 0xFE is its high-order byte.
35 (defconst unicode-byte-order-mark-lsb (cn "0xFF"))
36 (defconst unicode-byte-order-mark-msb (cn "0xFE"))
;; U+FEFF, the Unicode byte order mark (here called the "signature"), and
;; 0xFFFE, the same 16-bit value with its two bytes swapped.
;; NOTE(review): the reverse value is presumably what a reader sees when the
;; data uses the opposite byte order -- confirm at the point of use.
37 (defconst unicode-signature (cn "0xFEFF"))
38 (defconst unicode-reverse-signature (cn "0xFFFE"))
;; Code points U+2028 (LINE SEPARATOR) and U+2029 (PARAGRAPH SEPARATOR).
39 (defconst unicode-line-separator (cn "0x2028"))
40 (defconst unicode-paragraph-separator (cn "0x2029"))
41 (defconst unicode-ignore-characters
46 (defconst utf-8-signature
;; Code points of carriage return (U+000D) and line feed (U+000A).
51 (defconst unicode-cr (cn "0x000D"))
52 (defconst unicode-lf (cn "0x000A"))
;; Regexp matching, at the beginning of a line, the bytes EF BB BF (the
;; UTF-8 encoding of U+FEFF), then any run of non-newline characters, then
;; CR LF, then either one character that is neither CR nor LF or the end
;; of the buffer/string (\').
;; NOTE(review): "ws" presumably stands for "with signature", and this is
;; presumably used to detect DOS-style line endings -- confirm at the caller.
54 (defvar utf-8-ws-dos-signature-regexp
55 "^\xef\xbb\xbf[^\n]*\xd\xa\\([^\xd\xa]\\|\\'\\)")
;; Regexp matching, at the beginning of a line, the bytes EF BB BF (the
;; UTF-8 encoding of U+FEFF), then any run of characters other than CR
;; (LF is allowed inside the run), then LF, then either one character that
;; is neither CR nor LF or the end of the buffer/string (\').
;; NOTE(review): presumably used to detect Unix-style (LF only) line
;; endings in signed UTF-8 text -- confirm at the caller.
56 (defvar utf-8-ws-unix-signature-regexp
57 "^\xef\xbb\xbf[^\xd]*\xa\\([^\xd\xa]\\|\\'\\)")
;; Regexp matching, at the beginning of a line, the bytes EF BB BF (the
;; UTF-8 encoding of U+FEFF), then any run of non-newline characters, then
;; CR, then either one character that is neither CR nor LF or the end of
;; the buffer/string (\').  The trailing check keeps a CR LF pair from
;; matching.
;; NOTE(review): presumably used to detect Mac-style (CR only) line endings
;; in signed UTF-8 text -- confirm at the caller.
58 (defvar utf-8-ws-mac-signature-regexp
59 "^\xef\xbb\xbf[^\n]*\xd\\([^\xd\xa]\\|\\'\\)")
;; Regexp matching the bytes EF BB BF (the UTF-8 encoding of U+FEFF) at the
;; beginning of a line, with no constraint on what follows.
60 (defvar utf-8-ws-signature-regexp "^\xef\xbb\xbf")
;; Regexp matching, at the beginning of a line, the bytes FF FE (U+FEFF in
;; little-endian order), then zero or more two-byte units, then the bytes
;; 0D 00 0A 00 (CR LF as little-endian 16-bit units).  Each skipped unit is
;; either 0A followed by a non-NUL byte, or a non-0A byte followed by any
;; character except newline, so the unit 0A 00 (LF) is never skipped.
;; NOTE(review): presumably used to detect DOS-style line endings in
;; UTF-16LE text -- confirm at the caller.
62 (defvar utf-16-le-dos-signature-regexp
63 "^\xff\xfe\\(\xa[^\x0]\\|[^\xa].\\)*\xd\x0\xa\x0")
;; Regexp matching, at the beginning of a line, the bytes FF FE (U+FEFF in
;; little-endian order), then zero or more two-byte units, then the bytes
;; 0A 00 (LF as a little-endian 16-bit unit).  Each skipped unit is either
;; 0A followed by a non-NUL byte, or a non-0A byte followed by any
;; character except newline, so the unit 0A 00 is never skipped.
;; NOTE(review): the unit 0D 00 (CR) can be skipped, so this also matches
;; text whose first line ends in CR LF; callers presumably try the DOS
;; pattern first -- confirm.
64 (defvar utf-16-le-unix-signature-regexp
65 "^\xff\xfe\\(\xa[^\x0]\\|[^\xa].\\)*\xa\x0")
;; Regexp matching, at the beginning of a line, the bytes FF FE (U+FEFF in
;; little-endian order), then zero or more two-byte units, then the bytes
;; 0D 00 (CR as a little-endian 16-bit unit).  Each skipped unit is either
;; 0A followed by a non-NUL byte, or a non-0A byte followed by any
;; character except newline, so the unit 0A 00 (LF) is never skipped.
;; NOTE(review): nothing after 0D 00 is checked, so this also matches text
;; whose first line ends in CR LF; callers presumably try the DOS pattern
;; first -- confirm.
66 (defvar utf-16-le-mac-signature-regexp
67 "^\xff\xfe\\(\xa[^\x0]\\|[^\xa].\\)*\xd\x0")
;; Regexp matching the bytes FF FE (U+FEFF in little-endian order) at the
;; beginning of a line, with no constraint on what follows.
68 (defvar utf-16-le-signature-regexp "^\xff\xfe")
;; Regexp matching, at the beginning of a line, the bytes FE FF (U+FEFF in
;; big-endian order), then zero or more two-byte units, then the bytes
;; 00 0D 00 0A (CR LF as big-endian 16-bit units).  Each skipped unit is
;; either 00 followed by a non-0A byte, or a non-00 byte followed by any
;; character except newline, so the unit 00 0A (LF) is never skipped.
;; NOTE(review): presumably used to detect DOS-style line endings in
;; UTF-16BE text -- confirm at the caller.
70 (defvar utf-16-be-dos-signature-regexp
71 "^\xfe\xff\\(\x0[^\xa]\\|[^\x0].\\)*\x0\xd\x0\xa")
;; Regexp matching, at the beginning of a line, the bytes FE FF (U+FEFF in
;; big-endian order), then zero or more two-byte units, then the bytes
;; 00 0A (LF as a big-endian 16-bit unit).  Each skipped unit is either
;; 00 followed by a non-0A byte, or a non-00 byte followed by any
;; character except newline, so the unit 00 0A is never skipped.
;; NOTE(review): the unit 00 0D (CR) can be skipped, so this also matches
;; text whose first line ends in CR LF; callers presumably try the DOS
;; pattern first -- confirm.
72 (defvar utf-16-be-unix-signature-regexp
73 "^\xfe\xff\\(\x0[^\xa]\\|[^\x0].\\)*\x0\xa")
;; Regexp matching, at the beginning of a line, the bytes FE FF (U+FEFF in
;; big-endian order), then zero or more two-byte units, then the bytes
;; 00 0D (CR as a big-endian 16-bit unit).  Each skipped unit is either
;; 00 followed by a non-0A byte, or a non-00 byte followed by any
;; character except newline, so the unit 00 0A (LF) is never skipped.
;; NOTE(review): nothing after 00 0D is checked, so this also matches text
;; whose first line ends in CR LF; callers presumably try the DOS pattern
;; first -- confirm.
74 (defvar utf-16-be-mac-signature-regexp
75 "^\xfe\xff\\(\x0[^\xa]\\|[^\x0].\\)*\x0\xd")
;; Regexp matching the bytes FE FF (U+FEFF in big-endian order) at the
;; beginning of a line, with no constraint on what follows.
76 (defvar utf-16-be-signature-regexp "^\xfe\xff")
;; Alist with a single entry that pairs the newline character (?\n) with
;; the value of `unicode-line-separator'.
;; NOTE(review): how the pairs are consumed is not visible here; presumably
;; they are special-case character mappings applied during conversion --
;; confirm at the caller.
78 (defvar unicode-special-relation-alist
79 (list (cons ?\n unicode-line-separator)))