1 ;;; un-trbase.el --- Basic Translation rule for Unicode.
3 ;; Copyright (C) 1999-2001 MIYASHITA Hisashi
5 ;; Keywords: mule, multilingual,
6 ;; character set, coding-system, ISO/IEC 10646,
9 ;; This file is part of Mule-UCS
11 ;; Mule-UCS is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; Mule-UCS is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with this program; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
;;; Because this module only defines translation rules,
;;; it should not be byte-compiled on its own.
;;; To use this module, require it
;;; inside an `eval-when-compile' form.
;; Load the "unicode" library unless the `mule-ucs-unicode' feature
;; has already been provided.
(unless (featurep 'mule-ucs-unicode)
  (load-library "unicode"))
39 ; Replacement configuration.
(defvar mucs-unicode-default-decode-replacement ?\?
  "Default replacement character: a question mark.
Its value is spliced into
`unicode-not-found-to-replace-or-invalid-assoc' as the car of the
pair whose cdr is `all'.")
(defvar mucs-unicode-default-encode-replacement
  (cn "0xFFFD")
  "Default replacement value, obtained by calling `cn' on \"0xFFFD\".
Its value is spliced into
`unicode-not-found-to-replace-or-invalid-assoc' as the cdr of the
pair whose car is `all'.")
46 ;; UCS replacement or ignore translation rule.
;; Catch-all association that `unicode-basic-stream-translation-rule'
;; splices in after `unicode-basic-translation-rule'.
;; The visible pairs relate `all' to
;; `mucs-unicode-default-encode-replacement' and
;; `mucs-unicode-default-decode-replacement' to `all'; a preceding
;; element refers to `unicode-ignore-characters'.
;; NOTE(review): the lines that build the element from
;; `unicode-ignore-characters' are not fully visible here -- confirm
;; against the complete file before editing this form.
49 (defvar unicode-not-found-to-replace-or-invalid-assoc
50 `(assoc (char-1 . ucs-generic)
54 unicode-ignore-characters)
55 (all . ,mucs-unicode-default-encode-replacement)
56 (,mucs-unicode-default-decode-replacement . all)))
57 "Translate any values to replacement character or invalid code.
58 If you want to deal with untranslated character, use this translation rule.")
;; Catch-all association that
;; `unicode-basic-non-stream-translation-rule' splices in after
;; `unicode-basic-translation-rule'.
;; NOTE(review): the pair list of this association is not visible
;; here -- confirm against the complete file before editing this form.
60 (defvar unicode-not-found-to-invalid-assoc
61 '(assoc (char-1 . ucs-generic)
64 "Translate any values to invalid code.
65 If you want to deal with untranslated character, use this translation rule.")
68 ;; Dealing with line separator problem.
(defvar lf-vs-cr-assoc
  (list 'assoc
        '(char-1 . ucs-generic)
        (list (cons ?\n unicode-cr)))
  "Association between `char-1' and `ucs-generic' with a single pair.
The pair relates the newline character (`?\\n') to the value of
`unicode-cr'.")
(defvar lf-vs-unicode-line-separator-assoc
  ;; The key is LF (`?\n'), matching both this variable's name and the
  ;; sibling `lf-vs-cr-assoc'; a CR key would contradict the "lf-vs-"
  ;; naming.
  `(assoc (char-1 . ucs-generic)
          ((?\n . ,unicode-line-separator)))
  "Association between `char-1' and `ucs-generic' with a single pair.
The pair relates the newline character (`?\\n') to the value of
`unicode-line-separator'.  It is spliced into
`unicode-line-separator-translation-rule'.")
80 ; Loader for parts of translation rule.
;; Alist of (CHARSET . LIBRARY) pairs.  `require-unicode-charset-data'
;; looks CHARSET up here with `assq' and loads the library named by
;; the cdr.  Entries whose cdr is nil (the mule-unicode-* charsets)
;; name no library.
;; NOTE(review): presumably each library holds the Unicode translation
;; table for its charset -- confirm against the data files.
;; NOTE(review): the line that opens the quoted list is not visible
;; here -- confirm against the complete file before editing this form.
83 (defvar unicode-charset-library-alist
85 (latin-iso8859-1 . uiso8859-1)
86 (latin-iso8859-2 . uiso8859-2)
87 (latin-iso8859-3 . uiso8859-3)
88 (latin-iso8859-4 . uiso8859-4)
89 (latin-iso8859-13 . uiso8859-13)
90 (latin-iso8859-14 . uiso8859-14)
91 (latin-iso8859-15 . uiso8859-15)
92 (latin-iso8859-16 . uiso8859-16)
93 (cyrillic-iso8859-5 . uiso8859-5)
94 (arabic-iso8859-6 . uiso8859-6)
95 (greek-iso8859-7 . uiso8859-7)
96 (hebrew-iso8859-8 . uiso8859-8)
97 (latin-iso8859-9 . uiso8859-9)
98 (latin-jisx0201 . ujisx0201)
99 (katakana-jisx0201 . ujisx0201)
100 (japanese-jisx0208 . ujisx0208)
101 (japanese-jisx0212 . ujisx0212)
102 (chinese-gb2312 . ugb2312)
103 (chinese-big5-1 . ubig5)
104 (chinese-big5-2 . ubig5)
105 (chinese-cns11643-1 . u-cns-1)
106 (chinese-cns11643-2 . u-cns-2)
107 (chinese-cns11643-3 . u-cns-3)
108 (chinese-cns11643-4 . u-cns-4)
109 (chinese-cns11643-5 . u-cns-5)
110 (chinese-cns11643-6 . u-cns-6)
111 (chinese-cns11643-7 . u-cns-7)
112 (korean-ksc5601 . uksc5601)
114 (mule-unicode-0100-24ff . nil)
115 (mule-unicode-2500-33ff . nil)
116 (mule-unicode-e000-ffff . nil)
117 (thai-tis620 . utis620)
118 (indian-is13194 . uiscii)
119 (ethiopic . uethiopic)
120 (chinese-sisheng . usisheng)
121 (vietnamese-viscii-lower . uviscii)
122 (vietnamese-viscii-upper . uviscii)
123 (tibetan . utibetan)))
(defun require-unicode-charset-data (charset)
  "Make sure the data library registered for CHARSET is loaded.
CHARSET is looked up in `unicode-charset-library-alist'.  When it has
no entry, or its entry names no library (cdr is nil), nothing is
loaded and nil is returned.  Otherwise the library is taken as already
available if its feature is provided; failing that, it is loaded from
`mucs-data-path'; failing that, it is `require'd through `load-path'.
Return non-nil when the library is available."
  (let ((package (cdr (assq charset unicode-charset-library-alist))))
    ;; Guard against nil: several alist entries deliberately carry no
    ;; library, and `symbol-name'/`require' on nil would look for a
    ;; file called "nil".
    (when package
      (or (featurep package)
          ;; NOERROR is t so that a missing file under `mucs-data-path'
          ;; falls through to `require' instead of signalling.
          (load (concat (file-name-as-directory mucs-data-path)
                        (symbol-name package))
                t)
          (require package)))))
134 ;; load-table & declare basic translation rule.
135 ;; translation definition start
136 ;;; basic translation rule, which can be modified dynamically.
;; Declare `unicode-basic-translation-rule' with
;; `tae-declare-translation'.  The visible parts of the rule body call
;; `require-unicode-charset-data' on a charset, read the value of the
;; symbol stored in that charset's `unicode-assoc' property, and refer
;; to `unicode-basic-translation-charset-order-list'.
;; NOTE(review): presumably the body iterates over that order list,
;; one charset at a time -- the iteration form is not visible here.
;; NOTE(review): several lines of this form, including its closing
;; parenthesis, are not visible here -- confirm against the complete
;; file before editing it.
137 (tae-declare-translation
138 'unicode-basic-translation-rule
142 (unicode-get-translation-rule-from-charset
144 (require-unicode-charset-data charset)
145 (tae-declare-translation
147 (symbol-value (get charset 'unicode-assoc))
150 unicode-basic-translation-charset-order-list))
152 t ;; for dynamic modification!
155 ;; below translations are static.
;; Declare `unicode-basic-stream-translation-rule' as a `|' form whose
;; members are the symbol `unicode-basic-translation-rule' followed by
;; the value of `unicode-not-found-to-replace-or-invalid-assoc'.
;; NOTE(review): the meaning of the trailing t is defined by
;; `tae-declare-translation', which lives outside this file.
(tae-declare-translation
 'unicode-basic-stream-translation-rule
 (list '|
       'unicode-basic-translation-rule
       unicode-not-found-to-replace-or-invalid-assoc)
 t)
;; Declare `unicode-basic-non-stream-translation-rule' as a `|' form
;; whose members are the symbol `unicode-basic-translation-rule'
;; followed by the value of `unicode-not-found-to-invalid-assoc'.
;; NOTE(review): the meaning of the trailing t is defined by
;; `tae-declare-translation', which lives outside this file.
(tae-declare-translation
 'unicode-basic-non-stream-translation-rule
 (list '|
       'unicode-basic-translation-rule
       unicode-not-found-to-invalid-assoc)
 t)
;; Declare `unicode-line-separator-translation-rule' as a `|' form
;; whose members are the value of `lf-vs-unicode-line-separator-assoc'
;; followed by the symbol `unicode-basic-stream-translation-rule'.
;; NOTE(review): the meaning of the trailing t is defined by
;; `tae-declare-translation', which lives outside this file.
(tae-declare-translation
 'unicode-line-separator-translation-rule
 (list '|
       lf-vs-unicode-line-separator-assoc
       'unicode-basic-stream-translation-rule)
 t)
;; Declare `unicode-mac-line-separator-translation-rule' with
;; `tae-declare-translation'; the visible part of the rule body ends
;; with the symbol `unicode-basic-stream-translation-rule'.
;; NOTE(review): the line that opens the rule body is not visible
;; here; presumably it mirrors
;; `unicode-line-separator-translation-rule' with a CR association in
;; first position -- confirm against the complete file.
172 (tae-declare-translation
173 'unicode-mac-line-separator-translation-rule
175 unicode-basic-stream-translation-rule) t)
177 ;; translation rule definition ends here
181 ;;; un-trbase ends here.