1 ;;; mule-category.el --- category functions for SXEmacs/Mule.
3 ;; Copyright (C) 1992,93,94,95 Free Software Foundation, Inc.
4 ;; Copyright (C) 1995, 1997, 1999 Electrotechnical Laboratory, JAPAN.
5 ;; Licensed to the Free Software Foundation.
6 ;; Copyright (C) 1995 Amdahl Corporation.
7 ;; Copyright (C) 1995 Sun Microsystems.
9 ;; This file is part of SXEmacs.
11 ;; SXEmacs is free software: you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation, either version 3 of the License, or
14 ;; (at your option) any later version.
16 ;; SXEmacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
26 ;; Functions for working with category tables, which are a particular
27 ;; type of char table. Some function names / arguments should be
28 ;; parallel with syntax tables.
30 ;; Written by Ben Wing <ben@xemacs.org>. The initialization code
31 ;; at the end of this file comes from Mule.
32 ;; Some bugfixes by Jareth Hein <jhod@po.iijnet.or.jp>
(defvar defined-category-hashtable
  (make-hash-table :size 50)
  "Hash table mapping each defined category designator to its doc string.
Entries are added by `define-category' and removed by `undefine-category'.")
(defun define-category (designator doc-string)
  "Make a new category whose designator is DESIGNATOR.
DESIGNATOR should be a visible letter of ' ' thru '~'.
DOC-STRING is a doc string for the category.
Letters of 'a' thru 'z' are already used or kept for the system.

DESIGNATOR is checked with `category-designator-p' and DOC-STRING with
`stringp' before anything is stored.  The pair is recorded in
`defined-category-hashtable'; the return value is that of `puthash'."
  ;; Validate both arguments before touching the table.
  (check-argument-type 'category-designator-p designator)
  (check-argument-type 'stringp doc-string)
  ;; The designator is the key, its doc string the value.
  (puthash designator doc-string defined-category-hashtable))
(defun undefine-category (designator)
  "Remove DESIGNATOR from the set of defined category designators.
DESIGNATOR is first checked with `category-designator-p'."
  (check-argument-type 'category-designator-p designator)
  ;; Dropping the table entry is all that \"undefining\" means here.
  (let ((table defined-category-hashtable))
    (remhash designator table)))
(defun defined-category-p (designator)
  "Return non-nil if DESIGNATOR names a category that is currently defined.
The non-nil value is whatever is stored for DESIGNATOR in
`defined-category-hashtable'."
  ;; Anything that is not even a valid designator cannot be defined.
  (if (category-designator-p designator)
      (gethash designator defined-category-hashtable)
    nil))
(defun defined-category-list ()
  "Return a list of the currently defined categories.
Categories are given by their designators."
  ;; `list' has to be bound locally: the collector below `setq's it, and
  ;; without a binding it would assign a global variable instead.
  (let (list)
    (maphash #'(lambda (key value)
                 ;; Only the designator (the key) is collected; the doc
                 ;; string in VALUE is not needed here.
                 (setq list (cons key list)))
             defined-category-hashtable)
    ;; Hand the accumulated designators back to the caller.
    (nreverse list)))
(defun undefined-category-designator ()
  "Return an undefined category designator, or nil if there are none.
Candidates are the characters with codes 32 through 126, tried in
increasing order; the first one without an entry in
`defined-category-hashtable' is returned."
  (let ((a 32)
        found)
    (while (and (< a 127) (not found))
      ;; The table is keyed by designator characters, so convert the
      ;; code to a character before looking it up.
      (let ((candidate (make-char 'ascii a)))
        (unless (gethash candidate defined-category-hashtable)
          (setq found candidate)))
      ;; Advance to the next code; without this the loop cannot end
      ;; when the current candidate is already defined.
      (setq a (1+ a)))
    found))
(defun category-doc-string (designator)
  "Return the doc string recorded for the category named by DESIGNATOR.
DESIGNATOR is checked with `defined-category-p' before the lookup."
  (check-argument-type 'defined-category-p designator)
  (let ((doc (gethash designator defined-category-hashtable)))
    doc))
(defun modify-category-entry (char-range designator &optional category-table reset)
  "Add a category to the categories associated with CHAR-RANGE.
CHAR-RANGE is a single character or a range of characters,
 as per `put-char-table'.
The category is given by a designator character.
The changes are made in CATEGORY-TABLE, which defaults to the current
 buffer's category table.
If optional fourth argument RESET is non-nil, previous categories associated
 with CHAR-RANGE are removed before adding the specified category."
  (or category-table (setq category-table (category-table)))
  (check-argument-type 'category-table-p category-table)
  (check-argument-type 'defined-category-p designator)
  (if reset
      ;; clear all existing stuff.
      (put-char-table char-range nil category-table))
  ;; Visit every entry of CATEGORY-TABLE that falls inside CHAR-RANGE.
  (map-char-table
   #'(lambda (key value)
       ;; make sure that this range has a bit-vector assigned to it
       ;; (95 bits: one per designator code from 32 up).  An existing
       ;; vector is copied so the change stays local to this entry.
       (if (not (bit-vector-p value))
           (setq value (make-bit-vector 95 0))
         (setq value (copy-sequence value)))
       ;; set the appropriate bit in that vector: bit index is the
       ;; designator's code minus 32.
       (aset value (- designator 32) 1)
       ;; put the vector back, thus assuring we have a unique setting
       ;; for this range
       (put-char-table key value category-table))
   category-table char-range))
(defun char-category-list (character &optional category-table)
  "Return a list of the categories that CHARACTER is in.
CATEGORY-TABLE defaults to the current buffer's category table.
The categories are given by their designators."
  (or category-table (setq category-table (category-table)))
  (check-argument-type 'category-table-p category-table)
  (let ((vec (get-char-table character category-table)))
    ;; An entry that holds no bit vector has no categories.  This is the
    ;; same test `modify-category-entry' applies to table values.
    (if (not (bit-vector-p vec))
        nil
      (let ((a 32)
            list)
        ;; Bit (A - 32) of the vector stands for the designator whose
        ;; character code is A.
        (while (< a 127)
          (if (= 1 (aref vec (- a 32)))
              (setq list (cons (make-char 'ascii a) list)))
          (setq a (1+ a)))
        ;; Designators were consed in descending order; return them
        ;; in ascending order.
        (nreverse list)))))
122 ;; implemented in C, file chartab.c (97/3/14 jhod@po.iijnet.or.jp)
123 ;(defun char-in-category-p (char category &optional table)
124 ; "Return non-nil if CHAR is in CATEGORY.
125 ;TABLE defaults to the current buffer's category table.
126 ;Categories are specified by their designators."
127 ; (or table (setq table (category-table)))
128 ; (check-argument-type 'category-table-p table)
129 ; (check-argument-type 'category-designator-p category)
130 ; (let ((vec (get-char-table char table)))
132 ; (= 1 (aref vec (- category 32))))))
(defun describe-category ()
  "Describe the category specifications in the category table.
The descriptions are inserted in a buffer, which is then displayed."
  (interactive)
  ;; The description is produced inside a function handed to
  ;; `with-displaying-help-buffer', and written to `standard-output'.
  (with-displaying-help-buffer
   (lambda ()
     (describe-category-table (category-table) standard-output))))
;; Print to STREAM a description of the category assignments in TABLE.
;; NOTE(review): several lines of this definition (the variable bindings,
;; some `format' arguments and the call that walks TABLE) are not visible
;; in this excerpt; the notes below describe only the lines that are.
142 (defun describe-category-table (table stream)
;; Printer for one entry: FIRST and LAST bound the range, VALUE is its
;; category bit vector.  Presumably this is the function later called
;; through `describe-one' -- confirm against the full source.
147 (lambda (first last value stream)
;; Entries whose bit vector has no bit set are not printed.
148 (if (and (bit-vector-p value)
149 (> (reduce '+ value) 0))
;; A range whose two ends are equal is printed as a single item;
;; otherwise both ends are printed.
151 (if (equal first last)
152 (cond ((vectorp first)
153 (princ (format "%s, row %d"
159 (princ (charset-name first) stream))
160 (t (princ first stream)))
161 (cond ((vectorp first)
162 (princ (format "%s, rows %d .. %d"
169 (princ (format "%s .. %s" first last)
;; After the range itself, list the categories it belongs to.
171 (describe-category-code value stream))))))
;; Callback receiving one RANGE/VALUE pair at a time.  The visible tests
;; check whether RANGE directly follows the pending run: either the next
;; character code in the same charset, or the next row of the same
;; charset when ranges are vectors.
173 (lambda (range value)
175 (and (characterp range)
176 (characterp first-char)
177 (eq (char-charset range) (char-charset first-char))
178 (= (char-to-int last-char) (1- (char-to-int range))))
181 (eq (aref range 0) (aref first-char 0))
182 (= (aref last-char 1) (1- (aref range 1))))
183 (equal value prev-val)))
;; When the tests succeed, RANGE becomes the new end of the pending run.
184 (setq last-char range)
;; Otherwise the pending run is printed and cleared, and RANGE is
;; printed on its own.
;; NOTE(review): no visible line gives `first-char' or `prev-val' a
;; non-nil value -- confirm where they are set.
187 (funcall describe-one first-char last-char prev-val stream)
188 (setq first-char nil)))
189 (funcall describe-one range range value stream))
;; Presumably prints a run still pending after the last entry; the
;; guarding condition is not visible here.
193 (funcall describe-one first-char last-char prev-val stream))))
;; Print the categories encoded in the bit vector CODE, then the doc
;; string of each.  Output goes to STREAM, or to the current
;; `standard-output' when STREAM is nil.
;; NOTE(review): the loop headers and several `princ' calls of this
;; definition are not visible in this excerpt.
195 (defun describe-category-code (code stream)
196 (let ((standard-output (or stream standard-output)))
197 (princ "\tin categories: ")
;; A CODE that is not a bit vector is handled separately (that branch is
;; not visible here).
198 (if (not (bit-vector-p code))
;; First pass: bit I of CODE stands for the designator whose character
;; code is 32 + I; each set bit prints that designator.
203 (if (= 1 (aref code i))
;; `already-matched' records whether a designator has been printed yet.
205 (if (not already-matched)
206 (setq already-matched t)
208 (princ (int-to-char (+ 32 i)))))
210 (if (not already-matched)
;; Second pass: print the doc string of every category whose bit is set.
214 (if (= 1 (aref code i))
215 (princ (format "\n\t\tmeaning: %s"
216 (category-doc-string (int-to-char (+ 32 i))))))
;; Table consumed by the initialization code that follows this
;; definition: each entry is (CHARSET DESIGNATOR [DOC-STRING]).
;; NOTE(review): some entries and the end of the list are not visible in
;; this excerpt.
220 (defconst predefined-category-list
221 '((latin-iso8859-1 ?l "Latin-1 through Latin-5 character set")
226 (cyrillic-iso8859-5 ?y "Cyrillic character set")
227 (arabic-iso8859-6 ?b "Arabic character set")
228 (greek-iso8859-7 ?g "Greek character set")
229 (hebrew-iso8859-8 ?w "Hebrew character set")
230 (katakana-jisx0201 ?k "Japanese 1-byte Katakana character set")
231 (latin-jisx0201 ?r "Japanese 1-byte Roman character set")
;; Several charsets share the designator ?j; the last one carries no
;; doc string of its own.
232 (japanese-jisx0208-1978 ?j "Japanese 2-byte character set (old)")
233 (japanese-jisx0208 ?j "Japanese 2-byte character set")
234 (japanese-jisx0212 ?j)
235 (chinese-gb2312 ?c "Chinese GB (China, PRC) 2-byte character set")
;; Likewise ?t is shared, with the doc string given on the first entry.
236 (chinese-cns11643-1 ?t "Chinese Taiwan (CNS or Big5) 2-byte character set")
237 (chinese-cns11643-2 ?t)
240 (korean-ksc5601 ?h "Hangul (Korean) 2-byte character set")
242 "List of predefined categories.
243 Each element is a list of a charset, a designator, and maybe a doc string.")
;; Load-time initialization of the predefined categories.
;; NOTE(review): the loop headers around the statements below are not
;; visible in this excerpt.
;; Define the two categories that are applied per character code below.
246 (define-category ?a "ASCII character set.")
247 (define-category ?l "Latin-1 through Latin-5 character set")
;; Put the entry I (bound on a line not shown) into both categories.
250 (modify-category-entry i ?a)
251 (modify-category-entry i ?l)
;; Walk `predefined-category-list'; each element is
;; (CHARSET DESIGNATOR [DOC-STRING]).
253 (setq l predefined-category-list)
;; Define the category only when the element carries a doc string.
;; NOTE(review): `defined-category-p' is given (nth 2 ...), the doc
;; string, although it tests a designator; (nth 1 ...) looks like what
;; was intended.  As written the test never finds the category defined,
;; so a later doc string for the same designator replaces an earlier one.
255 (if (and (nth 2 (car l))
256 (not (defined-category-p (nth 2 (car l)))))
257 (define-category (nth 1 (car l)) (nth 2 (car l))))
;; Put the whole charset into the category named by its designator.
258 (modify-category-entry (car (car l)) (nth 1 (car l)))
261 ;;; Setting word boundary.
;; Assign the two variables that hold pairs of category designators.
;; NOTE(review): the value given to `word-combining-categories' and the
;; end of the `word-separating-categories' list are not visible in this
;; excerpt.
263 (setq word-combining-categories
;; Each element below is a pair of category designators; the comment at
;; the end of each line names the two categories in the same order.
266 (setq word-separating-categories ; (2-byte character sets)
267 '((?A . ?K) ; Alpha numeric - Katakana
268 (?A . ?C) ; Alpha numeric - Chinese
269 (?H . ?A) ; Hiragana - Alpha numeric
270 (?H . ?K) ; Hiragana - Katakana
271 (?H . ?C) ; Hiragana - Chinese
272 (?K . ?A) ; Katakana - Alpha numeric
273 (?K . ?C) ; Katakana - Chinese
274 (?C . ?A) ; Chinese - Alpha numeric
275 (?C . ?K) ; Chinese - Katakana
;;; As far as I know at present, Japanese and Chinese text can be
;;; broken at any point in a line, subject to the `kinsoku' restrictions.
280 ;;; #### SJT this needs to be set by language environments and probably should
281 ;;; be buffer-local---strategy for dealing with this: check all $language.el
282 ;;; files and also mule-base/$language-utils.el files for variables set;
283 ;;; these should be made buffer local and some kind of a- or p-list of vars
284 ;;; to be set for a language environment created.
(defvar word-across-newline
  "\\(\\cj\\|\\cc\\|\\ct\\)"
  "Regexp matching a character that may belong to a word spanning a newline.
The default value matches a character in category `j', `c' or `t'.")
(defvar ascii-char
  "[\40-\176]"
  "Regexp matching one character with a code from octal 40 through 176.")

(defvar ascii-space
  "[ \t]"
  "Regexp matching a space or a tab.")

(defvar ascii-symbols
  "[\40-\57\72-\100\133-\140\173-\176]"
  "Regexp matching one character from the octal code ranges 40-57,
72-100, 133-140 and 173-176.")

(defvar ascii-numeric
  "[\60-\71]"
  "Regexp matching one character with a code from octal 60 through 71.")

(defvar ascii-English-Upper
  "[\101-\132]"
  "Regexp matching one character with a code from octal 101 through 132.")

(defvar ascii-English-Lower
  "[\141-\172]"
  "Regexp matching one character with a code from octal 141 through 172.")

(defvar ascii-alphanumeric
  "[\60-\71\101-\132\141-\172]"
  "Regexp matching one character from the octal code ranges 60-71,
101-132 and 141-172.")
;; Regexp fragments for two-byte (Japanese) text.
;; The values written with `\\c' match by category designator.  The
;; bracketed values are character ranges whose end points are written
;; with `\e$B' ... `\e(B' escape sequences around two-byte codes.
;; NOTE(review): these literals are encoding-sensitive and are spread
;; over several lines in this excerpt -- do not re-wrap or re-encode them
;; without confirming the file's coding system.
296 (defvar kanji-char "\\cj")
;; A single two-byte character, not a bracketed range.
297 (defvar kanji-space "
\e$B!!
\e(B")
298 (defvar kanji-symbols "\\cS")
;; Ranges for two-byte digits and upper/lower-case Latin letters.
299 (defvar kanji-numeric "[
\e$B#0
\e(B-
\e$B#9
\e(B]")
300 (defvar kanji-English-Upper "[
\e$B#A
\e(B-
\e$B#Z
\e(B]")
301 (defvar kanji-English-Lower "[
\e$B#a
\e(B-
\e$B#z
\e(B]")
;; Hiragana and Katakana are matched by category rather than by range.
302 (defvar kanji-hiragana "\\cH")
303 (defvar kanji-katakana "\\cK")
;; Ranges for two-byte Greek and Russian letters.
304 (defvar kanji-Greek-Upper "[
\e$B&!
\e(B-
\e$B&8
\e(B]")
305 (defvar kanji-Greek-Lower "[
\e$B&A
\e(B-
\e$B&X
\e(B]")
306 (defvar kanji-Russian-Upper "[
\e$B'!
\e(B-
\e$B'A
\e(B]")
307 (defvar kanji-Russian-Lower "[
\e$B'Q
\e(B-
\e$B'q
\e(B]")
;; Ranges named for the first and second levels of Kanji.
308 (defvar kanji-Kanji-1st-Level "[
\e$B0!
\e(B-
\e$BOS
\e(B]")
309 (defvar kanji-Kanji-2nd-Level "[
\e$BP!
\e(B-
\e$Bt$
\e(B]")
;; Alternation of the categories `H', `K' and `C'.
311 (defvar kanji-kanji-char "\\(\\cH\\|\\cK\\|\\cC\\)")