1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of SXEmacs
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* Synched up with: FSF 20.3. Not in FSF. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
32 #include "ui/device.h"
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_thai_tis620;
45 Lisp_Object Vcharset_greek_iso8859_7;
46 Lisp_Object Vcharset_arabic_iso8859_6;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_katakana_jisx0201;
49 Lisp_Object Vcharset_latin_jisx0201;
50 Lisp_Object Vcharset_cyrillic_iso8859_5;
51 Lisp_Object Vcharset_latin_iso8859_9;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_chinese_gb2312;
54 Lisp_Object Vcharset_japanese_jisx0208;
55 Lisp_Object Vcharset_korean_ksc5601;
56 Lisp_Object Vcharset_japanese_jisx0212;
57 Lisp_Object Vcharset_chinese_cns11643_1;
58 Lisp_Object Vcharset_chinese_cns11643_2;
59 Lisp_Object Vcharset_chinese_big5_1;
60 Lisp_Object Vcharset_chinese_big5_2;
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite;
65 /* Hash tables for composite chars. One maps string representing
66 composed chars to their equivalent chars; one goes the
68 Lisp_Object Vcomposite_char_char2string_hash_table;
69 Lisp_Object Vcomposite_char_string2char_hash_table;
71 static int composite_char_row_next;
72 static int composite_char_col_next;
74 #endif /* ENABLE_COMPOSITE_CHARS */
76 struct charset_lookup *chlook;
78 static const struct lrecord_description charset_lookup_description_1[] = {
79 {XD_LISP_OBJECT_ARRAY,
80 offsetof(struct charset_lookup, charset_by_leading_byte),
85 static const struct struct_description charset_lookup_description = {
86 sizeof(struct charset_lookup),
87 charset_lookup_description_1
90 /* Table of number of bytes in the string representation of a character
91 indexed by the first byte of that representation.
93 rep_bytes_by_first_byte(c) is more efficient than the equivalent
94 canonical computation:
96 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
98 const Bytecount rep_bytes_by_first_byte[0xA0] = { /* 0x00 - 0x7f are for straight ASCII */
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 /* 0x80 - 0x8f are for Dimension-1 official charsets */
108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 /* 0x90 - 0x9d are for Dimension-2 official charsets */
110 /* 0x9e is for Dimension-1 private charsets */
111 /* 0x9f is for Dimension-2 private charsets */
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
115 Lisp_Object Qcharsetp;
117 /* Qdoc_string, Qdimension, Qchars defined in general.c */
118 Lisp_Object Qregistry, Qfinal, Qgraphic;
119 Lisp_Object Qdirection;
120 Lisp_Object Qreverse_direction_charset;
121 Lisp_Object Qleading_byte;
122 Lisp_Object Qshort_name, Qlong_name;
138 Qjapanese_jisx0208_1978,
144 Qchinese_cns11643_2, Qchinese_big5_1, Qchinese_big5_2, Qcomposite;
146 Lisp_Object Ql2r, Qr2l;
148 Lisp_Object Vcharset_hash_table;
150 /* Composite characters are characters constructed by overstriking two
151 or more regular characters.
153 1) The old Mule implementation involves storing composite characters
154 in a buffer as a tag followed by all of the actual characters
155 used to make up the composite character. I think this is a bad
156 idea; it greatly complicates code that wants to handle strings
157 one character at a time because it has to deal with the possibility
158 of great big ungainly characters. It's much more reasonable to
159 simply store an index into a table of composite characters.
161 2) The current implementation only allows for 16,384 separate
162 composite characters over the lifetime of the SXEmacs process.
163 This could become a potential problem if the user
164 edited lots of different files that use composite characters.
165 Due to FSF bogosity, increasing the number of allowable
166 composite characters under Mule would decrease the number
167 of possible faces that can exist. Mule already has shrunk
168 this to 2048, and further shrinkage would become uncomfortable.
169 No such problems exist in SXEmacs.
171 Composite characters could be represented as 0x80 C1 C2 C3,
172 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
173 for slightly under 2^20 (one million) composite characters
174 over the SXEmacs process lifetime, and you only need to
175 increase the size of a Mule character from 19 to 21 bits.
176 Or you could use 0x80 C1 C2 C3 C4, allowing for about
177 85 million (slightly over 2^26) composite characters. */
179 /************************************************************************/
180 /* Basic Emchar functions */
181 /************************************************************************/
183 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
184 string in STR. Returns the number of bytes stored.
185 Do not call this directly. Use the macro set_charptr_emchar() instead.
188 Bytecount non_ascii_set_charptr_emchar(Bufbyte * str, Emchar c)
196 BREAKUP_CHAR(c, charset, c1, c2);
197 lb = CHAR_LEADING_BYTE(c);
198 if (LEADING_BYTE_PRIVATE_P(lb))
199 *p++ = PRIVATE_LEADING_BYTE_PREFIX(lb);
201 if (EQ(charset, Vcharset_control_1))
210 /* Return the first character from a Mule-encoded string in STR,
211 assuming it's non-ASCII. Do not call this directly.
212 Use the macro charptr_emchar() instead. */
214 Emchar non_ascii_charptr_emchar(const Bufbyte * str)
216 Bufbyte i0 = *str, i1, i2 = 0;
219 if (i0 == LEADING_BYTE_CONTROL_1)
220 return (Emchar) (*++str - 0x20);
222 if (LEADING_BYTE_PREFIX_P(i0))
227 charset = CHARSET_BY_LEADING_BYTE(i0);
228 if (XCHARSET_DIMENSION(charset) == 2)
231 return MAKE_CHAR(charset, i1, i2);
234 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
235 Do not call this directly. Use the macro valid_char_p() instead. */
237 int non_ascii_valid_char_p(Emchar ch)
241 /* Must have only lowest 19 bits set */
245 f1 = CHAR_FIELD1(ch);
246 f2 = CHAR_FIELD2(ch);
247 f3 = CHAR_FIELD3(ch);
252 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
253 (f2 > MAX_CHAR_FIELD2_OFFICIAL
254 && f2 < MIN_CHAR_FIELD2_PRIVATE)
255 || f2 > MAX_CHAR_FIELD2_PRIVATE)
260 if (f3 != 0x20 && f3 != 0x7F
261 && !(f2 >= MIN_CHAR_FIELD2_PRIVATE
262 && f2 <= MAX_CHAR_FIELD2_PRIVATE))
266 NOTE: This takes advantage of the fact that
267 FIELD2_TO_OFFICIAL_LEADING_BYTE and
268 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
271 CHARSET_BY_LEADING_BYTE(f2 +
272 FIELD2_TO_OFFICIAL_LEADING_BYTE);
273 if (EQ(charset, Qnil))
275 return (XCHARSET_CHARS(charset) == 96);
279 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
280 (f1 > MAX_CHAR_FIELD1_OFFICIAL
281 && f1 < MIN_CHAR_FIELD1_PRIVATE)
282 || f1 > MAX_CHAR_FIELD1_PRIVATE)
284 if (f2 < 0x20 || f3 < 0x20)
287 #ifdef ENABLE_COMPOSITE_CHARS
288 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE ==
289 LEADING_BYTE_COMPOSITE) {
293 Vcomposite_char_char2string_hash_table,
298 #endif /* ENABLE_COMPOSITE_CHARS */
300 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
301 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE
302 && f1 <= MAX_CHAR_FIELD1_PRIVATE))
305 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
307 CHARSET_BY_LEADING_BYTE(f1 +
308 FIELD1_TO_OFFICIAL_LEADING_BYTE);
311 CHARSET_BY_LEADING_BYTE(f1 +
312 FIELD1_TO_PRIVATE_LEADING_BYTE);
314 if (EQ(charset, Qnil))
316 return (XCHARSET_CHARS(charset) == 96);
320 /************************************************************************/
321 /* Basic string functions */
322 /************************************************************************/
324 /* Copy the character pointed to by SRC into DST. Do not call this
325 directly. Use the macro charptr_copy_char() instead.
326 Return the number of bytes copied. */
328 Bytecount non_ascii_charptr_copy_char(const Bufbyte * src, Bufbyte * dst)
330 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE(*src);
332 for (i = bytes; i; i--, dst++, src++)
337 /************************************************************************/
338 /* streams of Emchars */
339 /************************************************************************/
341 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
342 The functions below are not meant to be called directly; use
343 the macros in insdel.h. */
345 Emchar Lstream_get_emchar_1(Lstream * stream, int ch)
347 Bufbyte str[MAX_EMCHAR_LEN];
348 Bufbyte *strptr = str;
351 str[0] = (Bufbyte) ch;
353 for (bytes = REP_BYTES_BY_FIRST_BYTE(ch) - 1; bytes; bytes--) {
354 int c = Lstream_getc(stream);
355 bufpos_checking_assert(c >= 0);
356 *++strptr = (Bufbyte) c;
358 return charptr_emchar(str);
361 int Lstream_fput_emchar(Lstream * stream, Emchar ch)
363 Bufbyte str[MAX_EMCHAR_LEN];
364 Bytecount len = set_charptr_emchar(str, ch);
365 return Lstream_write(stream, str, len);
368 void Lstream_funget_emchar(Lstream * stream, Emchar ch)
370 Bufbyte str[MAX_EMCHAR_LEN];
371 Bytecount len = set_charptr_emchar(str, ch);
372 Lstream_unread(stream, str, len);
375 /************************************************************************/
377 /************************************************************************/
379 static Lisp_Object mark_charset(Lisp_Object obj)
381 Lisp_Charset *cs = XCHARSET(obj);
383 mark_object(cs->short_name);
384 mark_object(cs->long_name);
385 mark_object(cs->doc_string);
386 mark_object(cs->registry);
387 mark_object(cs->ccl_program);
392 print_charset(Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
394 Lisp_Charset *cs = XCHARSET(obj);
397 error("printing unreadable object #<charset %s 0x%x>",
398 string_data(XSYMBOL(CHARSET_NAME(cs))->name),
401 write_c_string("#<charset ", printcharfun);
402 print_internal(CHARSET_NAME(cs), printcharfun, 0);
403 write_c_string(" ", printcharfun);
404 print_internal(CHARSET_SHORT_NAME(cs), printcharfun, 1);
405 write_c_string(" ", printcharfun);
406 print_internal(CHARSET_LONG_NAME(cs), printcharfun, 1);
407 write_c_string(" ", printcharfun);
408 print_internal(CHARSET_DOC_STRING(cs), printcharfun, 1);
409 write_fmt_string(printcharfun, " %s %s cols=%d g%d final='%c' reg=",
410 (CHARSET_TYPE(cs) == CHARSET_TYPE_94 ? "94" :
411 CHARSET_TYPE(cs) == CHARSET_TYPE_96 ? "96" :
412 CHARSET_TYPE(cs) == CHARSET_TYPE_94X94 ? "94x94" :
414 (CHARSET_DIRECTION(cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l"),
415 CHARSET_COLUMNS(cs), CHARSET_GRAPHIC(cs), CHARSET_FINAL(cs));
416 print_internal(CHARSET_REGISTRY(cs), printcharfun, 0);
417 write_fmt_str(printcharfun, " 0x%x>", cs->header.uid);
420 static const struct lrecord_description charset_description[] = {
421 {XD_LISP_OBJECT, offsetof(Lisp_Charset, name)},
422 {XD_LISP_OBJECT, offsetof(Lisp_Charset, doc_string)},
423 {XD_LISP_OBJECT, offsetof(Lisp_Charset, registry)},
424 {XD_LISP_OBJECT, offsetof(Lisp_Charset, short_name)},
425 {XD_LISP_OBJECT, offsetof(Lisp_Charset, long_name)},
426 {XD_LISP_OBJECT, offsetof(Lisp_Charset, reverse_direction_charset)},
427 {XD_LISP_OBJECT, offsetof(Lisp_Charset, ccl_program)},
431 DEFINE_LRECORD_IMPLEMENTATION("charset", charset,
432 mark_charset, print_charset, 0, 0, 0,
433 charset_description, Lisp_Charset);
435 /* Make a new charset. */
436 /* #### SJT Should generic properties be allowed? */
438 make_charset(int id, Lisp_Object name, unsigned char rep_bytes,
439 unsigned char type, unsigned char columns, unsigned char graphic,
440 Bufbyte final, unsigned char direction, Lisp_Object short_name,
441 Lisp_Object long_name, Lisp_Object doc, Lisp_Object reg)
444 Lisp_Charset *cs = alloc_lcrecord_type(Lisp_Charset, &lrecord_charset);
448 XSETCHARSET(obj, cs);
451 CHARSET_NAME(cs) = name;
452 CHARSET_SHORT_NAME(cs) = short_name;
453 CHARSET_LONG_NAME(cs) = long_name;
454 CHARSET_REP_BYTES(cs) = rep_bytes;
455 CHARSET_DIRECTION(cs) = direction;
456 CHARSET_TYPE(cs) = type;
457 CHARSET_COLUMNS(cs) = columns;
458 CHARSET_GRAPHIC(cs) = graphic;
459 CHARSET_FINAL(cs) = final;
460 CHARSET_DOC_STRING(cs) = doc;
461 CHARSET_REGISTRY(cs) = reg;
462 CHARSET_CCL_PROGRAM(cs) = Qnil;
463 CHARSET_REVERSE_DIRECTION_CHARSET(cs) = Qnil;
465 CHARSET_DIMENSION(cs) = (CHARSET_TYPE(cs) == CHARSET_TYPE_94 ||
466 CHARSET_TYPE(cs) == CHARSET_TYPE_96) ? 1 : 2;
467 CHARSET_CHARS(cs) = (CHARSET_TYPE(cs) == CHARSET_TYPE_94 ||
468 CHARSET_TYPE(cs) == CHARSET_TYPE_94X94) ? 94 : 96;
471 /* some charsets do not have final characters. This includes
472 ASCII, Control-1, Composite, and the two faux private
475 (chlook->charset_by_attributes[type][final][direction]));
476 chlook->charset_by_attributes[type][final][direction] = obj;
479 assert(NILP(chlook->charset_by_leading_byte[id - 128]));
480 chlook->charset_by_leading_byte[id - 128] = obj;
482 /* Some charsets are "faux" and don't have names or really exist at
483 all except in the leading-byte table. */
485 Fputhash(name, obj, Vcharset_hash_table);
489 static int get_unallocated_leading_byte(int dimension)
493 if (dimension == 1) {
494 if (chlook->next_allocated_1_byte_leading_byte >
495 (Bufbyte)MAX_LEADING_BYTE_PRIVATE_1)
498 lb = chlook->next_allocated_1_byte_leading_byte++;
499 #if MAX_LEADING_BYTE_PRIVATE_2 == 255
500 } else if (chlook->next_allocated_2_byte_leading_byte == 0) {
503 } else if (chlook->next_allocated_2_byte_leading_byte >
504 (Bufbyte)MAX_LEADING_BYTE_PRIVATE_2) {
508 lb = chlook->next_allocated_2_byte_leading_byte++;
513 ("No more character sets free for this dimension",
514 make_int(dimension));
519 /************************************************************************/
520 /* Basic charset Lisp functions */
521 /************************************************************************/
523 DEFUN("charsetp", Fcharsetp, 1, 1, 0, /*
524 Return non-nil if OBJECT is a charset.
528 return CHARSETP(object) ? Qt : Qnil;
531 DEFUN("find-charset", Ffind_charset, 1, 1, 0, /*
532 Retrieve the charset of the given name.
533 If CHARSET-OR-NAME is a charset object, it is simply returned.
534 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
535 nil is returned. Otherwise the associated charset object is returned.
539 if (CHARSETP(charset_or_name))
540 return charset_or_name;
542 CHECK_SYMBOL(charset_or_name);
543 return Fgethash(charset_or_name, Vcharset_hash_table, Qnil);
546 DEFUN("get-charset", Fget_charset, 1, 1, 0, /*
547 Retrieve the charset of the given name.
548 Same as `find-charset' except an error is signalled if there is no such
549 charset instead of returning nil.
553 Lisp_Object charset = Ffind_charset(name);
556 signal_simple_error("No such charset", name);
560 /* We store the charsets in hash tables with the names as the key and the
561 actual charset object as the value. Occasionally we need to use them
562 in a list format. These routines provide us with that. */
563 struct charset_list_closure {
564 Lisp_Object *charset_list;
568 add_charset_to_list_mapper(Lisp_Object key, Lisp_Object value,
569 void *charset_list_closure)
571 /* This function can GC */
572 struct charset_list_closure *chcl =
573 (struct charset_list_closure *)charset_list_closure;
574 Lisp_Object *charset_list = chcl->charset_list;
576 *charset_list = Fcons(XCHARSET_NAME(value), *charset_list);
580 DEFUN("charset-list", Fcharset_list, 0, 0, 0, /*
581 Return a list of the names of all defined charsets.
585 Lisp_Object charset_list = Qnil;
587 struct charset_list_closure charset_list_closure;
589 GCPRO1(charset_list);
590 charset_list_closure.charset_list = &charset_list;
591 elisp_maphash(add_charset_to_list_mapper, Vcharset_hash_table,
592 &charset_list_closure);
598 DEFUN("charset-name", Fcharset_name, 1, 1, 0, /*
599 Return the name of charset CHARSET.
603 Lisp_Object tmp = Fget_charset(charset);
604 return XCHARSET_NAME(tmp);
607 /* #### SJT Should generic properties be allowed? */
608 DEFUN("make-charset", Fmake_charset, 3, 3, 0, /*
609 Define a new character set.
610 This function is for use with Mule support.
611 NAME is a symbol, the name by which the character set is normally referred.
612 DOC-STRING is a string describing the character set.
613 PROPS is a property list, describing the specific nature of the
614 character set. Recognized properties are:
616 'short-name Short version of the charset name (ex: Latin-1)
617 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
618 'registry A regular expression matching the font registry field for
620 'dimension Number of octets used to index a character in this charset.
621 Either 1 or 2. Defaults to 1.
622 'columns Number of columns used to display a character in this charset.
623 Only used in TTY mode. (Under X, the actual width of a
624 character can be derived from the font used to display the
625 characters.) If unspecified, defaults to the dimension
626 (this is almost always the correct value).
627 'chars Number of characters in each dimension (94 or 96).
628 Defaults to 94. Note that if the dimension is 2, the
629 character set thus described is 94x94 or 96x96.
630 'final Final byte of ISO 2022 escape sequence. Must be
631 supplied. Each combination of (DIMENSION, CHARS) defines a
632 separate namespace for final bytes. Note that ISO
633 2022 restricts the final byte to the range
634 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
635 dimension == 2. Note also that final bytes in the range
636 0x30 - 0x3F are reserved for user-defined (not official)
638 'graphic 0 (use left half of font on output) or 1 (use right half
639 of font on output). Defaults to 0. For example, for
640 a font whose registry is ISO8859-1, the left half
641 (octets 0x20 - 0x7F) is the `ascii' character set, while
642 the right half (octets 0xA0 - 0xFF) is the `latin-1'
643 character set. With 'graphic set to 0, the octets
644 will have their high bit cleared; with it set to 1,
645 the octets will have their high bit set.
646 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
648 'ccl-program A compiled CCL program used to convert a character in
649 this charset into an index into the font. This is in
650 addition to the 'graphic property. The CCL program
651 is passed the octets of the character, with the high
652 bit cleared and set depending upon whether the value
653 of the 'graphic property is 0 or 1.
655 (name, doc_string, props))
657 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
658 int direction = CHARSET_LEFT_TO_RIGHT;
660 Lisp_Object registry = Qnil;
662 Lisp_Object ccl_program = Qnil;
663 Lisp_Object short_name = Qnil, long_name = Qnil;
666 if (!NILP(doc_string))
667 CHECK_STRING(doc_string);
669 charset = Ffind_charset(name);
671 signal_simple_error("Cannot redefine existing charset", name);
674 EXTERNAL_PROPERTY_LIST_LOOP_3(keyword, value, props) {
675 if (EQ(keyword, Qshort_name)) {
680 else if (EQ(keyword, Qlong_name)) {
685 else if (EQ(keyword, Qdimension)) {
687 dimension = XINT(value);
688 if (dimension < 1 || dimension > 2)
690 ("Invalid value for 'dimension",
694 else if (EQ(keyword, Qchars)) {
697 if (chars != 94 && chars != 96)
699 ("Invalid value for 'chars", value);
702 else if (EQ(keyword, Qcolumns)) {
704 columns = XINT(value);
705 if (columns != 1 && columns != 2)
707 ("Invalid value for 'columns",
711 else if (EQ(keyword, Qgraphic)) {
713 graphic = XINT(value);
714 if (graphic < 0 || graphic > 1)
716 ("Invalid value for 'graphic",
720 else if (EQ(keyword, Qregistry)) {
725 else if (EQ(keyword, Qdirection)) {
727 direction = CHARSET_LEFT_TO_RIGHT;
728 else if (EQ(value, Qr2l))
729 direction = CHARSET_RIGHT_TO_LEFT;
732 ("Invalid value for 'direction",
736 else if (EQ(keyword, Qfinal)) {
737 CHECK_CHAR_COERCE_INT(value);
738 final = XCHAR(value);
739 if (final < '0' || final > '~')
741 ("Invalid value for 'final", value);
744 else if (EQ(keyword, Qccl_program)) {
745 struct ccl_program test_ccl;
747 if (setup_ccl_program(&test_ccl, value) < 0)
749 ("Invalid value for 'ccl-program",
755 signal_simple_error("Unrecognized property",
761 error("'final must be specified");
762 if (dimension == 2 && final > 0x5F)
764 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
768 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
770 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
772 if (!NILP(CHARSET_BY_ATTRIBUTES(type, final, CHARSET_LEFT_TO_RIGHT)) ||
773 !NILP(CHARSET_BY_ATTRIBUTES(type, final, CHARSET_RIGHT_TO_LEFT)))
775 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
777 id = get_unallocated_leading_byte(dimension);
779 if (NILP(doc_string))
780 doc_string = build_string("");
783 registry = build_string("");
785 if (NILP(short_name))
786 XSETSTRING(short_name, XSYMBOL(name)->name);
789 long_name = doc_string;
793 charset = make_charset(id, name, dimension + 2, type, columns, graphic,
794 final, direction, short_name, long_name,
795 doc_string, registry);
796 if (!NILP(ccl_program))
797 XCHARSET_CCL_PROGRAM(charset) = ccl_program;
801 DEFUN("make-reverse-direction-charset", Fmake_reverse_direction_charset, 2, 2, 0, /*
802 Make a charset equivalent to CHARSET but which goes in the opposite direction.
803 NEW-NAME is the name of the new charset. Return the new charset.
807 Lisp_Object new_charset = Qnil;
808 int id, dimension, columns, graphic, final;
810 Lisp_Object registry, doc_string, short_name, long_name;
813 charset = Fget_charset(charset);
814 if (!NILP(XCHARSET_REVERSE_DIRECTION_CHARSET(charset)))
816 ("Charset already has reverse-direction charset", charset);
818 CHECK_SYMBOL(new_name);
819 if (!NILP(Ffind_charset(new_name)))
820 signal_simple_error("Cannot redefine existing charset",
823 cs = XCHARSET(charset);
825 type = CHARSET_TYPE(cs);
826 columns = CHARSET_COLUMNS(cs);
827 dimension = CHARSET_DIMENSION(cs);
828 id = get_unallocated_leading_byte(dimension);
830 graphic = CHARSET_GRAPHIC(cs);
831 final = CHARSET_FINAL(cs);
832 direction = CHARSET_RIGHT_TO_LEFT;
833 if (CHARSET_DIRECTION(cs) == CHARSET_RIGHT_TO_LEFT)
834 direction = CHARSET_LEFT_TO_RIGHT;
835 doc_string = CHARSET_DOC_STRING(cs);
836 short_name = CHARSET_SHORT_NAME(cs);
837 long_name = CHARSET_LONG_NAME(cs);
838 registry = CHARSET_REGISTRY(cs);
840 new_charset = make_charset(id, new_name, dimension + 2, type, columns,
841 graphic, final, direction, short_name,
842 long_name, doc_string, registry);
844 CHARSET_REVERSE_DIRECTION_CHARSET(cs) = new_charset;
845 XCHARSET_REVERSE_DIRECTION_CHARSET(new_charset) = charset;
850 /* #### Reverse direction charsets not yet implemented. */
852 DEFUN("charset-reverse-direction-charset", Fcharset_reverse_direction_charset, 1, 1, 0, /*
853 Return the reverse-direction charset parallel to CHARSET, if any.
854 This is the charset with the same properties (in particular, the same
855 dimension, number of characters per dimension, and final byte) as
856 CHARSET but whose characters are displayed in the opposite direction.
860 charset = Fget_charset(charset);
861 return XCHARSET_REVERSE_DIRECTION_CHARSET(charset);
865 DEFUN("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
866 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
867 If DIRECTION is omitted, both directions will be checked (left-to-right
868 will be returned if character sets exist for both directions).
870 (dimension, chars, final, direction))
872 int dm, ch, fi, di = -1;
874 Lisp_Object obj = Qnil;
876 CHECK_INT(dimension);
877 dm = XINT(dimension);
878 if (dm < 1 || dm > 2)
879 signal_simple_error("Invalid value for DIMENSION", dimension);
883 if (ch != 94 && ch != 96)
884 signal_simple_error("Invalid value for CHARS", chars);
886 CHECK_CHAR_COERCE_INT(final);
888 if (fi < '0' || fi > '~')
889 signal_simple_error("Invalid value for FINAL", final);
891 if (EQ(direction, Ql2r))
892 di = CHARSET_LEFT_TO_RIGHT;
893 else if (EQ(direction, Qr2l))
894 di = CHARSET_RIGHT_TO_LEFT;
895 else if (!NILP(direction))
896 signal_simple_error("Invalid value for DIRECTION", direction);
898 if (dm == 2 && fi > 0x5F)
900 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
904 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
906 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
909 obj = CHARSET_BY_ATTRIBUTES(type, fi, CHARSET_LEFT_TO_RIGHT);
912 CHARSET_BY_ATTRIBUTES(type, fi,
913 CHARSET_RIGHT_TO_LEFT);
915 obj = CHARSET_BY_ATTRIBUTES(type, fi, di);
918 return XCHARSET_NAME(obj);
922 DEFUN("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
923 Return short name of CHARSET.
927 Lisp_Object tmp = Fget_charset(charset);
928 return XCHARSET_SHORT_NAME(tmp);
931 DEFUN("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
932 Return long name of CHARSET.
936 Lisp_Object tmp = Fget_charset(charset);
937 return XCHARSET_LONG_NAME(tmp);
940 DEFUN("charset-description", Fcharset_description, 1, 1, 0, /*
941 Return description of CHARSET.
945 Lisp_Object tmp = Fget_charset(charset);
946 return XCHARSET_DOC_STRING(tmp);
949 DEFUN("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
950 Return dimension of CHARSET.
954 Lisp_Object tmp = Fget_charset(charset);
955 return make_int(XCHARSET_DIMENSION(tmp));
958 DEFUN("charset-property", Fcharset_property, 2, 2, 0, /*
959 Return property PROP of CHARSET, a charset object or symbol naming a charset.
960 Recognized properties are those listed in `make-charset', as well as
961 'name and 'doc-string.
967 charset = Fget_charset(charset);
968 cs = XCHARSET(charset);
972 return CHARSET_NAME(cs);
973 if (EQ(prop, Qshort_name))
974 return CHARSET_SHORT_NAME(cs);
975 if (EQ(prop, Qlong_name))
976 return CHARSET_LONG_NAME(cs);
977 if (EQ(prop, Qdoc_string))
978 return CHARSET_DOC_STRING(cs);
979 if (EQ(prop, Qdimension))
980 return make_int(CHARSET_DIMENSION(cs));
981 if (EQ(prop, Qcolumns))
982 return make_int(CHARSET_COLUMNS(cs));
983 if (EQ(prop, Qgraphic))
984 return make_int(CHARSET_GRAPHIC(cs));
985 if (EQ(prop, Qfinal))
986 return make_char(CHARSET_FINAL(cs));
987 if (EQ(prop, Qchars))
988 return make_int(CHARSET_CHARS(cs));
989 if (EQ(prop, Qregistry))
990 return CHARSET_REGISTRY(cs);
991 if (EQ(prop, Qccl_program))
992 return CHARSET_CCL_PROGRAM(cs);
993 if (EQ(prop, Qdirection))
994 return CHARSET_DIRECTION(cs) ==
995 CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
996 if (EQ(prop, Qreverse_direction_charset)) {
997 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET(cs);
998 /* #### Is this translation OK? If so, error checking sufficient? */
999 return CHARSETP(obj) ? XCHARSET_NAME(obj) : obj;
1001 signal_simple_error("Unrecognized charset property name", prop);
1002 return Qnil; /* not reached */
1005 DEFUN("charset-id", Fcharset_id, 1, 1, 0, /*
1006 Return charset identification number of CHARSET.
1010 Lisp_Object tmp = Fget_charset(charset);
1011 return make_int(XCHARSET_LEADING_BYTE(tmp));
1014 /* #### We need to figure out which properties we really want to
1017 DEFUN("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1018 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1020 (charset, ccl_program))
1022 struct ccl_program test_ccl;
1024 charset = Fget_charset(charset);
1025 if (setup_ccl_program(&test_ccl, ccl_program) < 0)
1026 signal_simple_error("Invalid ccl-program", ccl_program);
1027 XCHARSET_CCL_PROGRAM(charset) = ccl_program;
1031 static void invalidate_charset_font_caches(Lisp_Object charset)
1033 /* Invalidate font cache entries for charset on all devices. */
1034 Lisp_Object devcons, concons, hash_table;
1035 DEVICE_LOOP_NO_BREAK(devcons, concons) {
1036 struct device *d = XDEVICE(XCAR(devcons));
1037 hash_table = Fgethash(charset, d->charset_font_cache, Qunbound);
1038 if (!UNBOUNDP(hash_table))
1039 Fclrhash(hash_table);
1043 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1044 DEFUN("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1045 Set the 'registry property of CHARSET to REGISTRY.
1047 (charset, registry))
1049 charset = Fget_charset(charset);
1050 CHECK_STRING(registry);
1051 XCHARSET_REGISTRY(charset) = registry;
1052 invalidate_charset_font_caches(charset);
1053 face_property_was_changed(Vdefault_face, Qfont, Qglobal);
1057 /************************************************************************/
1058 /* Lisp primitives for working with characters */
1059 /************************************************************************/
1061 DEFUN("make-char", Fmake_char, 2, 3, 0, /*
1062 Make a character from CHARSET and octets ARG1 and ARG2.
1063 ARG2 is required only for characters from two-dimensional charsets.
1064 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1065 character s with caron.
1067 (charset, arg1, arg2))
1071 int lowlim, highlim;
1073 charset = Fget_charset(charset);
1074 cs = XCHARSET(charset);
1076 if (EQ(charset, Vcharset_ascii))
1077 lowlim = 0, highlim = 127;
1078 else if (EQ(charset, Vcharset_control_1))
1079 lowlim = 0, highlim = 31;
1080 else if (CHARSET_CHARS(cs) == 94)
1081 lowlim = 33, highlim = 126;
1082 else /* CHARSET_CHARS (cs) == 96) */
1083 lowlim = 32, highlim = 127;
1086 /* It is useful (and safe, according to Olivier Galibert) to strip
1087 the 8th bit off ARG1 and ARG2 because it allows programmers to
1088 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1089 Latin 2 code of the character. */
1090 a1 = XINT(arg1) & 0x7f;
1091 if (a1 < lowlim || a1 > highlim)
1092 args_out_of_range_3(arg1, make_int(lowlim), make_int(highlim));
1094 if (CHARSET_DIMENSION(cs) == 1) {
1097 ("Charset is of dimension one; second octet must be nil",
1099 return make_char(MAKE_CHAR(charset, a1, 0));
1103 a2 = XINT(arg2) & 0x7f;
1104 if (a2 < lowlim || a2 > highlim)
1105 args_out_of_range_3(arg2, make_int(lowlim), make_int(highlim));
1107 return make_char(MAKE_CHAR(charset, a1, a2));
1110 DEFUN("char-charset", Fchar_charset, 1, 1, 0, /*
1111 Return the character set of CHARACTER.
1117 CHECK_CHAR_COERCE_INT(character);
1119 tmp = CHARSET_BY_LEADING_BYTE(CHAR_LEADING_BYTE(XCHAR(character)));
1120 return XCHARSET_NAME(tmp);
1123 DEFUN("char-octet", Fchar_octet, 1, 2, 0, /*
1124 Return the octet numbered N (should be 0 or 1) of CHARACTER.
1125 N defaults to 0 if omitted.
1129 Lisp_Object charset;
1132 CHECK_CHAR_COERCE_INT(character);
1134 BREAKUP_CHAR(XCHAR(character), charset, octet0, octet1);
1136 if (NILP(n) || EQ(n, Qzero))
1137 return make_int(octet0);
1138 else if (EQ(n, make_int(1)))
1139 return make_int(octet1);
1141 signal_simple_error("Octet number must be 0 or 1", n);
1144 DEFUN("split-char", Fsplit_char, 1, 1, 0, /*
1145 Return list of charset and one or two position-codes of CHARACTER.
1149 /* This function can GC */
1150 struct gcpro gcpro1, gcpro2;
1151 Lisp_Object charset = Qnil;
1152 Lisp_Object rc = Qnil;
1156 GCPRO2(charset, rc);
1157 CHECK_CHAR_COERCE_INT(character);
1159 BREAKUP_CHAR(XCHAR(character), charset, c1, c2);
1161 tmp = Fget_charset(charset);
1162 if (XCHARSET_DIMENSION(tmp) == 2) {
1163 rc = list3(XCHARSET_NAME(charset), make_int(c1), make_int(c2));
1165 rc = list2(XCHARSET_NAME(charset), make_int(c1));
1172 #ifdef ENABLE_COMPOSITE_CHARS
1173 /************************************************************************/
1174 /* composite character functions */
1175 /************************************************************************/
1177 Emchar lookup_composite_char(Bufbyte * str, int len)
1179 Lisp_Object lispstr = make_string(str, len);
1180 Lisp_Object ch = Fgethash(lispstr,
1181 Vcomposite_char_string2char_hash_table,
1186 if (composite_char_row_next >= 128)
1187 signal_simple_error("No more composite chars available",
1190 MAKE_CHAR(Vcharset_composite, composite_char_row_next,
1191 composite_char_col_next);
1192 Fputhash(make_char(emch), lispstr,
1193 Vcomposite_char_char2string_hash_table);
1194 Fputhash(lispstr, make_char(emch),
1195 Vcomposite_char_string2char_hash_table);
1196 composite_char_col_next++;
1197 if (composite_char_col_next >= 128) {
1198 composite_char_col_next = 32;
1199 composite_char_row_next++;
1206 Lisp_Object composite_char_string(Emchar ch)
1208 Lisp_Object str = Fgethash(make_char(ch),
1209 Vcomposite_char_char2string_hash_table,
1211 assert(!UNBOUNDP(str));
1215 xxDEFUN("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1216 Convert a string into a single composite character.
1217 The character is the result of overstriking all the characters in
1222 CHECK_STRING(string);
1223 return make_char(lookup_composite_char(XSTRING_DATA(string),
1224 XSTRING_LENGTH(string)));
1227 xxDEFUN("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1228 Return a string of the characters comprising a composite character.
1236 if (CHAR_LEADING_BYTE(emch) != LEADING_BYTE_COMPOSITE)
1237 signal_simple_error("Must be composite char", ch);
1238 return composite_char_string(emch);
1240 #endif /* ENABLE_COMPOSITE_CHARS */
1242 /************************************************************************/
1243 /* initialization */
1244 /************************************************************************/
1246 void syms_of_mule_charset(void)
1248 INIT_LRECORD_IMPLEMENTATION(charset);
1251 DEFSUBR(Ffind_charset);
1252 DEFSUBR(Fget_charset);
1253 DEFSUBR(Fcharset_list);
1254 DEFSUBR(Fcharset_name);
1255 DEFSUBR(Fmake_charset);
1256 DEFSUBR(Fmake_reverse_direction_charset);
1257 /* DEFSUBR (Freverse_direction_charset); */
1258 DEFSUBR(Fcharset_from_attributes);
1259 DEFSUBR(Fcharset_short_name);
1260 DEFSUBR(Fcharset_long_name);
1261 DEFSUBR(Fcharset_description);
1262 DEFSUBR(Fcharset_dimension);
1263 DEFSUBR(Fcharset_property);
1264 DEFSUBR(Fcharset_id);
1265 DEFSUBR(Fset_charset_ccl_program);
1266 DEFSUBR(Fset_charset_registry);
1268 DEFSUBR(Fmake_char);
1269 DEFSUBR(Fchar_charset);
1270 DEFSUBR(Fchar_octet);
1271 DEFSUBR(Fsplit_char);
1273 #ifdef ENABLE_COMPOSITE_CHARS
1274 DEFSUBR(Fmake_composite_char);
1275 DEFSUBR(Fcomposite_char_string);
1278 defsymbol(&Qcharsetp, "charsetp");
1279 defsymbol(&Qregistry, "registry");
1280 defsymbol(&Qfinal, "final");
1281 defsymbol(&Qgraphic, "graphic");
1282 defsymbol(&Qdirection, "direction");
1283 defsymbol(&Qreverse_direction_charset, "reverse-direction-charset");
1284 defsymbol(&Qshort_name, "short-name");
1285 defsymbol(&Qlong_name, "long-name");
1287 defsymbol(&Ql2r, "l2r");
1288 defsymbol(&Qr2l, "r2l");
1290 /* Charsets, compatible with FSF 20.3
1291 Naming convention is Script-Charset[-Edition] */
1292 defsymbol(&Qascii, "ascii");
1293 defsymbol(&Qcontrol_1, "control-1");
1294 defsymbol(&Qlatin_iso8859_1, "latin-iso8859-1");
1295 defsymbol(&Qlatin_iso8859_2, "latin-iso8859-2");
1296 defsymbol(&Qlatin_iso8859_3, "latin-iso8859-3");
1297 defsymbol(&Qlatin_iso8859_4, "latin-iso8859-4");
1298 defsymbol(&Qthai_tis620, "thai-tis620");
1299 defsymbol(&Qgreek_iso8859_7, "greek-iso8859-7");
1300 defsymbol(&Qarabic_iso8859_6, "arabic-iso8859-6");
1301 defsymbol(&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1302 defsymbol(&Qkatakana_jisx0201, "katakana-jisx0201");
1303 defsymbol(&Qlatin_jisx0201, "latin-jisx0201");
1304 defsymbol(&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1305 defsymbol(&Qlatin_iso8859_9, "latin-iso8859-9");
1306 defsymbol(&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1307 defsymbol(&Qchinese_gb2312, "chinese-gb2312");
1308 defsymbol(&Qjapanese_jisx0208, "japanese-jisx0208");
1309 defsymbol(&Qkorean_ksc5601, "korean-ksc5601");
1310 defsymbol(&Qjapanese_jisx0212, "japanese-jisx0212");
1311 defsymbol(&Qchinese_cns11643_1, "chinese-cns11643-1");
1312 defsymbol(&Qchinese_cns11643_2, "chinese-cns11643-2");
1313 defsymbol(&Qchinese_big5_1, "chinese-big5-1");
1314 defsymbol(&Qchinese_big5_2, "chinese-big5-2");
1316 defsymbol(&Qcomposite, "composite");
1319 void vars_of_mule_charset(void)
1323 chlook = xnew_and_zero(struct charset_lookup); /* zero for Purify. */
1324 dump_add_root_struct_ptr(&chlook, &charset_lookup_description);
1326 /* Table of charsets indexed by leading byte. */
1327 for (i = 0; i < countof(chlook->charset_by_leading_byte); i++)
1328 chlook->charset_by_leading_byte[i] = Qnil;
1330 /* Table of charsets indexed by type/final-byte/direction. */
1331 for (i = 0; i < countof(chlook->charset_by_attributes); i++)
1332 for (j = 0; j < countof(chlook->charset_by_attributes[0]); j++)
1334 k < countof(chlook->charset_by_attributes[0][0]);
1336 chlook->charset_by_attributes[i][j][k] = Qnil;
1338 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1339 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1342 void complex_vars_of_mule_charset(void)
1344 staticpro(&Vcharset_hash_table);
1345 Vcharset_hash_table =
1346 make_lisp_hash_table(50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1348 /* Predefined character sets. We store them into variables for
1351 staticpro(&Vcharset_ascii);
1353 make_charset(LEADING_BYTE_ASCII, Qascii, 1,
1354 CHARSET_TYPE_94, 1, 0, 'B',
1355 CHARSET_LEFT_TO_RIGHT,
1356 build_string("ASCII"),
1357 build_string("ASCII)"),
1358 build_string("ASCII (ISO646 IRV)"),
1359 build_string("\\(iso8859-[0-9]*\\|-ascii\\)"));
1360 staticpro(&Vcharset_control_1);
1361 Vcharset_control_1 =
1362 make_charset(LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1363 CHARSET_TYPE_94, 1, 1, 0,
1364 CHARSET_LEFT_TO_RIGHT,
1366 build_string("Control characters"),
1367 build_string("Control characters 128-191"),
1369 staticpro(&Vcharset_latin_iso8859_1);
1370 Vcharset_latin_iso8859_1 =
1371 make_charset(LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1372 CHARSET_TYPE_96, 1, 1, 'A',
1373 CHARSET_LEFT_TO_RIGHT,
1374 build_string("Latin-1"),
1375 build_string("ISO8859-1 (Latin-1)"),
1376 build_string("ISO8859-1 (Latin-1)"),
1377 build_string("iso8859-1"));
1378 staticpro(&Vcharset_latin_iso8859_2);
1379 Vcharset_latin_iso8859_2 =
1380 make_charset(LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1381 CHARSET_TYPE_96, 1, 1, 'B',
1382 CHARSET_LEFT_TO_RIGHT,
1383 build_string("Latin-2"),
1384 build_string("ISO8859-2 (Latin-2)"),
1385 build_string("ISO8859-2 (Latin-2)"),
1386 build_string("iso8859-2"));
1387 staticpro(&Vcharset_latin_iso8859_3);
1388 Vcharset_latin_iso8859_3 =
1389 make_charset(LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1390 CHARSET_TYPE_96, 1, 1, 'C',
1391 CHARSET_LEFT_TO_RIGHT,
1392 build_string("Latin-3"),
1393 build_string("ISO8859-3 (Latin-3)"),
1394 build_string("ISO8859-3 (Latin-3)"),
1395 build_string("iso8859-3"));
1396 staticpro(&Vcharset_latin_iso8859_4);
1397 Vcharset_latin_iso8859_4 =
1398 make_charset(LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1399 CHARSET_TYPE_96, 1, 1, 'D',
1400 CHARSET_LEFT_TO_RIGHT,
1401 build_string("Latin-4"),
1402 build_string("ISO8859-4 (Latin-4)"),
1403 build_string("ISO8859-4 (Latin-4)"),
1404 build_string("iso8859-4"));
1405 staticpro(&Vcharset_thai_tis620);
1406 Vcharset_thai_tis620 =
1407 make_charset(LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1408 CHARSET_TYPE_96, 1, 1, 'T',
1409 CHARSET_LEFT_TO_RIGHT,
1410 build_string("TIS620"),
1411 build_string("TIS620 (Thai)"),
1412 build_string("TIS620.2529 (Thai)"),
1413 build_string("tis620"));
1414 staticpro(&Vcharset_greek_iso8859_7);
1415 Vcharset_greek_iso8859_7 =
1416 make_charset(LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1417 CHARSET_TYPE_96, 1, 1, 'F',
1418 CHARSET_LEFT_TO_RIGHT,
1419 build_string("ISO8859-7"),
1420 build_string("ISO8859-7 (Greek)"),
1421 build_string("ISO8859-7 (Greek)"),
1422 build_string("iso8859-7"));
1423 staticpro(&Vcharset_arabic_iso8859_6);
1424 Vcharset_arabic_iso8859_6 =
1425 make_charset(LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1426 CHARSET_TYPE_96, 1, 1, 'G',
1427 CHARSET_RIGHT_TO_LEFT,
1428 build_string("ISO8859-6"),
1429 build_string("ISO8859-6 (Arabic)"),
1430 build_string("ISO8859-6 (Arabic)"),
1431 build_string("iso8859-6"));
1432 staticpro(&Vcharset_hebrew_iso8859_8);
1433 Vcharset_hebrew_iso8859_8 =
1434 make_charset(LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1435 CHARSET_TYPE_96, 1, 1, 'H',
1436 CHARSET_RIGHT_TO_LEFT,
1437 build_string("ISO8859-8"),
1438 build_string("ISO8859-8 (Hebrew)"),
1439 build_string("ISO8859-8 (Hebrew)"),
1440 build_string("iso8859-8"));
1441 staticpro(&Vcharset_katakana_jisx0201);
1442 Vcharset_katakana_jisx0201 =
1443 make_charset(LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1444 CHARSET_TYPE_94, 1, 1, 'I',
1445 CHARSET_LEFT_TO_RIGHT,
1446 build_string("JISX0201 Kana"),
1447 build_string("JISX0201.1976 (Japanese Kana)"),
1448 build_string("JISX0201.1976 Japanese Kana"),
1449 build_string("jisx0201.1976"));
1450 staticpro(&Vcharset_latin_jisx0201);
1451 Vcharset_latin_jisx0201 =
1452 make_charset(LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1453 CHARSET_TYPE_94, 1, 0, 'J',
1454 CHARSET_LEFT_TO_RIGHT,
1455 build_string("JISX0201 Roman"),
1456 build_string("JISX0201.1976 (Japanese Roman)"),
1457 build_string("JISX0201.1976 Japanese Roman"),
1458 build_string("jisx0201.1976"));
1459 staticpro(&Vcharset_cyrillic_iso8859_5);
1460 Vcharset_cyrillic_iso8859_5 =
1461 make_charset(LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
1462 2, CHARSET_TYPE_96, 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
1463 build_string("ISO8859-5"),
1464 build_string("ISO8859-5 (Cyrillic)"),
1465 build_string("ISO8859-5 (Cyrillic)"),
1466 build_string("iso8859-5"));
1467 staticpro(&Vcharset_latin_iso8859_9);
1468 Vcharset_latin_iso8859_9 =
1469 make_charset(LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1470 CHARSET_TYPE_96, 1, 1, 'M',
1471 CHARSET_LEFT_TO_RIGHT,
1472 build_string("Latin-5"),
1473 build_string("ISO8859-9 (Latin-5)"),
1474 build_string("ISO8859-9 (Latin-5)"),
1475 build_string("iso8859-9"));
1476 staticpro(&Vcharset_japanese_jisx0208_1978);
1477 Vcharset_japanese_jisx0208_1978 =
1478 make_charset(LEADING_BYTE_JAPANESE_JISX0208_1978,
1479 Qjapanese_jisx0208_1978, 3, CHARSET_TYPE_94X94, 2, 0,
1480 '@', CHARSET_LEFT_TO_RIGHT,
1481 build_string("JISX0208.1978"),
1482 build_string("JISX0208.1978 (Japanese)"),
1484 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1485 build_string("\\(jisx0208\\|jisc6226\\)\\.1978"));
1486 staticpro(&Vcharset_chinese_gb2312);
1487 Vcharset_chinese_gb2312 =
1488 make_charset(LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1489 CHARSET_TYPE_94X94, 2, 0, 'A',
1490 CHARSET_LEFT_TO_RIGHT,
1491 build_string("GB2312"),
1492 build_string("GB2312)"),
1493 build_string("GB2312 Chinese simplified"),
1494 build_string("gb2312"));
1495 staticpro(&Vcharset_japanese_jisx0208);
1496 Vcharset_japanese_jisx0208 =
1497 make_charset(LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1498 CHARSET_TYPE_94X94, 2, 0, 'B',
1499 CHARSET_LEFT_TO_RIGHT,
1500 build_string("JISX0208"),
1501 build_string("JISX0208.1983/1990 (Japanese)"),
1502 build_string("JISX0208.1983/1990 Japanese Kanji"),
1503 build_string("jisx0208.19\\(83\\|90\\)"));
1504 staticpro(&Vcharset_korean_ksc5601);
1505 Vcharset_korean_ksc5601 =
1506 make_charset(LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1507 CHARSET_TYPE_94X94, 2, 0, 'C',
1508 CHARSET_LEFT_TO_RIGHT,
1509 build_string("KSC5601"),
1510 build_string("KSC5601 (Korean"),
1511 build_string("KSC5601 Korean Hangul and Hanja"),
1512 build_string("ksc5601"));
1513 staticpro(&Vcharset_japanese_jisx0212);
1514 Vcharset_japanese_jisx0212 =
1515 make_charset(LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1516 CHARSET_TYPE_94X94, 2, 0, 'D',
1517 CHARSET_LEFT_TO_RIGHT,
1518 build_string("JISX0212"),
1519 build_string("JISX0212 (Japanese)"),
1520 build_string("JISX0212 Japanese Supplement"),
1521 build_string("jisx0212"));
1523 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1524 staticpro(&Vcharset_chinese_cns11643_1);
1525 Vcharset_chinese_cns11643_1 =
1526 make_charset(LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
1527 3, CHARSET_TYPE_94X94, 2, 0, 'G',
1528 CHARSET_LEFT_TO_RIGHT, build_string("CNS11643-1"),
1529 build_string("CNS11643-1 (Chinese traditional)"),
1530 build_string("CNS 11643 Plane 1 Chinese traditional"),
1531 build_string(CHINESE_CNS_PLANE_RE("1")));
1532 staticpro(&Vcharset_chinese_cns11643_2);
1533 Vcharset_chinese_cns11643_2 =
1534 make_charset(LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
1535 3, CHARSET_TYPE_94X94, 2, 0, 'H',
1536 CHARSET_LEFT_TO_RIGHT, build_string("CNS11643-2"),
1537 build_string("CNS11643-2 (Chinese traditional)"),
1538 build_string("CNS 11643 Plane 2 Chinese traditional"),
1539 build_string(CHINESE_CNS_PLANE_RE("2")));
1540 staticpro(&Vcharset_chinese_big5_1);
1541 Vcharset_chinese_big5_1 =
1542 make_charset(LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1543 CHARSET_TYPE_94X94, 2, 0, '0',
1544 CHARSET_LEFT_TO_RIGHT,
1545 build_string("Big5"),
1546 build_string("Big5 (Level-1)"),
1548 ("Big5 Level-1 Chinese traditional"),
1549 build_string("big5"));
1550 staticpro(&Vcharset_chinese_big5_2);
1551 Vcharset_chinese_big5_2 =
1552 make_charset(LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1553 CHARSET_TYPE_94X94, 2, 0, '1',
1554 CHARSET_LEFT_TO_RIGHT,
1555 build_string("Big5"),
1556 build_string("Big5 (Level-2)"),
1558 ("Big5 Level-2 Chinese traditional"),
1559 build_string("big5"));
1561 #ifdef ENABLE_COMPOSITE_CHARS
1562 /* #### For simplicity, we put composite chars into a 96x96 charset.
1563 This is going to lead to problems because you can run out of
1564 room, esp. as we don't yet recycle numbers. */
1565 staticpro(&Vcharset_composite);
1566 Vcharset_composite =
1567 make_charset(LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1568 CHARSET_TYPE_96X96, 2, 0, 0,
1569 CHARSET_LEFT_TO_RIGHT,
1570 build_string("Composite"),
1571 build_string("Composite characters"),
1572 build_string("Composite characters"),
1575 /* #### not dumped properly */
1576 composite_char_row_next = 32;
1577 composite_char_col_next = 32;
1579 Vcomposite_char_string2char_hash_table =
1580 make_lisp_hash_table(500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1581 Vcomposite_char_char2string_hash_table =
1582 make_lisp_hash_table(500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1583 staticpro(&Vcomposite_char_string2char_hash_table);
1584 staticpro(&Vcomposite_char_char2string_hash_table);
1585 #endif /* ENABLE_COMPOSITE_CHARS */