1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of SXEmacs
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 /* Synched up with: FSF 20.3. Not in FSF. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
32 #include "ui/device.h"
36 /* The various pre-defined charsets. */
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_thai_tis620;
45 Lisp_Object Vcharset_greek_iso8859_7;
46 Lisp_Object Vcharset_arabic_iso8859_6;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_katakana_jisx0201;
49 Lisp_Object Vcharset_latin_jisx0201;
50 Lisp_Object Vcharset_cyrillic_iso8859_5;
51 Lisp_Object Vcharset_latin_iso8859_9;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_chinese_gb2312;
54 Lisp_Object Vcharset_japanese_jisx0208;
55 Lisp_Object Vcharset_korean_ksc5601;
56 Lisp_Object Vcharset_japanese_jisx0212;
57 Lisp_Object Vcharset_chinese_cns11643_1;
58 Lisp_Object Vcharset_chinese_cns11643_2;
59 Lisp_Object Vcharset_chinese_big5_1;
60 Lisp_Object Vcharset_chinese_big5_2;
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite;
65 /* Hash tables for composite chars. One maps string representing
66 composed chars to their equivalent chars; one goes the
68 Lisp_Object Vcomposite_char_char2string_hash_table;
69 Lisp_Object Vcomposite_char_string2char_hash_table;
71 static int composite_char_row_next;
72 static int composite_char_col_next;
74 #endif /* ENABLE_COMPOSITE_CHARS */
76 struct charset_lookup *chlook;
78 static const struct lrecord_description charset_lookup_description_1[] = {
79 {XD_LISP_OBJECT_ARRAY,
80 offsetof(struct charset_lookup, charset_by_leading_byte),
85 static const struct struct_description charset_lookup_description = {
86 sizeof(struct charset_lookup),
87 charset_lookup_description_1
90 /* Table of number of bytes in the string representation of a character
91 indexed by the first byte of that representation.
93 rep_bytes_by_first_byte(c) is more efficient than the equivalent
94 canonical computation:
96 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
98 const Bytecount rep_bytes_by_first_byte[0xA0] = { /* 0x00 - 0x7f are for straight ASCII */
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 /* 0x80 - 0x8f are for Dimension-1 official charsets */
108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 /* 0x90 - 0x9d are for Dimension-2 official charsets */
110 /* 0x9e is for Dimension-1 private charsets */
111 /* 0x9f is for Dimension-2 private charsets */
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
115 Lisp_Object Qcharsetp;
117 /* Qdoc_string, Qdimension, Qchars defined in general.c */
118 Lisp_Object Qregistry, Qfinal, Qgraphic;
119 Lisp_Object Qdirection;
120 Lisp_Object Qreverse_direction_charset;
121 Lisp_Object Qleading_byte;
122 Lisp_Object Qshort_name, Qlong_name;
138 Qjapanese_jisx0208_1978,
144 Qchinese_cns11643_2, Qchinese_big5_1, Qchinese_big5_2, Qcomposite;
146 Lisp_Object Ql2r, Qr2l;
148 Lisp_Object Vcharset_hash_table;
150 /* Composite characters are characters constructed by overstriking two
151 or more regular characters.
153 1) The old Mule implementation involves storing composite characters
154 in a buffer as a tag followed by all of the actual characters
155 used to make up the composite character. I think this is a bad
156 idea; it greatly complicates code that wants to handle strings
157 one character at a time because it has to deal with the possibility
158 of great big ungainly characters. It's much more reasonable to
159 simply store an index into a table of composite characters.
161 2) The current implementation only allows for 16,384 separate
162 composite characters over the lifetime of the SXEmacs process.
163 This could become a potential problem if the user
164 edited lots of different files that use composite characters.
165 Due to FSF bogosity, increasing the number of allowable
166 composite characters under Mule would decrease the number
167 of possible faces that can exist. Mule already has shrunk
168 this to 2048, and further shrinkage would become uncomfortable.
169 No such problems exist in SXEmacs.
171 Composite characters could be represented as 0x80 C1 C2 C3,
172 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
173 for slightly under 2^20 (one million) composite characters
174 over the SXEmacs process lifetime, and you only need to
175 increase the size of a Mule character from 19 to 21 bits.
176 Or you could use 0x80 C1 C2 C3 C4, allowing for about
177 85 million (slightly over 2^26) composite characters. */
179 /************************************************************************/
180 /* Basic Emchar functions */
181 /************************************************************************/
183 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
184 string in STR. Returns the number of bytes stored.
185 Do not call this directly. Use the macro set_charptr_emchar() instead.
188 Bytecount non_ascii_set_charptr_emchar(Bufbyte * str, Emchar c)
196 BREAKUP_CHAR(c, charset, c1, c2);
197 lb = CHAR_LEADING_BYTE(c);
198 if (LEADING_BYTE_PRIVATE_P(lb))
199 *p++ = PRIVATE_LEADING_BYTE_PREFIX(lb);
201 if (EQ(charset, Vcharset_control_1))
210 /* Return the first character from a Mule-encoded string in STR,
211 assuming it's non-ASCII. Do not call this directly.
212 Use the macro charptr_emchar() instead. */
214 Emchar non_ascii_charptr_emchar(const Bufbyte * str)
216 Bufbyte i0 = *str, i1, i2 = 0;
219 if (i0 == LEADING_BYTE_CONTROL_1)
220 return (Emchar) (*++str - 0x20);
222 if (LEADING_BYTE_PREFIX_P(i0))
227 charset = CHARSET_BY_LEADING_BYTE(i0);
228 if (XCHARSET_DIMENSION(charset) == 2)
231 return MAKE_CHAR(charset, i1, i2);
234 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
235 Do not call this directly. Use the macro valid_char_p() instead. */
237 int non_ascii_valid_char_p(Emchar ch)
241 /* Must have only lowest 19 bits set */
245 f1 = CHAR_FIELD1(ch);
246 f2 = CHAR_FIELD2(ch);
247 f3 = CHAR_FIELD3(ch);
252 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
253 (f2 > MAX_CHAR_FIELD2_OFFICIAL
254 && f2 < MIN_CHAR_FIELD2_PRIVATE)
255 || f2 > MAX_CHAR_FIELD2_PRIVATE)
260 if (f3 != 0x20 && f3 != 0x7F
261 && !(f2 >= MIN_CHAR_FIELD2_PRIVATE
262 && f2 <= MAX_CHAR_FIELD2_PRIVATE))
266 NOTE: This takes advantage of the fact that
267 FIELD2_TO_OFFICIAL_LEADING_BYTE and
268 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
271 CHARSET_BY_LEADING_BYTE(f2 +
272 FIELD2_TO_OFFICIAL_LEADING_BYTE);
273 if (EQ(charset, Qnil))
275 return (XCHARSET_CHARS(charset) == 96);
279 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
280 (f1 > MAX_CHAR_FIELD1_OFFICIAL
281 && f1 < MIN_CHAR_FIELD1_PRIVATE)
282 || f1 > MAX_CHAR_FIELD1_PRIVATE)
284 if (f2 < 0x20 || f3 < 0x20)
287 #ifdef ENABLE_COMPOSITE_CHARS
288 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE ==
289 LEADING_BYTE_COMPOSITE) {
293 Vcomposite_char_char2string_hash_table,
298 #endif /* ENABLE_COMPOSITE_CHARS */
300 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
301 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE
302 && f1 <= MAX_CHAR_FIELD1_PRIVATE))
305 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
307 CHARSET_BY_LEADING_BYTE(f1 +
308 FIELD1_TO_OFFICIAL_LEADING_BYTE);
311 CHARSET_BY_LEADING_BYTE(f1 +
312 FIELD1_TO_PRIVATE_LEADING_BYTE);
314 if (EQ(charset, Qnil))
316 return (XCHARSET_CHARS(charset) == 96);
320 /************************************************************************/
321 /* Basic string functions */
322 /************************************************************************/
324 /* Copy the character pointed to by SRC into DST. Do not call this
325 directly. Use the macro charptr_copy_char() instead.
326 Return the number of bytes copied. */
328 Bytecount non_ascii_charptr_copy_char(const Bufbyte * src, Bufbyte * dst)
330 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE(*src);
332 for (i = bytes; i; i--, dst++, src++)
337 /************************************************************************/
338 /* streams of Emchars */
339 /************************************************************************/
341 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
342 The functions below are not meant to be called directly; use
343 the macros in insdel.h. */
345 Emchar Lstream_get_emchar_1(Lstream * stream, int ch)
347 Bufbyte str[MAX_EMCHAR_LEN];
348 Bufbyte *strptr = str;
351 str[0] = (Bufbyte) ch;
353 for (bytes = REP_BYTES_BY_FIRST_BYTE(ch) - 1; bytes; bytes--) {
354 int c = Lstream_getc(stream);
355 bufpos_checking_assert(c >= 0);
356 *++strptr = (Bufbyte) c;
358 return charptr_emchar(str);
361 int Lstream_fput_emchar(Lstream * stream, Emchar ch)
363 Bufbyte str[MAX_EMCHAR_LEN];
364 Bytecount len = set_charptr_emchar(str, ch);
365 return Lstream_write(stream, str, len);
368 void Lstream_funget_emchar(Lstream * stream, Emchar ch)
370 Bufbyte str[MAX_EMCHAR_LEN];
371 Bytecount len = set_charptr_emchar(str, ch);
372 Lstream_unread(stream, str, len);
375 /************************************************************************/
377 /************************************************************************/
379 static Lisp_Object mark_charset(Lisp_Object obj)
381 Lisp_Charset *cs = XCHARSET(obj);
383 mark_object(cs->short_name);
384 mark_object(cs->long_name);
385 mark_object(cs->doc_string);
386 mark_object(cs->registry);
387 mark_object(cs->ccl_program);
392 print_charset(Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
394 Lisp_Charset *cs = XCHARSET(obj);
398 error("printing unreadable object #<charset %s 0x%x>",
399 string_data(XSYMBOL(CHARSET_NAME(cs))->name),
402 write_c_string("#<charset ", printcharfun);
403 print_internal(CHARSET_NAME(cs), printcharfun, 0);
404 write_c_string(" ", printcharfun);
405 print_internal(CHARSET_SHORT_NAME(cs), printcharfun, 1);
406 write_c_string(" ", printcharfun);
407 print_internal(CHARSET_LONG_NAME(cs), printcharfun, 1);
408 write_c_string(" ", printcharfun);
409 print_internal(CHARSET_DOC_STRING(cs), printcharfun, 1);
410 sprintf(buf, " %s %s cols=%d g%d final='%c' reg=",
411 CHARSET_TYPE(cs) == CHARSET_TYPE_94 ? "94" :
412 CHARSET_TYPE(cs) == CHARSET_TYPE_96 ? "96" :
413 CHARSET_TYPE(cs) == CHARSET_TYPE_94X94 ? "94x94" :
415 CHARSET_DIRECTION(cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
416 CHARSET_COLUMNS(cs), CHARSET_GRAPHIC(cs), CHARSET_FINAL(cs));
417 write_c_string(buf, printcharfun);
418 print_internal(CHARSET_REGISTRY(cs), printcharfun, 0);
419 sprintf(buf, " 0x%x>", cs->header.uid);
420 write_c_string(buf, printcharfun);
423 static const struct lrecord_description charset_description[] = {
424 {XD_LISP_OBJECT, offsetof(Lisp_Charset, name)},
425 {XD_LISP_OBJECT, offsetof(Lisp_Charset, doc_string)},
426 {XD_LISP_OBJECT, offsetof(Lisp_Charset, registry)},
427 {XD_LISP_OBJECT, offsetof(Lisp_Charset, short_name)},
428 {XD_LISP_OBJECT, offsetof(Lisp_Charset, long_name)},
429 {XD_LISP_OBJECT, offsetof(Lisp_Charset, reverse_direction_charset)},
430 {XD_LISP_OBJECT, offsetof(Lisp_Charset, ccl_program)},
434 DEFINE_LRECORD_IMPLEMENTATION("charset", charset,
435 mark_charset, print_charset, 0, 0, 0,
436 charset_description, Lisp_Charset);
438 /* Make a new charset. */
439 /* #### SJT Should generic properties be allowed? */
441 make_charset(int id, Lisp_Object name, unsigned char rep_bytes,
442 unsigned char type, unsigned char columns, unsigned char graphic,
443 Bufbyte final, unsigned char direction, Lisp_Object short_name,
444 Lisp_Object long_name, Lisp_Object doc, Lisp_Object reg)
447 Lisp_Charset *cs = alloc_lcrecord_type(Lisp_Charset, &lrecord_charset);
451 XSETCHARSET(obj, cs);
454 CHARSET_NAME(cs) = name;
455 CHARSET_SHORT_NAME(cs) = short_name;
456 CHARSET_LONG_NAME(cs) = long_name;
457 CHARSET_REP_BYTES(cs) = rep_bytes;
458 CHARSET_DIRECTION(cs) = direction;
459 CHARSET_TYPE(cs) = type;
460 CHARSET_COLUMNS(cs) = columns;
461 CHARSET_GRAPHIC(cs) = graphic;
462 CHARSET_FINAL(cs) = final;
463 CHARSET_DOC_STRING(cs) = doc;
464 CHARSET_REGISTRY(cs) = reg;
465 CHARSET_CCL_PROGRAM(cs) = Qnil;
466 CHARSET_REVERSE_DIRECTION_CHARSET(cs) = Qnil;
468 CHARSET_DIMENSION(cs) = (CHARSET_TYPE(cs) == CHARSET_TYPE_94 ||
469 CHARSET_TYPE(cs) == CHARSET_TYPE_96) ? 1 : 2;
470 CHARSET_CHARS(cs) = (CHARSET_TYPE(cs) == CHARSET_TYPE_94 ||
471 CHARSET_TYPE(cs) == CHARSET_TYPE_94X94) ? 94 : 96;
474 /* some charsets do not have final characters. This includes
475 ASCII, Control-1, Composite, and the two faux private
478 (chlook->charset_by_attributes[type][final][direction]));
479 chlook->charset_by_attributes[type][final][direction] = obj;
482 assert(NILP(chlook->charset_by_leading_byte[id - 128]));
483 chlook->charset_by_leading_byte[id - 128] = obj;
485 /* Some charsets are "faux" and don't have names or really exist at
486 all except in the leading-byte table. */
488 Fputhash(name, obj, Vcharset_hash_table);
492 static int get_unallocated_leading_byte(int dimension)
496 if (dimension == 1) {
497 if (chlook->next_allocated_1_byte_leading_byte >
498 (Bufbyte)MAX_LEADING_BYTE_PRIVATE_1)
501 lb = chlook->next_allocated_1_byte_leading_byte++;
502 #if MAX_LEADING_BYTE_PRIVATE_2 == 255
503 } else if (chlook->next_allocated_2_byte_leading_byte == 0) {
506 } else if (chlook->next_allocated_2_byte_leading_byte >
507 (Bufbyte)MAX_LEADING_BYTE_PRIVATE_2) {
511 lb = chlook->next_allocated_2_byte_leading_byte++;
516 ("No more character sets free for this dimension",
517 make_int(dimension));
522 /************************************************************************/
523 /* Basic charset Lisp functions */
524 /************************************************************************/
526 DEFUN("charsetp", Fcharsetp, 1, 1, 0, /*
527 Return non-nil if OBJECT is a charset.
531 return CHARSETP(object) ? Qt : Qnil;
534 DEFUN("find-charset", Ffind_charset, 1, 1, 0, /*
535 Retrieve the charset of the given name.
536 If CHARSET-OR-NAME is a charset object, it is simply returned.
537 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
538 nil is returned. Otherwise the associated charset object is returned.
542 if (CHARSETP(charset_or_name))
543 return charset_or_name;
545 CHECK_SYMBOL(charset_or_name);
546 return Fgethash(charset_or_name, Vcharset_hash_table, Qnil);
549 DEFUN("get-charset", Fget_charset, 1, 1, 0, /*
550 Retrieve the charset of the given name.
551 Same as `find-charset' except an error is signalled if there is no such
552 charset instead of returning nil.
556 Lisp_Object charset = Ffind_charset(name);
559 signal_simple_error("No such charset", name);
563 /* We store the charsets in hash tables with the names as the key and the
564 actual charset object as the value. Occasionally we need to use them
565 in a list format. These routines provide us with that. */
566 struct charset_list_closure {
567 Lisp_Object *charset_list;
571 add_charset_to_list_mapper(Lisp_Object key, Lisp_Object value,
572 void *charset_list_closure)
574 /* This function can GC */
575 struct charset_list_closure *chcl =
576 (struct charset_list_closure *)charset_list_closure;
577 Lisp_Object *charset_list = chcl->charset_list;
579 *charset_list = Fcons(XCHARSET_NAME(value), *charset_list);
583 DEFUN("charset-list", Fcharset_list, 0, 0, 0, /*
584 Return a list of the names of all defined charsets.
588 Lisp_Object charset_list = Qnil;
590 struct charset_list_closure charset_list_closure;
592 GCPRO1(charset_list);
593 charset_list_closure.charset_list = &charset_list;
594 elisp_maphash(add_charset_to_list_mapper, Vcharset_hash_table,
595 &charset_list_closure);
601 DEFUN("charset-name", Fcharset_name, 1, 1, 0, /*
602 Return the name of charset CHARSET.
606 Lisp_Object tmp = Fget_charset(charset);
607 return XCHARSET_NAME(tmp);
610 /* #### SJT Should generic properties be allowed? */
611 DEFUN("make-charset", Fmake_charset, 3, 3, 0, /*
612 Define a new character set.
613 This function is for use with Mule support.
614 NAME is a symbol, the name by which the character set is normally referred.
615 DOC-STRING is a string describing the character set.
616 PROPS is a property list, describing the specific nature of the
617 character set. Recognized properties are:
619 'short-name Short version of the charset name (ex: Latin-1)
620 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
621 'registry A regular expression matching the font registry field for
623 'dimension Number of octets used to index a character in this charset.
624 Either 1 or 2. Defaults to 1.
625 'columns Number of columns used to display a character in this charset.
626 Only used in TTY mode. (Under X, the actual width of a
627 character can be derived from the font used to display the
628 characters.) If unspecified, defaults to the dimension
629 (this is almost always the correct value).
630 'chars Number of characters in each dimension (94 or 96).
631 Defaults to 94. Note that if the dimension is 2, the
632 character set thus described is 94x94 or 96x96.
633 'final Final byte of ISO 2022 escape sequence. Must be
634 supplied. Each combination of (DIMENSION, CHARS) defines a
635 separate namespace for final bytes. Note that ISO
636 2022 restricts the final byte to the range
637 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
638 dimension == 2. Note also that final bytes in the range
639 0x30 - 0x3F are reserved for user-defined (not official)
641 'graphic 0 (use left half of font on output) or 1 (use right half
642 of font on output). Defaults to 0. For example, for
643 a font whose registry is ISO8859-1, the left half
644 (octets 0x20 - 0x7F) is the `ascii' character set, while
645 the right half (octets 0xA0 - 0xFF) is the `latin-1'
646 character set. With 'graphic set to 0, the octets
647 will have their high bit cleared; with it set to 1,
648 the octets will have their high bit set.
649 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
651 'ccl-program A compiled CCL program used to convert a character in
652 this charset into an index into the font. This is in
653 addition to the 'graphic property. The CCL program
654 is passed the octets of the character, with the high
655 bit cleared and set depending upon whether the value
656 of the 'graphic property is 0 or 1.
658 (name, doc_string, props))
660 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
661 int direction = CHARSET_LEFT_TO_RIGHT;
663 Lisp_Object registry = Qnil;
665 Lisp_Object ccl_program = Qnil;
666 Lisp_Object short_name = Qnil, long_name = Qnil;
669 if (!NILP(doc_string))
670 CHECK_STRING(doc_string);
672 charset = Ffind_charset(name);
674 signal_simple_error("Cannot redefine existing charset", name);
677 EXTERNAL_PROPERTY_LIST_LOOP_3(keyword, value, props) {
678 if (EQ(keyword, Qshort_name)) {
683 else if (EQ(keyword, Qlong_name)) {
688 else if (EQ(keyword, Qdimension)) {
690 dimension = XINT(value);
691 if (dimension < 1 || dimension > 2)
693 ("Invalid value for 'dimension",
697 else if (EQ(keyword, Qchars)) {
700 if (chars != 94 && chars != 96)
702 ("Invalid value for 'chars", value);
705 else if (EQ(keyword, Qcolumns)) {
707 columns = XINT(value);
708 if (columns != 1 && columns != 2)
710 ("Invalid value for 'columns",
714 else if (EQ(keyword, Qgraphic)) {
716 graphic = XINT(value);
717 if (graphic < 0 || graphic > 1)
719 ("Invalid value for 'graphic",
723 else if (EQ(keyword, Qregistry)) {
728 else if (EQ(keyword, Qdirection)) {
730 direction = CHARSET_LEFT_TO_RIGHT;
731 else if (EQ(value, Qr2l))
732 direction = CHARSET_RIGHT_TO_LEFT;
735 ("Invalid value for 'direction",
739 else if (EQ(keyword, Qfinal)) {
740 CHECK_CHAR_COERCE_INT(value);
741 final = XCHAR(value);
742 if (final < '0' || final > '~')
744 ("Invalid value for 'final", value);
747 else if (EQ(keyword, Qccl_program)) {
748 struct ccl_program test_ccl;
750 if (setup_ccl_program(&test_ccl, value) < 0)
752 ("Invalid value for 'ccl-program",
758 signal_simple_error("Unrecognized property",
764 error("'final must be specified");
765 if (dimension == 2 && final > 0x5F)
767 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
771 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
773 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
775 if (!NILP(CHARSET_BY_ATTRIBUTES(type, final, CHARSET_LEFT_TO_RIGHT)) ||
776 !NILP(CHARSET_BY_ATTRIBUTES(type, final, CHARSET_RIGHT_TO_LEFT)))
778 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
780 id = get_unallocated_leading_byte(dimension);
782 if (NILP(doc_string))
783 doc_string = build_string("");
786 registry = build_string("");
788 if (NILP(short_name))
789 XSETSTRING(short_name, XSYMBOL(name)->name);
792 long_name = doc_string;
796 charset = make_charset(id, name, dimension + 2, type, columns, graphic,
797 final, direction, short_name, long_name,
798 doc_string, registry);
799 if (!NILP(ccl_program))
800 XCHARSET_CCL_PROGRAM(charset) = ccl_program;
804 DEFUN("make-reverse-direction-charset", Fmake_reverse_direction_charset, 2, 2, 0, /*
805 Make a charset equivalent to CHARSET but which goes in the opposite direction.
806 NEW-NAME is the name of the new charset. Return the new charset.
810 Lisp_Object new_charset = Qnil;
811 int id, dimension, columns, graphic, final;
813 Lisp_Object registry, doc_string, short_name, long_name;
816 charset = Fget_charset(charset);
817 if (!NILP(XCHARSET_REVERSE_DIRECTION_CHARSET(charset)))
819 ("Charset already has reverse-direction charset", charset);
821 CHECK_SYMBOL(new_name);
822 if (!NILP(Ffind_charset(new_name)))
823 signal_simple_error("Cannot redefine existing charset",
826 cs = XCHARSET(charset);
828 type = CHARSET_TYPE(cs);
829 columns = CHARSET_COLUMNS(cs);
830 dimension = CHARSET_DIMENSION(cs);
831 id = get_unallocated_leading_byte(dimension);
833 graphic = CHARSET_GRAPHIC(cs);
834 final = CHARSET_FINAL(cs);
835 direction = CHARSET_RIGHT_TO_LEFT;
836 if (CHARSET_DIRECTION(cs) == CHARSET_RIGHT_TO_LEFT)
837 direction = CHARSET_LEFT_TO_RIGHT;
838 doc_string = CHARSET_DOC_STRING(cs);
839 short_name = CHARSET_SHORT_NAME(cs);
840 long_name = CHARSET_LONG_NAME(cs);
841 registry = CHARSET_REGISTRY(cs);
843 new_charset = make_charset(id, new_name, dimension + 2, type, columns,
844 graphic, final, direction, short_name,
845 long_name, doc_string, registry);
847 CHARSET_REVERSE_DIRECTION_CHARSET(cs) = new_charset;
848 XCHARSET_REVERSE_DIRECTION_CHARSET(new_charset) = charset;
853 /* #### Reverse direction charsets not yet implemented. */
855 DEFUN("charset-reverse-direction-charset", Fcharset_reverse_direction_charset, 1, 1, 0, /*
856 Return the reverse-direction charset parallel to CHARSET, if any.
857 This is the charset with the same properties (in particular, the same
858 dimension, number of characters per dimension, and final byte) as
859 CHARSET but whose characters are displayed in the opposite direction.
863 charset = Fget_charset(charset);
864 return XCHARSET_REVERSE_DIRECTION_CHARSET(charset);
868 DEFUN("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
869 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
870 If DIRECTION is omitted, both directions will be checked (left-to-right
871 will be returned if character sets exist for both directions).
873 (dimension, chars, final, direction))
875 int dm, ch, fi, di = -1;
877 Lisp_Object obj = Qnil;
879 CHECK_INT(dimension);
880 dm = XINT(dimension);
881 if (dm < 1 || dm > 2)
882 signal_simple_error("Invalid value for DIMENSION", dimension);
886 if (ch != 94 && ch != 96)
887 signal_simple_error("Invalid value for CHARS", chars);
889 CHECK_CHAR_COERCE_INT(final);
891 if (fi < '0' || fi > '~')
892 signal_simple_error("Invalid value for FINAL", final);
894 if (EQ(direction, Ql2r))
895 di = CHARSET_LEFT_TO_RIGHT;
896 else if (EQ(direction, Qr2l))
897 di = CHARSET_RIGHT_TO_LEFT;
898 else if (!NILP(direction))
899 signal_simple_error("Invalid value for DIRECTION", direction);
901 if (dm == 2 && fi > 0x5F)
903 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
907 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
909 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
912 obj = CHARSET_BY_ATTRIBUTES(type, fi, CHARSET_LEFT_TO_RIGHT);
915 CHARSET_BY_ATTRIBUTES(type, fi,
916 CHARSET_RIGHT_TO_LEFT);
918 obj = CHARSET_BY_ATTRIBUTES(type, fi, di);
921 return XCHARSET_NAME(obj);
925 DEFUN("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
926 Return short name of CHARSET.
930 Lisp_Object tmp = Fget_charset(charset);
931 return XCHARSET_SHORT_NAME(tmp);
934 DEFUN("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
935 Return long name of CHARSET.
939 Lisp_Object tmp = Fget_charset(charset);
940 return XCHARSET_LONG_NAME(tmp);
943 DEFUN("charset-description", Fcharset_description, 1, 1, 0, /*
944 Return description of CHARSET.
948 Lisp_Object tmp = Fget_charset(charset);
949 return XCHARSET_DOC_STRING(tmp);
952 DEFUN("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
953 Return dimension of CHARSET.
957 Lisp_Object tmp = Fget_charset(charset);
958 return make_int(XCHARSET_DIMENSION(tmp));
961 DEFUN("charset-property", Fcharset_property, 2, 2, 0, /*
962 Return property PROP of CHARSET, a charset object or symbol naming a charset.
963 Recognized properties are those listed in `make-charset', as well as
964 'name and 'doc-string.
970 charset = Fget_charset(charset);
971 cs = XCHARSET(charset);
975 return CHARSET_NAME(cs);
976 if (EQ(prop, Qshort_name))
977 return CHARSET_SHORT_NAME(cs);
978 if (EQ(prop, Qlong_name))
979 return CHARSET_LONG_NAME(cs);
980 if (EQ(prop, Qdoc_string))
981 return CHARSET_DOC_STRING(cs);
982 if (EQ(prop, Qdimension))
983 return make_int(CHARSET_DIMENSION(cs));
984 if (EQ(prop, Qcolumns))
985 return make_int(CHARSET_COLUMNS(cs));
986 if (EQ(prop, Qgraphic))
987 return make_int(CHARSET_GRAPHIC(cs));
988 if (EQ(prop, Qfinal))
989 return make_char(CHARSET_FINAL(cs));
990 if (EQ(prop, Qchars))
991 return make_int(CHARSET_CHARS(cs));
992 if (EQ(prop, Qregistry))
993 return CHARSET_REGISTRY(cs);
994 if (EQ(prop, Qccl_program))
995 return CHARSET_CCL_PROGRAM(cs);
996 if (EQ(prop, Qdirection))
997 return CHARSET_DIRECTION(cs) ==
998 CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
999 if (EQ(prop, Qreverse_direction_charset)) {
1000 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET(cs);
1001 /* #### Is this translation OK? If so, error checking sufficient? */
1002 return CHARSETP(obj) ? XCHARSET_NAME(obj) : obj;
1004 signal_simple_error("Unrecognized charset property name", prop);
1005 return Qnil; /* not reached */
1008 DEFUN("charset-id", Fcharset_id, 1, 1, 0, /*
1009 Return charset identification number of CHARSET.
1013 Lisp_Object tmp = Fget_charset(charset);
1014 return make_int(XCHARSET_LEADING_BYTE(tmp));
1017 /* #### We need to figure out which properties we really want to
1020 DEFUN("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1021 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1023 (charset, ccl_program))
1025 struct ccl_program test_ccl;
1027 charset = Fget_charset(charset);
1028 if (setup_ccl_program(&test_ccl, ccl_program) < 0)
1029 signal_simple_error("Invalid ccl-program", ccl_program);
1030 XCHARSET_CCL_PROGRAM(charset) = ccl_program;
1034 static void invalidate_charset_font_caches(Lisp_Object charset)
1036 /* Invalidate font cache entries for charset on all devices. */
1037 Lisp_Object devcons, concons, hash_table;
1038 DEVICE_LOOP_NO_BREAK(devcons, concons) {
1039 struct device *d = XDEVICE(XCAR(devcons));
1040 hash_table = Fgethash(charset, d->charset_font_cache, Qunbound);
1041 if (!UNBOUNDP(hash_table))
1042 Fclrhash(hash_table);
1046 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1047 DEFUN("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1048 Set the 'registry property of CHARSET to REGISTRY.
1050 (charset, registry))
1052 charset = Fget_charset(charset);
1053 CHECK_STRING(registry);
1054 XCHARSET_REGISTRY(charset) = registry;
1055 invalidate_charset_font_caches(charset);
1056 face_property_was_changed(Vdefault_face, Qfont, Qglobal);
1060 /************************************************************************/
1061 /* Lisp primitives for working with characters */
1062 /************************************************************************/
1064 DEFUN("make-char", Fmake_char, 2, 3, 0, /*
1065 Make a character from CHARSET and octets ARG1 and ARG2.
1066 ARG2 is required only for characters from two-dimensional charsets.
1067 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1068 character s with caron.
1070 (charset, arg1, arg2))
1074 int lowlim, highlim;
1076 charset = Fget_charset(charset);
1077 cs = XCHARSET(charset);
1079 if (EQ(charset, Vcharset_ascii))
1080 lowlim = 0, highlim = 127;
1081 else if (EQ(charset, Vcharset_control_1))
1082 lowlim = 0, highlim = 31;
1083 else if (CHARSET_CHARS(cs) == 94)
1084 lowlim = 33, highlim = 126;
1085 else /* CHARSET_CHARS (cs) == 96) */
1086 lowlim = 32, highlim = 127;
1089 /* It is useful (and safe, according to Olivier Galibert) to strip
1090 the 8th bit off ARG1 and ARG2 because it allows programmers to
1091 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1092 Latin 2 code of the character. */
1093 a1 = XINT(arg1) & 0x7f;
1094 if (a1 < lowlim || a1 > highlim)
1095 args_out_of_range_3(arg1, make_int(lowlim), make_int(highlim));
1097 if (CHARSET_DIMENSION(cs) == 1) {
1100 ("Charset is of dimension one; second octet must be nil",
1102 return make_char(MAKE_CHAR(charset, a1, 0));
1106 a2 = XINT(arg2) & 0x7f;
1107 if (a2 < lowlim || a2 > highlim)
1108 args_out_of_range_3(arg2, make_int(lowlim), make_int(highlim));
1110 return make_char(MAKE_CHAR(charset, a1, a2));
1113 DEFUN("char-charset", Fchar_charset, 1, 1, 0, /*
1114 Return the character set of CHARACTER.
1120 CHECK_CHAR_COERCE_INT(character);
1122 tmp = CHARSET_BY_LEADING_BYTE(CHAR_LEADING_BYTE(XCHAR(character)));
1123 return XCHARSET_NAME(tmp);
1126 DEFUN("char-octet", Fchar_octet, 1, 2, 0, /*
1127 Return the octet numbered N (should be 0 or 1) of CHARACTER.
1128 N defaults to 0 if omitted.
1132 Lisp_Object charset;
1135 CHECK_CHAR_COERCE_INT(character);
1137 BREAKUP_CHAR(XCHAR(character), charset, octet0, octet1);
1139 if (NILP(n) || EQ(n, Qzero))
1140 return make_int(octet0);
1141 else if (EQ(n, make_int(1)))
1142 return make_int(octet1);
1144 signal_simple_error("Octet number must be 0 or 1", n);
1147 DEFUN("split-char", Fsplit_char, 1, 1, 0, /*
1148 Return list of charset and one or two position-codes of CHARACTER.
1152 /* This function can GC */
1153 struct gcpro gcpro1, gcpro2;
1154 Lisp_Object charset = Qnil;
1155 Lisp_Object rc = Qnil;
1159 GCPRO2(charset, rc);
1160 CHECK_CHAR_COERCE_INT(character);
1162 BREAKUP_CHAR(XCHAR(character), charset, c1, c2);
1164 tmp = Fget_charset(charset);
1165 if (XCHARSET_DIMENSION(tmp) == 2) {
1166 rc = list3(XCHARSET_NAME(charset), make_int(c1), make_int(c2));
1168 rc = list2(XCHARSET_NAME(charset), make_int(c1));
1175 #ifdef ENABLE_COMPOSITE_CHARS
1176 /************************************************************************/
1177 /* composite character functions */
1178 /************************************************************************/
1180 Emchar lookup_composite_char(Bufbyte * str, int len)
1182 Lisp_Object lispstr = make_string(str, len);
1183 Lisp_Object ch = Fgethash(lispstr,
1184 Vcomposite_char_string2char_hash_table,
1189 if (composite_char_row_next >= 128)
1190 signal_simple_error("No more composite chars available",
1193 MAKE_CHAR(Vcharset_composite, composite_char_row_next,
1194 composite_char_col_next);
1195 Fputhash(make_char(emch), lispstr,
1196 Vcomposite_char_char2string_hash_table);
1197 Fputhash(lispstr, make_char(emch),
1198 Vcomposite_char_string2char_hash_table);
1199 composite_char_col_next++;
1200 if (composite_char_col_next >= 128) {
1201 composite_char_col_next = 32;
1202 composite_char_row_next++;
1209 Lisp_Object composite_char_string(Emchar ch)
1211 Lisp_Object str = Fgethash(make_char(ch),
1212 Vcomposite_char_char2string_hash_table,
1214 assert(!UNBOUNDP(str));
1218 xxDEFUN("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1219 Convert a string into a single composite character.
1220 The character is the result of overstriking all the characters in
1225 CHECK_STRING(string);
1226 return make_char(lookup_composite_char(XSTRING_DATA(string),
1227 XSTRING_LENGTH(string)));
1230 xxDEFUN("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1231 Return a string of the characters comprising a composite character.
1239 if (CHAR_LEADING_BYTE(emch) != LEADING_BYTE_COMPOSITE)
1240 signal_simple_error("Must be composite char", ch);
1241 return composite_char_string(emch);
1243 #endif /* ENABLE_COMPOSITE_CHARS */
1245 /************************************************************************/
1246 /* initialization */
1247 /************************************************************************/
1249 void syms_of_mule_charset(void)
1251 INIT_LRECORD_IMPLEMENTATION(charset);
1254 DEFSUBR(Ffind_charset);
1255 DEFSUBR(Fget_charset);
1256 DEFSUBR(Fcharset_list);
1257 DEFSUBR(Fcharset_name);
1258 DEFSUBR(Fmake_charset);
1259 DEFSUBR(Fmake_reverse_direction_charset);
1260 /* DEFSUBR (Freverse_direction_charset); */
1261 DEFSUBR(Fcharset_from_attributes);
1262 DEFSUBR(Fcharset_short_name);
1263 DEFSUBR(Fcharset_long_name);
1264 DEFSUBR(Fcharset_description);
1265 DEFSUBR(Fcharset_dimension);
1266 DEFSUBR(Fcharset_property);
1267 DEFSUBR(Fcharset_id);
1268 DEFSUBR(Fset_charset_ccl_program);
1269 DEFSUBR(Fset_charset_registry);
1271 DEFSUBR(Fmake_char);
1272 DEFSUBR(Fchar_charset);
1273 DEFSUBR(Fchar_octet);
1274 DEFSUBR(Fsplit_char);
1276 #ifdef ENABLE_COMPOSITE_CHARS
1277 DEFSUBR(Fmake_composite_char);
1278 DEFSUBR(Fcomposite_char_string);
1281 defsymbol(&Qcharsetp, "charsetp");
1282 defsymbol(&Qregistry, "registry");
1283 defsymbol(&Qfinal, "final");
1284 defsymbol(&Qgraphic, "graphic");
1285 defsymbol(&Qdirection, "direction");
1286 defsymbol(&Qreverse_direction_charset, "reverse-direction-charset");
1287 defsymbol(&Qshort_name, "short-name");
1288 defsymbol(&Qlong_name, "long-name");
1290 defsymbol(&Ql2r, "l2r");
1291 defsymbol(&Qr2l, "r2l");
1293 /* Charsets, compatible with FSF 20.3
1294 Naming convention is Script-Charset[-Edition] */
1295 defsymbol(&Qascii, "ascii");
1296 defsymbol(&Qcontrol_1, "control-1");
1297 defsymbol(&Qlatin_iso8859_1, "latin-iso8859-1");
1298 defsymbol(&Qlatin_iso8859_2, "latin-iso8859-2");
1299 defsymbol(&Qlatin_iso8859_3, "latin-iso8859-3");
1300 defsymbol(&Qlatin_iso8859_4, "latin-iso8859-4");
1301 defsymbol(&Qthai_tis620, "thai-tis620");
1302 defsymbol(&Qgreek_iso8859_7, "greek-iso8859-7");
1303 defsymbol(&Qarabic_iso8859_6, "arabic-iso8859-6");
1304 defsymbol(&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1305 defsymbol(&Qkatakana_jisx0201, "katakana-jisx0201");
1306 defsymbol(&Qlatin_jisx0201, "latin-jisx0201");
1307 defsymbol(&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1308 defsymbol(&Qlatin_iso8859_9, "latin-iso8859-9");
1309 defsymbol(&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1310 defsymbol(&Qchinese_gb2312, "chinese-gb2312");
1311 defsymbol(&Qjapanese_jisx0208, "japanese-jisx0208");
1312 defsymbol(&Qkorean_ksc5601, "korean-ksc5601");
1313 defsymbol(&Qjapanese_jisx0212, "japanese-jisx0212");
1314 defsymbol(&Qchinese_cns11643_1, "chinese-cns11643-1");
1315 defsymbol(&Qchinese_cns11643_2, "chinese-cns11643-2");
1316 defsymbol(&Qchinese_big5_1, "chinese-big5-1");
1317 defsymbol(&Qchinese_big5_2, "chinese-big5-2");
1319 defsymbol(&Qcomposite, "composite");
1322 void vars_of_mule_charset(void)
1326 chlook = xnew_and_zero(struct charset_lookup); /* zero for Purify. */
1327 dump_add_root_struct_ptr(&chlook, &charset_lookup_description);
1329 /* Table of charsets indexed by leading byte. */
1330 for (i = 0; i < countof(chlook->charset_by_leading_byte); i++)
1331 chlook->charset_by_leading_byte[i] = Qnil;
1333 /* Table of charsets indexed by type/final-byte/direction. */
1334 for (i = 0; i < countof(chlook->charset_by_attributes); i++)
1335 for (j = 0; j < countof(chlook->charset_by_attributes[0]); j++)
1337 k < countof(chlook->charset_by_attributes[0][0]);
1339 chlook->charset_by_attributes[i][j][k] = Qnil;
1341 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1342 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1345 void complex_vars_of_mule_charset(void)
1347 staticpro(&Vcharset_hash_table);
1348 Vcharset_hash_table =
1349 make_lisp_hash_table(50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1351 /* Predefined character sets. We store them into variables for
1354 staticpro(&Vcharset_ascii);
1356 make_charset(LEADING_BYTE_ASCII, Qascii, 1,
1357 CHARSET_TYPE_94, 1, 0, 'B',
1358 CHARSET_LEFT_TO_RIGHT,
1359 build_string("ASCII"),
1360 build_string("ASCII)"),
1361 build_string("ASCII (ISO646 IRV)"),
1362 build_string("\\(iso8859-[0-9]*\\|-ascii\\)"));
1363 staticpro(&Vcharset_control_1);
1364 Vcharset_control_1 =
1365 make_charset(LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1366 CHARSET_TYPE_94, 1, 1, 0,
1367 CHARSET_LEFT_TO_RIGHT,
1369 build_string("Control characters"),
1370 build_string("Control characters 128-191"),
1372 staticpro(&Vcharset_latin_iso8859_1);
1373 Vcharset_latin_iso8859_1 =
1374 make_charset(LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1375 CHARSET_TYPE_96, 1, 1, 'A',
1376 CHARSET_LEFT_TO_RIGHT,
1377 build_string("Latin-1"),
1378 build_string("ISO8859-1 (Latin-1)"),
1379 build_string("ISO8859-1 (Latin-1)"),
1380 build_string("iso8859-1"));
1381 staticpro(&Vcharset_latin_iso8859_2);
1382 Vcharset_latin_iso8859_2 =
1383 make_charset(LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1384 CHARSET_TYPE_96, 1, 1, 'B',
1385 CHARSET_LEFT_TO_RIGHT,
1386 build_string("Latin-2"),
1387 build_string("ISO8859-2 (Latin-2)"),
1388 build_string("ISO8859-2 (Latin-2)"),
1389 build_string("iso8859-2"));
1390 staticpro(&Vcharset_latin_iso8859_3);
1391 Vcharset_latin_iso8859_3 =
1392 make_charset(LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1393 CHARSET_TYPE_96, 1, 1, 'C',
1394 CHARSET_LEFT_TO_RIGHT,
1395 build_string("Latin-3"),
1396 build_string("ISO8859-3 (Latin-3)"),
1397 build_string("ISO8859-3 (Latin-3)"),
1398 build_string("iso8859-3"));
1399 staticpro(&Vcharset_latin_iso8859_4);
1400 Vcharset_latin_iso8859_4 =
1401 make_charset(LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1402 CHARSET_TYPE_96, 1, 1, 'D',
1403 CHARSET_LEFT_TO_RIGHT,
1404 build_string("Latin-4"),
1405 build_string("ISO8859-4 (Latin-4)"),
1406 build_string("ISO8859-4 (Latin-4)"),
1407 build_string("iso8859-4"));
1408 staticpro(&Vcharset_thai_tis620);
1409 Vcharset_thai_tis620 =
1410 make_charset(LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1411 CHARSET_TYPE_96, 1, 1, 'T',
1412 CHARSET_LEFT_TO_RIGHT,
1413 build_string("TIS620"),
1414 build_string("TIS620 (Thai)"),
1415 build_string("TIS620.2529 (Thai)"),
1416 build_string("tis620"));
1417 staticpro(&Vcharset_greek_iso8859_7);
1418 Vcharset_greek_iso8859_7 =
1419 make_charset(LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1420 CHARSET_TYPE_96, 1, 1, 'F',
1421 CHARSET_LEFT_TO_RIGHT,
1422 build_string("ISO8859-7"),
1423 build_string("ISO8859-7 (Greek)"),
1424 build_string("ISO8859-7 (Greek)"),
1425 build_string("iso8859-7"));
1426 staticpro(&Vcharset_arabic_iso8859_6);
1427 Vcharset_arabic_iso8859_6 =
1428 make_charset(LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1429 CHARSET_TYPE_96, 1, 1, 'G',
1430 CHARSET_RIGHT_TO_LEFT,
1431 build_string("ISO8859-6"),
1432 build_string("ISO8859-6 (Arabic)"),
1433 build_string("ISO8859-6 (Arabic)"),
1434 build_string("iso8859-6"));
1435 staticpro(&Vcharset_hebrew_iso8859_8);
1436 Vcharset_hebrew_iso8859_8 =
1437 make_charset(LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1438 CHARSET_TYPE_96, 1, 1, 'H',
1439 CHARSET_RIGHT_TO_LEFT,
1440 build_string("ISO8859-8"),
1441 build_string("ISO8859-8 (Hebrew)"),
1442 build_string("ISO8859-8 (Hebrew)"),
1443 build_string("iso8859-8"));
1444 staticpro(&Vcharset_katakana_jisx0201);
1445 Vcharset_katakana_jisx0201 =
1446 make_charset(LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1447 CHARSET_TYPE_94, 1, 1, 'I',
1448 CHARSET_LEFT_TO_RIGHT,
1449 build_string("JISX0201 Kana"),
1450 build_string("JISX0201.1976 (Japanese Kana)"),
1451 build_string("JISX0201.1976 Japanese Kana"),
1452 build_string("jisx0201.1976"));
1453 staticpro(&Vcharset_latin_jisx0201);
1454 Vcharset_latin_jisx0201 =
1455 make_charset(LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1456 CHARSET_TYPE_94, 1, 0, 'J',
1457 CHARSET_LEFT_TO_RIGHT,
1458 build_string("JISX0201 Roman"),
1459 build_string("JISX0201.1976 (Japanese Roman)"),
1460 build_string("JISX0201.1976 Japanese Roman"),
1461 build_string("jisx0201.1976"));
1462 staticpro(&Vcharset_cyrillic_iso8859_5);
1463 Vcharset_cyrillic_iso8859_5 =
1464 make_charset(LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
1465 2, CHARSET_TYPE_96, 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
1466 build_string("ISO8859-5"),
1467 build_string("ISO8859-5 (Cyrillic)"),
1468 build_string("ISO8859-5 (Cyrillic)"),
1469 build_string("iso8859-5"));
1470 staticpro(&Vcharset_latin_iso8859_9);
1471 Vcharset_latin_iso8859_9 =
1472 make_charset(LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1473 CHARSET_TYPE_96, 1, 1, 'M',
1474 CHARSET_LEFT_TO_RIGHT,
1475 build_string("Latin-5"),
1476 build_string("ISO8859-9 (Latin-5)"),
1477 build_string("ISO8859-9 (Latin-5)"),
1478 build_string("iso8859-9"));
1479 staticpro(&Vcharset_japanese_jisx0208_1978);
1480 Vcharset_japanese_jisx0208_1978 =
1481 make_charset(LEADING_BYTE_JAPANESE_JISX0208_1978,
1482 Qjapanese_jisx0208_1978, 3, CHARSET_TYPE_94X94, 2, 0,
1483 '@', CHARSET_LEFT_TO_RIGHT,
1484 build_string("JISX0208.1978"),
1485 build_string("JISX0208.1978 (Japanese)"),
1487 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1488 build_string("\\(jisx0208\\|jisc6226\\)\\.1978"));
1489 staticpro(&Vcharset_chinese_gb2312);
1490 Vcharset_chinese_gb2312 =
1491 make_charset(LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1492 CHARSET_TYPE_94X94, 2, 0, 'A',
1493 CHARSET_LEFT_TO_RIGHT,
1494 build_string("GB2312"),
1495 build_string("GB2312)"),
1496 build_string("GB2312 Chinese simplified"),
1497 build_string("gb2312"));
1498 staticpro(&Vcharset_japanese_jisx0208);
1499 Vcharset_japanese_jisx0208 =
1500 make_charset(LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1501 CHARSET_TYPE_94X94, 2, 0, 'B',
1502 CHARSET_LEFT_TO_RIGHT,
1503 build_string("JISX0208"),
1504 build_string("JISX0208.1983/1990 (Japanese)"),
1505 build_string("JISX0208.1983/1990 Japanese Kanji"),
1506 build_string("jisx0208.19\\(83\\|90\\)"));
1507 staticpro(&Vcharset_korean_ksc5601);
1508 Vcharset_korean_ksc5601 =
1509 make_charset(LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1510 CHARSET_TYPE_94X94, 2, 0, 'C',
1511 CHARSET_LEFT_TO_RIGHT,
1512 build_string("KSC5601"),
1513 build_string("KSC5601 (Korean"),
1514 build_string("KSC5601 Korean Hangul and Hanja"),
1515 build_string("ksc5601"));
1516 staticpro(&Vcharset_japanese_jisx0212);
1517 Vcharset_japanese_jisx0212 =
1518 make_charset(LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1519 CHARSET_TYPE_94X94, 2, 0, 'D',
1520 CHARSET_LEFT_TO_RIGHT,
1521 build_string("JISX0212"),
1522 build_string("JISX0212 (Japanese)"),
1523 build_string("JISX0212 Japanese Supplement"),
1524 build_string("jisx0212"));
1526 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1527 staticpro(&Vcharset_chinese_cns11643_1);
1528 Vcharset_chinese_cns11643_1 =
1529 make_charset(LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
1530 3, CHARSET_TYPE_94X94, 2, 0, 'G',
1531 CHARSET_LEFT_TO_RIGHT, build_string("CNS11643-1"),
1532 build_string("CNS11643-1 (Chinese traditional)"),
1533 build_string("CNS 11643 Plane 1 Chinese traditional"),
1534 build_string(CHINESE_CNS_PLANE_RE("1")));
1535 staticpro(&Vcharset_chinese_cns11643_2);
1536 Vcharset_chinese_cns11643_2 =
1537 make_charset(LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
1538 3, CHARSET_TYPE_94X94, 2, 0, 'H',
1539 CHARSET_LEFT_TO_RIGHT, build_string("CNS11643-2"),
1540 build_string("CNS11643-2 (Chinese traditional)"),
1541 build_string("CNS 11643 Plane 2 Chinese traditional"),
1542 build_string(CHINESE_CNS_PLANE_RE("2")));
1543 staticpro(&Vcharset_chinese_big5_1);
1544 Vcharset_chinese_big5_1 =
1545 make_charset(LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1546 CHARSET_TYPE_94X94, 2, 0, '0',
1547 CHARSET_LEFT_TO_RIGHT,
1548 build_string("Big5"),
1549 build_string("Big5 (Level-1)"),
1551 ("Big5 Level-1 Chinese traditional"),
1552 build_string("big5"));
1553 staticpro(&Vcharset_chinese_big5_2);
1554 Vcharset_chinese_big5_2 =
1555 make_charset(LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1556 CHARSET_TYPE_94X94, 2, 0, '1',
1557 CHARSET_LEFT_TO_RIGHT,
1558 build_string("Big5"),
1559 build_string("Big5 (Level-2)"),
1561 ("Big5 Level-2 Chinese traditional"),
1562 build_string("big5"));
1564 #ifdef ENABLE_COMPOSITE_CHARS
1565 /* #### For simplicity, we put composite chars into a 96x96 charset.
1566 This is going to lead to problems because you can run out of
1567 room, esp. as we don't yet recycle numbers. */
1568 staticpro(&Vcharset_composite);
1569 Vcharset_composite =
1570 make_charset(LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1571 CHARSET_TYPE_96X96, 2, 0, 0,
1572 CHARSET_LEFT_TO_RIGHT,
1573 build_string("Composite"),
1574 build_string("Composite characters"),
1575 build_string("Composite characters"),
1578 /* #### not dumped properly */
1579 composite_char_row_next = 32;
1580 composite_char_col_next = 32;
1582 Vcomposite_char_string2char_hash_table =
1583 make_lisp_hash_table(500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1584 Vcomposite_char_char2string_hash_table =
1585 make_lisp_hash_table(500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1586 staticpro(&Vcomposite_char_string2char_hash_table);
1587 staticpro(&Vcomposite_char_char2string_hash_table);
1588 #endif /* ENABLE_COMPOSITE_CHARS */