CID:226 - SECURE_CODING risky usage of strcpy
[sxemacs] / src / mule / mule-charset.c
1 /* Functions to handle multilingual characters.
2    Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3    Copyright (C) 1995 Sun Microsystems, Inc.
4
5 This file is part of SXEmacs
6
7 SXEmacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 SXEmacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
19
20
21 /* Synched up with: FSF 20.3.  Not in FSF. */
22
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
24
25 #include <config.h>
26 #include "lisp.h"
27
28 #include "buffer.h"
29 #include "chartab.h"
30 #include "elhash.h"
31 #include "lstream.h"
32 #include "ui/device.h"
33 #include "ui/faces.h"
34 #include "mule-ccl.h"
35
36 /* The various pre-defined charsets. */
37
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_iso8859_1;
41 Lisp_Object Vcharset_latin_iso8859_2;
42 Lisp_Object Vcharset_latin_iso8859_3;
43 Lisp_Object Vcharset_latin_iso8859_4;
44 Lisp_Object Vcharset_thai_tis620;
45 Lisp_Object Vcharset_greek_iso8859_7;
46 Lisp_Object Vcharset_arabic_iso8859_6;
47 Lisp_Object Vcharset_hebrew_iso8859_8;
48 Lisp_Object Vcharset_katakana_jisx0201;
49 Lisp_Object Vcharset_latin_jisx0201;
50 Lisp_Object Vcharset_cyrillic_iso8859_5;
51 Lisp_Object Vcharset_latin_iso8859_9;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_chinese_gb2312;
54 Lisp_Object Vcharset_japanese_jisx0208;
55 Lisp_Object Vcharset_korean_ksc5601;
56 Lisp_Object Vcharset_japanese_jisx0212;
57 Lisp_Object Vcharset_chinese_cns11643_1;
58 Lisp_Object Vcharset_chinese_cns11643_2;
59 Lisp_Object Vcharset_chinese_big5_1;
60 Lisp_Object Vcharset_chinese_big5_2;
61
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite;
64
65 /* Hash tables for composite chars.  One maps string representing
66    composed chars to their equivalent chars; one goes the
67    other way. */
68 Lisp_Object Vcomposite_char_char2string_hash_table;
69 Lisp_Object Vcomposite_char_string2char_hash_table;
70
71 static int composite_char_row_next;
72 static int composite_char_col_next;
73
74 #endif                          /* ENABLE_COMPOSITE_CHARS */
75
76 struct charset_lookup *chlook;
77
78 static const struct lrecord_description charset_lookup_description_1[] = {
79         {XD_LISP_OBJECT_ARRAY,
80          offsetof(struct charset_lookup, charset_by_leading_byte),
81          128 + 4 * 128 * 2},
82         {XD_END}
83 };
84
85 static const struct struct_description charset_lookup_description = {
86         sizeof(struct charset_lookup),
87         charset_lookup_description_1
88 };
89
90 /* Table of number of bytes in the string representation of a character
91    indexed by the first byte of that representation.
92
93    rep_bytes_by_first_byte(c) is more efficient than the equivalent
94    canonical computation:
95
96    XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
97
98 const Bytecount rep_bytes_by_first_byte[0xA0] = {       /* 0x00 - 0x7f are for straight ASCII */
99         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107         /* 0x80 - 0x8f are for Dimension-1 official charsets */
108         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109         /* 0x90 - 0x9d are for Dimension-2 official charsets */
110         /* 0x9e is for Dimension-1 private charsets */
111         /* 0x9f is for Dimension-2 private charsets */
112         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
113 };
114
115 Lisp_Object Qcharsetp;
116
117 /* Qdoc_string, Qdimension, Qchars defined in general.c */
118 Lisp_Object Qregistry, Qfinal, Qgraphic;
119 Lisp_Object Qdirection;
120 Lisp_Object Qreverse_direction_charset;
121 Lisp_Object Qleading_byte;
122 Lisp_Object Qshort_name, Qlong_name;
123
124 Lisp_Object Qascii,
125     Qcontrol_1,
126     Qlatin_iso8859_1,
127     Qlatin_iso8859_2,
128     Qlatin_iso8859_3,
129     Qlatin_iso8859_4,
130     Qthai_tis620,
131     Qgreek_iso8859_7,
132     Qarabic_iso8859_6,
133     Qhebrew_iso8859_8,
134     Qkatakana_jisx0201,
135     Qlatin_jisx0201,
136     Qcyrillic_iso8859_5,
137     Qlatin_iso8859_9,
138     Qjapanese_jisx0208_1978,
139     Qchinese_gb2312,
140     Qjapanese_jisx0208,
141     Qkorean_ksc5601,
142     Qjapanese_jisx0212,
143     Qchinese_cns11643_1,
144     Qchinese_cns11643_2, Qchinese_big5_1, Qchinese_big5_2, Qcomposite;
145
146 Lisp_Object Ql2r, Qr2l;
147
148 Lisp_Object Vcharset_hash_table;
149
150 /* Composite characters are characters constructed by overstriking two
151    or more regular characters.
152
153    1) The old Mule implementation involves storing composite characters
154       in a buffer as a tag followed by all of the actual characters
155       used to make up the composite character.  I think this is a bad
156       idea; it greatly complicates code that wants to handle strings
157       one character at a time because it has to deal with the possibility
158       of great big ungainly characters.  It's much more reasonable to
159       simply store an index into a table of composite characters.
160
161    2) The current implementation only allows for 16,384 separate
162       composite characters over the lifetime of the SXEmacs process.
163       This could become a potential problem if the user
164       edited lots of different files that use composite characters.
165       Due to FSF bogosity, increasing the number of allowable
166       composite characters under Mule would decrease the number
167       of possible faces that can exist.  Mule already has shrunk
168       this to 2048, and further shrinkage would become uncomfortable.
169       No such problems exist in SXEmacs.
170
171       Composite characters could be represented as 0x80 C1 C2 C3,
172       where each C[1-3] is in the range 0xA0 - 0xFF.  This allows
173       for slightly under 2^20 (one million) composite characters
174       over the SXEmacs process lifetime, and you only need to
175       increase the size of a Mule character from 19 to 21 bits.
176       Or you could use 0x80 C1 C2 C3 C4, allowing for about
177       85 million (slightly over 2^26) composite characters. */
178 \f
179 /************************************************************************/
180 /*                       Basic Emchar functions                         */
181 /************************************************************************/
182
183 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
184    string in STR.  Returns the number of bytes stored.
185    Do not call this directly.  Use the macro set_charptr_emchar() instead.
186  */
187
188 Bytecount non_ascii_set_charptr_emchar(Bufbyte * str, Emchar c)
189 {
190         Bufbyte *p;
191         Bufbyte lb;
192         int c1, c2;
193         Lisp_Object charset;
194
195         p = str;
196         BREAKUP_CHAR(c, charset, c1, c2);
197         lb = CHAR_LEADING_BYTE(c);
198         if (LEADING_BYTE_PRIVATE_P(lb))
199                 *p++ = PRIVATE_LEADING_BYTE_PREFIX(lb);
200         *p++ = lb;
201         if (EQ(charset, Vcharset_control_1))
202                 c1 += 0x20;
203         *p++ = c1 | 0x80;
204         if (c2)
205                 *p++ = c2 | 0x80;
206
207         return (p - str);
208 }
209
210 /* Return the first character from a Mule-encoded string in STR,
211    assuming it's non-ASCII.  Do not call this directly.
212    Use the macro charptr_emchar() instead. */
213
214 Emchar non_ascii_charptr_emchar(const Bufbyte * str)
215 {
216         Bufbyte i0 = *str, i1, i2 = 0;
217         Lisp_Object charset;
218
219         if (i0 == LEADING_BYTE_CONTROL_1)
220                 return (Emchar) (*++str - 0x20);
221
222         if (LEADING_BYTE_PREFIX_P(i0))
223                 i0 = *++str;
224
225         i1 = *++str & 0x7F;
226
227         charset = CHARSET_BY_LEADING_BYTE(i0);
228         if (XCHARSET_DIMENSION(charset) == 2)
229                 i2 = *++str & 0x7F;
230
231         return MAKE_CHAR(charset, i1, i2);
232 }
233
234 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
235    Do not call this directly.  Use the macro valid_char_p() instead. */
236
237 int non_ascii_valid_char_p(Emchar ch)
238 {
239         int f1, f2, f3;
240
241         /* Must have only lowest 19 bits set */
242         if (ch & ~0x7FFFF)
243                 return 0;
244
245         f1 = CHAR_FIELD1(ch);
246         f2 = CHAR_FIELD2(ch);
247         f3 = CHAR_FIELD3(ch);
248
249         if (f1 == 0) {
250                 Lisp_Object charset;
251
252                 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
253                     (f2 > MAX_CHAR_FIELD2_OFFICIAL
254                      && f2 < MIN_CHAR_FIELD2_PRIVATE)
255                     || f2 > MAX_CHAR_FIELD2_PRIVATE)
256                         return 0;
257                 if (f3 < 0x20)
258                         return 0;
259
260                 if (f3 != 0x20 && f3 != 0x7F
261                     && !(f2 >= MIN_CHAR_FIELD2_PRIVATE
262                          && f2 <= MAX_CHAR_FIELD2_PRIVATE))
263                         return 1;
264
265                 /*
266                    NOTE: This takes advantage of the fact that
267                    FIELD2_TO_OFFICIAL_LEADING_BYTE and
268                    FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
269                  */
270                 charset =
271                     CHARSET_BY_LEADING_BYTE(f2 +
272                                             FIELD2_TO_OFFICIAL_LEADING_BYTE);
273                 if (EQ(charset, Qnil))
274                         return 0;
275                 return (XCHARSET_CHARS(charset) == 96);
276         } else {
277                 Lisp_Object charset;
278
279                 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
280                     (f1 > MAX_CHAR_FIELD1_OFFICIAL
281                      && f1 < MIN_CHAR_FIELD1_PRIVATE)
282                     || f1 > MAX_CHAR_FIELD1_PRIVATE)
283                         return 0;
284                 if (f2 < 0x20 || f3 < 0x20)
285                         return 0;
286
287 #ifdef ENABLE_COMPOSITE_CHARS
288                 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE ==
289                     LEADING_BYTE_COMPOSITE) {
290                         if (UNBOUNDP
291                             (Fgethash
292                              (make_int(ch),
293                               Vcomposite_char_char2string_hash_table,
294                               Qunbound)))
295                                 return 0;
296                         return 1;
297                 }
298 #endif                          /* ENABLE_COMPOSITE_CHARS */
299
300                 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
301                     && !(f1 >= MIN_CHAR_FIELD1_PRIVATE
302                          && f1 <= MAX_CHAR_FIELD1_PRIVATE))
303                         return 1;
304
305                 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
306                         charset =
307                             CHARSET_BY_LEADING_BYTE(f1 +
308                                                     FIELD1_TO_OFFICIAL_LEADING_BYTE);
309                 else
310                         charset =
311                             CHARSET_BY_LEADING_BYTE(f1 +
312                                                     FIELD1_TO_PRIVATE_LEADING_BYTE);
313
314                 if (EQ(charset, Qnil))
315                         return 0;
316                 return (XCHARSET_CHARS(charset) == 96);
317         }
318 }
319 \f
320 /************************************************************************/
321 /*                       Basic string functions                         */
322 /************************************************************************/
323
324 /* Copy the character pointed to by SRC into DST.  Do not call this
325    directly.  Use the macro charptr_copy_char() instead.
326    Return the number of bytes copied.  */
327
328 Bytecount non_ascii_charptr_copy_char(const Bufbyte * src, Bufbyte * dst)
329 {
330         unsigned int bytes = REP_BYTES_BY_FIRST_BYTE(*src);
331         unsigned int i;
332         for (i = bytes; i; i--, dst++, src++)
333                 *dst = *src;
334         return bytes;
335 }
336 \f
337 /************************************************************************/
338 /*                        streams of Emchars                            */
339 /************************************************************************/
340
341 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
342    The functions below are not meant to be called directly; use
343    the macros in insdel.h. */
344
345 Emchar Lstream_get_emchar_1(Lstream * stream, int ch)
346 {
347         Bufbyte str[MAX_EMCHAR_LEN];
348         Bufbyte *strptr = str;
349         unsigned int bytes;
350
351         str[0] = (Bufbyte) ch;
352
353         for (bytes = REP_BYTES_BY_FIRST_BYTE(ch) - 1; bytes; bytes--) {
354                 int c = Lstream_getc(stream);
355                 bufpos_checking_assert(c >= 0);
356                 *++strptr = (Bufbyte) c;
357         }
358         return charptr_emchar(str);
359 }
360
361 int Lstream_fput_emchar(Lstream * stream, Emchar ch)
362 {
363         Bufbyte str[MAX_EMCHAR_LEN];
364         Bytecount len = set_charptr_emchar(str, ch);
365         return Lstream_write(stream, str, len);
366 }
367
368 void Lstream_funget_emchar(Lstream * stream, Emchar ch)
369 {
370         Bufbyte str[MAX_EMCHAR_LEN];
371         Bytecount len = set_charptr_emchar(str, ch);
372         Lstream_unread(stream, str, len);
373 }
374 \f
375 /************************************************************************/
376 /*                            charset object                            */
377 /************************************************************************/
378
379 static Lisp_Object mark_charset(Lisp_Object obj)
380 {
381         Lisp_Charset *cs = XCHARSET(obj);
382
383         mark_object(cs->short_name);
384         mark_object(cs->long_name);
385         mark_object(cs->doc_string);
386         mark_object(cs->registry);
387         mark_object(cs->ccl_program);
388         return cs->name;
389 }
390
391 static void
392 print_charset(Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
393 {
394         Lisp_Charset *cs = XCHARSET(obj);
395         char buf[200];
396
397         if (print_readably)
398                 error("printing unreadable object #<charset %s 0x%x>",
399                       string_data(XSYMBOL(CHARSET_NAME(cs))->name),
400                       cs->header.uid);
401
402         write_c_string("#<charset ", printcharfun);
403         print_internal(CHARSET_NAME(cs), printcharfun, 0);
404         write_c_string(" ", printcharfun);
405         print_internal(CHARSET_SHORT_NAME(cs), printcharfun, 1);
406         write_c_string(" ", printcharfun);
407         print_internal(CHARSET_LONG_NAME(cs), printcharfun, 1);
408         write_c_string(" ", printcharfun);
409         print_internal(CHARSET_DOC_STRING(cs), printcharfun, 1);
410         sprintf(buf, " %s %s cols=%d g%d final='%c' reg=",
411                 CHARSET_TYPE(cs) == CHARSET_TYPE_94 ? "94" :
412                 CHARSET_TYPE(cs) == CHARSET_TYPE_96 ? "96" :
413                 CHARSET_TYPE(cs) == CHARSET_TYPE_94X94 ? "94x94" :
414                 "96x96",
415                 CHARSET_DIRECTION(cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
416                 CHARSET_COLUMNS(cs), CHARSET_GRAPHIC(cs), CHARSET_FINAL(cs));
417         write_c_string(buf, printcharfun);
418         print_internal(CHARSET_REGISTRY(cs), printcharfun, 0);
419         sprintf(buf, " 0x%x>", cs->header.uid);
420         write_c_string(buf, printcharfun);
421 }
422
423 static const struct lrecord_description charset_description[] = {
424         {XD_LISP_OBJECT, offsetof(Lisp_Charset, name)},
425         {XD_LISP_OBJECT, offsetof(Lisp_Charset, doc_string)},
426         {XD_LISP_OBJECT, offsetof(Lisp_Charset, registry)},
427         {XD_LISP_OBJECT, offsetof(Lisp_Charset, short_name)},
428         {XD_LISP_OBJECT, offsetof(Lisp_Charset, long_name)},
429         {XD_LISP_OBJECT, offsetof(Lisp_Charset, reverse_direction_charset)},
430         {XD_LISP_OBJECT, offsetof(Lisp_Charset, ccl_program)},
431         {XD_END}
432 };
433
434 DEFINE_LRECORD_IMPLEMENTATION("charset", charset,
435                               mark_charset, print_charset, 0, 0, 0,
436                               charset_description, Lisp_Charset);
437
438 /* Make a new charset. */
439 /* #### SJT Should generic properties be allowed? */
440 static Lisp_Object
441 make_charset(int id, Lisp_Object name, unsigned char rep_bytes,
442              unsigned char type, unsigned char columns, unsigned char graphic,
443              Bufbyte final, unsigned char direction, Lisp_Object short_name,
444              Lisp_Object long_name, Lisp_Object doc, Lisp_Object reg)
445 {
446         Lisp_Object obj;
447         Lisp_Charset *cs = alloc_lcrecord_type(Lisp_Charset, &lrecord_charset);
448
449         zero_lcrecord(cs);
450
451         XSETCHARSET(obj, cs);
452
453         CHARSET_ID(cs) = id;
454         CHARSET_NAME(cs) = name;
455         CHARSET_SHORT_NAME(cs) = short_name;
456         CHARSET_LONG_NAME(cs) = long_name;
457         CHARSET_REP_BYTES(cs) = rep_bytes;
458         CHARSET_DIRECTION(cs) = direction;
459         CHARSET_TYPE(cs) = type;
460         CHARSET_COLUMNS(cs) = columns;
461         CHARSET_GRAPHIC(cs) = graphic;
462         CHARSET_FINAL(cs) = final;
463         CHARSET_DOC_STRING(cs) = doc;
464         CHARSET_REGISTRY(cs) = reg;
465         CHARSET_CCL_PROGRAM(cs) = Qnil;
466         CHARSET_REVERSE_DIRECTION_CHARSET(cs) = Qnil;
467
468         CHARSET_DIMENSION(cs) = (CHARSET_TYPE(cs) == CHARSET_TYPE_94 ||
469                                  CHARSET_TYPE(cs) == CHARSET_TYPE_96) ? 1 : 2;
470         CHARSET_CHARS(cs) = (CHARSET_TYPE(cs) == CHARSET_TYPE_94 ||
471                              CHARSET_TYPE(cs) == CHARSET_TYPE_94X94) ? 94 : 96;
472
473         if (final) {
474                 /* some charsets do not have final characters.  This includes
475                    ASCII, Control-1, Composite, and the two faux private
476                    charsets. */
477                 assert(NILP
478                        (chlook->charset_by_attributes[type][final][direction]));
479                 chlook->charset_by_attributes[type][final][direction] = obj;
480         }
481
482         assert(NILP(chlook->charset_by_leading_byte[id - 128]));
483         chlook->charset_by_leading_byte[id - 128] = obj;
484
485         /* Some charsets are "faux" and don't have names or really exist at
486            all except in the leading-byte table. */
487         if (!NILP(name))
488                 Fputhash(name, obj, Vcharset_hash_table);
489         return obj;
490 }
491
492 static int get_unallocated_leading_byte(int dimension)
493 {
494         int lb;
495
496         if (dimension == 1) {
497                 if (chlook->next_allocated_1_byte_leading_byte >
498                     (Bufbyte)MAX_LEADING_BYTE_PRIVATE_1)
499                         lb = 0;
500                 else
501                         lb = chlook->next_allocated_1_byte_leading_byte++;
502 #if MAX_LEADING_BYTE_PRIVATE_2 == 255
503         } else if (chlook->next_allocated_2_byte_leading_byte == 0) {
504                 lb = 0;
505 #else
506         } else if (chlook->next_allocated_2_byte_leading_byte >
507                    (Bufbyte)MAX_LEADING_BYTE_PRIVATE_2) {
508                 lb = 0;
509 #endif
510         } else {
511                         lb = chlook->next_allocated_2_byte_leading_byte++;
512         }
513
514         if (!lb)
515                 signal_simple_error
516                     ("No more character sets free for this dimension",
517                      make_int(dimension));
518
519         return lb;
520 }
521 \f
522 /************************************************************************/
523 /*                      Basic charset Lisp functions                    */
524 /************************************************************************/
525
526 DEFUN("charsetp", Fcharsetp, 1, 1, 0,   /*
527 Return non-nil if OBJECT is a charset.
528 */
529       (object))
530 {
531         return CHARSETP(object) ? Qt : Qnil;
532 }
533
534 DEFUN("find-charset", Ffind_charset, 1, 1, 0,   /*
535 Retrieve the charset of the given name.
536 If CHARSET-OR-NAME is a charset object, it is simply returned.
537 Otherwise, CHARSET-OR-NAME should be a symbol.  If there is no such charset,
538 nil is returned.  Otherwise the associated charset object is returned.
539 */
540       (charset_or_name))
541 {
542         if (CHARSETP(charset_or_name))
543                 return charset_or_name;
544
545         CHECK_SYMBOL(charset_or_name);
546         return Fgethash(charset_or_name, Vcharset_hash_table, Qnil);
547 }
548
549 DEFUN("get-charset", Fget_charset, 1, 1, 0,     /*
550 Retrieve the charset of the given name.
551 Same as `find-charset' except an error is signalled if there is no such
552 charset instead of returning nil.
553 */
554       (name))
555 {
556         Lisp_Object charset = Ffind_charset(name);
557
558         if (NILP(charset))
559                 signal_simple_error("No such charset", name);
560         return charset;
561 }
562
563 /* We store the charsets in hash tables with the names as the key and the
564    actual charset object as the value.  Occasionally we need to use them
565    in a list format.  These routines provide us with that. */
566 struct charset_list_closure {
567         Lisp_Object *charset_list;
568 };
569
570 static int
571 add_charset_to_list_mapper(Lisp_Object key, Lisp_Object value,
572                            void *charset_list_closure)
573 {
574         /* This function can GC */
575         struct charset_list_closure *chcl =
576             (struct charset_list_closure *)charset_list_closure;
577         Lisp_Object *charset_list = chcl->charset_list;
578
579         *charset_list = Fcons(XCHARSET_NAME(value), *charset_list);
580         return 0;
581 }
582
583 DEFUN("charset-list", Fcharset_list, 0, 0, 0,   /*
584 Return a list of the names of all defined charsets.
585 */
586       ())
587 {
588         Lisp_Object charset_list = Qnil;
589         struct gcpro gcpro1;
590         struct charset_list_closure charset_list_closure;
591
592         GCPRO1(charset_list);
593         charset_list_closure.charset_list = &charset_list;
594         elisp_maphash(add_charset_to_list_mapper, Vcharset_hash_table,
595                       &charset_list_closure);
596         UNGCPRO;
597
598         return charset_list;
599 }
600
601 DEFUN("charset-name", Fcharset_name, 1, 1, 0,   /*
602 Return the name of charset CHARSET.
603 */
604       (charset))
605 {
606         Lisp_Object tmp = Fget_charset(charset);
607         return XCHARSET_NAME(tmp);
608 }
609
610 /* #### SJT Should generic properties be allowed? */
611 DEFUN("make-charset", Fmake_charset, 3, 3, 0,   /*
612 Define a new character set.
613 This function is for use with Mule support.
614 NAME is a symbol, the name by which the character set is normally referred.
615 DOC-STRING is a string describing the character set.
616 PROPS is a property list, describing the specific nature of the
617 character set.  Recognized properties are:
618
619 'short-name   Short version of the charset name (ex: Latin-1)
620 'long-name    Long version of the charset name (ex: ISO8859-1 (Latin-1))
621 'registry     A regular expression matching the font registry field for
622 this character set.
623 'dimension    Number of octets used to index a character in this charset.
624 Either 1 or 2.  Defaults to 1.
625 'columns      Number of columns used to display a character in this charset.
626 Only used in TTY mode. (Under X, the actual width of a
627 character can be derived from the font used to display the
628 characters.) If unspecified, defaults to the dimension
629 (this is almost       always the correct value).
630 'chars                Number of characters in each dimension (94 or 96).
631 Defaults to 94.  Note that if the dimension is 2, the
632 character set thus described is 94x94 or 96x96.
633 'final                Final byte of ISO 2022 escape sequence.  Must be
634 supplied.  Each combination of (DIMENSION, CHARS) defines a
635 separate namespace for final bytes.  Note that ISO
636 2022 restricts the final byte to the range
637 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
638 dimension == 2.  Note also that final bytes in the range
639 0x30 - 0x3F are reserved for user-defined (not official)
640 character sets.
641 'graphic      0 (use left half of font on output) or 1 (use right half
642 of font on output).  Defaults to 0.  For example, for
643 a font whose registry is ISO8859-1, the left half
644 (octets 0x20 - 0x7F) is the `ascii' character set, while
645 the right half (octets 0xA0 - 0xFF) is the `latin-1'
646 character set.  With 'graphic set to 0, the octets
647 will have their high bit cleared; with it set to 1,
648 the octets will have their high bit set.
649 'direction    'l2r (left-to-right) or 'r2l (right-to-left).
650 Defaults to 'l2r.
651 'ccl-program  A compiled CCL program used to convert a character in
652 this charset into an index into the font.  This is in
653 addition to the 'graphic property.  The CCL program
654 is passed the octets of the character, with the high
655 bit cleared and set depending upon whether the value
656 of the 'graphic property is 0 or 1.
657 */
658       (name, doc_string, props))
659 {
660         int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
661         int direction = CHARSET_LEFT_TO_RIGHT;
662         int type;
663         Lisp_Object registry = Qnil;
664         Lisp_Object charset;
665         Lisp_Object ccl_program = Qnil;
666         Lisp_Object short_name = Qnil, long_name = Qnil;
667
668         CHECK_SYMBOL(name);
669         if (!NILP(doc_string))
670                 CHECK_STRING(doc_string);
671
672         charset = Ffind_charset(name);
673         if (!NILP(charset))
674                 signal_simple_error("Cannot redefine existing charset", name);
675
676         {
677                 EXTERNAL_PROPERTY_LIST_LOOP_3(keyword, value, props) {
678                         if (EQ(keyword, Qshort_name)) {
679                                 CHECK_STRING(value);
680                                 short_name = value;
681                         }
682
683                         else if (EQ(keyword, Qlong_name)) {
684                                 CHECK_STRING(value);
685                                 long_name = value;
686                         }
687
688                         else if (EQ(keyword, Qdimension)) {
689                                 CHECK_INT(value);
690                                 dimension = XINT(value);
691                                 if (dimension < 1 || dimension > 2)
692                                         signal_simple_error
693                                             ("Invalid value for 'dimension",
694                                              value);
695                         }
696
697                         else if (EQ(keyword, Qchars)) {
698                                 CHECK_INT(value);
699                                 chars = XINT(value);
700                                 if (chars != 94 && chars != 96)
701                                         signal_simple_error
702                                             ("Invalid value for 'chars", value);
703                         }
704
705                         else if (EQ(keyword, Qcolumns)) {
706                                 CHECK_INT(value);
707                                 columns = XINT(value);
708                                 if (columns != 1 && columns != 2)
709                                         signal_simple_error
710                                             ("Invalid value for 'columns",
711                                              value);
712                         }
713
714                         else if (EQ(keyword, Qgraphic)) {
715                                 CHECK_INT(value);
716                                 graphic = XINT(value);
717                                 if (graphic < 0 || graphic > 1)
718                                         signal_simple_error
719                                             ("Invalid value for 'graphic",
720                                              value);
721                         }
722
723                         else if (EQ(keyword, Qregistry)) {
724                                 CHECK_STRING(value);
725                                 registry = value;
726                         }
727
728                         else if (EQ(keyword, Qdirection)) {
729                                 if (EQ(value, Ql2r))
730                                         direction = CHARSET_LEFT_TO_RIGHT;
731                                 else if (EQ(value, Qr2l))
732                                         direction = CHARSET_RIGHT_TO_LEFT;
733                                 else
734                                         signal_simple_error
735                                             ("Invalid value for 'direction",
736                                              value);
737                         }
738
739                         else if (EQ(keyword, Qfinal)) {
740                                 CHECK_CHAR_COERCE_INT(value);
741                                 final = XCHAR(value);
742                                 if (final < '0' || final > '~')
743                                         signal_simple_error
744                                             ("Invalid value for 'final", value);
745                         }
746
747                         else if (EQ(keyword, Qccl_program)) {
748                                 struct ccl_program test_ccl;
749
750                                 if (setup_ccl_program(&test_ccl, value) < 0)
751                                         signal_simple_error
752                                             ("Invalid value for 'ccl-program",
753                                              value);
754                                 ccl_program = value;
755                         }
756
757                         else
758                                 signal_simple_error("Unrecognized property",
759                                                     keyword);
760                 }
761         }
762
763         if (!final)
764                 error("'final must be specified");
765         if (dimension == 2 && final > 0x5F)
766                 signal_simple_error
767                     ("Final must be in the range 0x30 - 0x5F for dimension == 2",
768                      make_char(final));
769
770         if (dimension == 1)
771                 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
772         else
773                 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
774
775         if (!NILP(CHARSET_BY_ATTRIBUTES(type, final, CHARSET_LEFT_TO_RIGHT)) ||
776             !NILP(CHARSET_BY_ATTRIBUTES(type, final, CHARSET_RIGHT_TO_LEFT)))
777                 error
778                     ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
779
780         id = get_unallocated_leading_byte(dimension);
781
782         if (NILP(doc_string))
783                 doc_string = build_string("");
784
785         if (NILP(registry))
786                 registry = build_string("");
787
788         if (NILP(short_name))
789                 XSETSTRING(short_name, XSYMBOL(name)->name);
790
791         if (NILP(long_name))
792                 long_name = doc_string;
793
794         if (columns == -1)
795                 columns = dimension;
796         charset = make_charset(id, name, dimension + 2, type, columns, graphic,
797                                final, direction, short_name, long_name,
798                                doc_string, registry);
799         if (!NILP(ccl_program))
800                 XCHARSET_CCL_PROGRAM(charset) = ccl_program;
801         return charset;
802 }
803
804 DEFUN("make-reverse-direction-charset", Fmake_reverse_direction_charset, 2, 2, 0,       /*
805 Make a charset equivalent to CHARSET but which goes in the opposite direction.
806 NEW-NAME is the name of the new charset.  Return the new charset.
807 */
808       (charset, new_name))
809 {
810         Lisp_Object new_charset = Qnil;
811         int id, dimension, columns, graphic, final;
812         int direction, type;
813         Lisp_Object registry, doc_string, short_name, long_name;
814         Lisp_Charset *cs;
815
816         charset = Fget_charset(charset);
817         if (!NILP(XCHARSET_REVERSE_DIRECTION_CHARSET(charset)))
818                 signal_simple_error
819                     ("Charset already has reverse-direction charset", charset);
820
821         CHECK_SYMBOL(new_name);
822         if (!NILP(Ffind_charset(new_name)))
823                 signal_simple_error("Cannot redefine existing charset",
824                                     new_name);
825
826         cs = XCHARSET(charset);
827
828         type = CHARSET_TYPE(cs);
829         columns = CHARSET_COLUMNS(cs);
830         dimension = CHARSET_DIMENSION(cs);
831         id = get_unallocated_leading_byte(dimension);
832
833         graphic = CHARSET_GRAPHIC(cs);
834         final = CHARSET_FINAL(cs);
835         direction = CHARSET_RIGHT_TO_LEFT;
836         if (CHARSET_DIRECTION(cs) == CHARSET_RIGHT_TO_LEFT)
837                 direction = CHARSET_LEFT_TO_RIGHT;
838         doc_string = CHARSET_DOC_STRING(cs);
839         short_name = CHARSET_SHORT_NAME(cs);
840         long_name = CHARSET_LONG_NAME(cs);
841         registry = CHARSET_REGISTRY(cs);
842
843         new_charset = make_charset(id, new_name, dimension + 2, type, columns,
844                                    graphic, final, direction, short_name,
845                                    long_name, doc_string, registry);
846
847         CHARSET_REVERSE_DIRECTION_CHARSET(cs) = new_charset;
848         XCHARSET_REVERSE_DIRECTION_CHARSET(new_charset) = charset;
849
850         return new_charset;
851 }
852
853 /* #### Reverse direction charsets not yet implemented.  */
854 #if 0
855 DEFUN("charset-reverse-direction-charset", Fcharset_reverse_direction_charset, 1, 1, 0, /*
856 Return the reverse-direction charset parallel to CHARSET, if any.
857 This is the charset with the same properties (in particular, the same
858 dimension, number of characters per dimension, and final byte) as
859 CHARSET but whose characters are displayed in the opposite direction.
860 */
861       (charset))
862 {
863         charset = Fget_charset(charset);
864         return XCHARSET_REVERSE_DIRECTION_CHARSET(charset);
865 }
866 #endif
867
868 DEFUN("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0,     /*
869 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
870 If DIRECTION is omitted, both directions will be checked (left-to-right
871 will be returned if character sets exist for both directions).
872 */
873       (dimension, chars, final, direction))
874 {
875         int dm, ch, fi, di = -1;
876         int type;
877         Lisp_Object obj = Qnil;
878
879         CHECK_INT(dimension);
880         dm = XINT(dimension);
881         if (dm < 1 || dm > 2)
882                 signal_simple_error("Invalid value for DIMENSION", dimension);
883
884         CHECK_INT(chars);
885         ch = XINT(chars);
886         if (ch != 94 && ch != 96)
887                 signal_simple_error("Invalid value for CHARS", chars);
888
889         CHECK_CHAR_COERCE_INT(final);
890         fi = XCHAR(final);
891         if (fi < '0' || fi > '~')
892                 signal_simple_error("Invalid value for FINAL", final);
893
894         if (EQ(direction, Ql2r))
895                 di = CHARSET_LEFT_TO_RIGHT;
896         else if (EQ(direction, Qr2l))
897                 di = CHARSET_RIGHT_TO_LEFT;
898         else if (!NILP(direction))
899                 signal_simple_error("Invalid value for DIRECTION", direction);
900
901         if (dm == 2 && fi > 0x5F)
902                 signal_simple_error
903                     ("Final must be in the range 0x30 - 0x5F for dimension == 2",
904                      final);
905
906         if (dm == 1)
907                 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
908         else
909                 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
910
911         if (di == -1) {
912                 obj = CHARSET_BY_ATTRIBUTES(type, fi, CHARSET_LEFT_TO_RIGHT);
913                 if (NILP(obj))
914                         obj =
915                             CHARSET_BY_ATTRIBUTES(type, fi,
916                                                   CHARSET_RIGHT_TO_LEFT);
917         } else
918                 obj = CHARSET_BY_ATTRIBUTES(type, fi, di);
919
920         if (CHARSETP(obj))
921                 return XCHARSET_NAME(obj);
922         return obj;
923 }
924
925 DEFUN("charset-short-name", Fcharset_short_name, 1, 1, 0,       /*
926 Return short name of CHARSET.
927 */
928       (charset))
929 {
930         Lisp_Object tmp = Fget_charset(charset);
931         return XCHARSET_SHORT_NAME(tmp);
932 }
933
934 DEFUN("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
935 Return long name of CHARSET.
936 */
937       (charset))
938 {
939         Lisp_Object tmp = Fget_charset(charset);
940         return XCHARSET_LONG_NAME(tmp);
941 }
942
943 DEFUN("charset-description", Fcharset_description, 1, 1, 0,     /*
944 Return description of CHARSET.
945 */
946       (charset))
947 {
948         Lisp_Object tmp = Fget_charset(charset);
949         return XCHARSET_DOC_STRING(tmp);
950 }
951
952 DEFUN("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
953 Return dimension of CHARSET.
954 */
955       (charset))
956 {
957         Lisp_Object tmp = Fget_charset(charset);
958         return make_int(XCHARSET_DIMENSION(tmp));
959 }
960
961 DEFUN("charset-property", Fcharset_property, 2, 2, 0,   /*
962 Return property PROP of CHARSET, a charset object or symbol naming a charset.
963 Recognized properties are those listed in `make-charset', as well as
964 'name and 'doc-string.
965 */
966       (charset, prop))
967 {
968         Lisp_Charset *cs;
969
970         charset = Fget_charset(charset);
971         cs = XCHARSET(charset);
972
973         CHECK_SYMBOL(prop);
974         if (EQ(prop, Qname))
975                 return CHARSET_NAME(cs);
976         if (EQ(prop, Qshort_name))
977                 return CHARSET_SHORT_NAME(cs);
978         if (EQ(prop, Qlong_name))
979                 return CHARSET_LONG_NAME(cs);
980         if (EQ(prop, Qdoc_string))
981                 return CHARSET_DOC_STRING(cs);
982         if (EQ(prop, Qdimension))
983                 return make_int(CHARSET_DIMENSION(cs));
984         if (EQ(prop, Qcolumns))
985                 return make_int(CHARSET_COLUMNS(cs));
986         if (EQ(prop, Qgraphic))
987                 return make_int(CHARSET_GRAPHIC(cs));
988         if (EQ(prop, Qfinal))
989                 return make_char(CHARSET_FINAL(cs));
990         if (EQ(prop, Qchars))
991                 return make_int(CHARSET_CHARS(cs));
992         if (EQ(prop, Qregistry))
993                 return CHARSET_REGISTRY(cs);
994         if (EQ(prop, Qccl_program))
995                 return CHARSET_CCL_PROGRAM(cs);
996         if (EQ(prop, Qdirection))
997                 return CHARSET_DIRECTION(cs) ==
998                     CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
999         if (EQ(prop, Qreverse_direction_charset)) {
1000                 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET(cs);
1001                 /* #### Is this translation OK?  If so, error checking sufficient? */
1002                 return CHARSETP(obj) ? XCHARSET_NAME(obj) : obj;
1003         }
1004         signal_simple_error("Unrecognized charset property name", prop);
1005         return Qnil;            /* not reached */
1006 }
1007
1008 DEFUN("charset-id", Fcharset_id, 1, 1, 0,       /*
1009 Return charset identification number of CHARSET.
1010 */
1011       (charset))
1012 {
1013         Lisp_Object tmp = Fget_charset(charset);
1014         return make_int(XCHARSET_LEADING_BYTE(tmp));
1015 }
1016
1017 /* #### We need to figure out which properties we really want to
1018    allow to be set. */
1019
1020 DEFUN("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0,     /*
1021 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1022 */
1023       (charset, ccl_program))
1024 {
1025         struct ccl_program test_ccl;
1026
1027         charset = Fget_charset(charset);
1028         if (setup_ccl_program(&test_ccl, ccl_program) < 0)
1029                 signal_simple_error("Invalid ccl-program", ccl_program);
1030         XCHARSET_CCL_PROGRAM(charset) = ccl_program;
1031         return Qnil;
1032 }
1033
1034 static void invalidate_charset_font_caches(Lisp_Object charset)
1035 {
1036         /* Invalidate font cache entries for charset on all devices. */
1037         Lisp_Object devcons, concons, hash_table;
1038         DEVICE_LOOP_NO_BREAK(devcons, concons) {
1039                 struct device *d = XDEVICE(XCAR(devcons));
1040                 hash_table = Fgethash(charset, d->charset_font_cache, Qunbound);
1041                 if (!UNBOUNDP(hash_table))
1042                         Fclrhash(hash_table);
1043         }
1044 }
1045
1046 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1047 DEFUN("set-charset-registry", Fset_charset_registry, 2, 2, 0,   /*
1048 Set the 'registry property of CHARSET to REGISTRY.
1049 */
1050       (charset, registry))
1051 {
1052         charset = Fget_charset(charset);
1053         CHECK_STRING(registry);
1054         XCHARSET_REGISTRY(charset) = registry;
1055         invalidate_charset_font_caches(charset);
1056         face_property_was_changed(Vdefault_face, Qfont, Qglobal);
1057         return Qnil;
1058 }
1059 \f
1060 /************************************************************************/
1061 /*              Lisp primitives for working with characters             */
1062 /************************************************************************/
1063
1064 DEFUN("make-char", Fmake_char, 2, 3, 0, /*
1065 Make a character from CHARSET and octets ARG1 and ARG2.
1066 ARG2 is required only for characters from two-dimensional charsets.
1067 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1068 character s with caron.
1069 */
1070       (charset, arg1, arg2))
1071 {
1072         Lisp_Charset *cs;
1073         int a1, a2;
1074         int lowlim, highlim;
1075
1076         charset = Fget_charset(charset);
1077         cs = XCHARSET(charset);
1078
1079         if (EQ(charset, Vcharset_ascii))
1080                 lowlim = 0, highlim = 127;
1081         else if (EQ(charset, Vcharset_control_1))
1082                 lowlim = 0, highlim = 31;
1083         else if (CHARSET_CHARS(cs) == 94)
1084                 lowlim = 33, highlim = 126;
1085         else                    /* CHARSET_CHARS (cs) == 96) */
1086                 lowlim = 32, highlim = 127;
1087
1088         CHECK_INT(arg1);
1089         /* It is useful (and safe, according to Olivier Galibert) to strip
1090            the 8th bit off ARG1 and ARG2 because it allows programmers to
1091            write (make-char 'latin-iso8859-2 CODE) where code is the actual
1092            Latin 2 code of the character.  */
1093         a1 = XINT(arg1) & 0x7f;
1094         if (a1 < lowlim || a1 > highlim)
1095                 args_out_of_range_3(arg1, make_int(lowlim), make_int(highlim));
1096
1097         if (CHARSET_DIMENSION(cs) == 1) {
1098                 if (!NILP(arg2))
1099                         signal_simple_error
1100                             ("Charset is of dimension one; second octet must be nil",
1101                              arg2);
1102                 return make_char(MAKE_CHAR(charset, a1, 0));
1103         }
1104
1105         CHECK_INT(arg2);
1106         a2 = XINT(arg2) & 0x7f;
1107         if (a2 < lowlim || a2 > highlim)
1108                 args_out_of_range_3(arg2, make_int(lowlim), make_int(highlim));
1109
1110         return make_char(MAKE_CHAR(charset, a1, a2));
1111 }
1112
1113 DEFUN("char-charset", Fchar_charset, 1, 1, 0,   /*
1114 Return the character set of CHARACTER.
1115 */
1116       (character))
1117 {
1118         Lisp_Object tmp;
1119
1120         CHECK_CHAR_COERCE_INT(character);
1121
1122         tmp = CHARSET_BY_LEADING_BYTE(CHAR_LEADING_BYTE(XCHAR(character)));
1123         return XCHARSET_NAME(tmp);
1124 }
1125
1126 DEFUN("char-octet", Fchar_octet, 1, 2, 0,       /*
1127 Return the octet numbered N (should be 0 or 1) of CHARACTER.
1128 N defaults to 0 if omitted.
1129 */
1130       (character, n))
1131 {
1132         Lisp_Object charset;
1133         int octet0, octet1;
1134
1135         CHECK_CHAR_COERCE_INT(character);
1136
1137         BREAKUP_CHAR(XCHAR(character), charset, octet0, octet1);
1138
1139         if (NILP(n) || EQ(n, Qzero))
1140                 return make_int(octet0);
1141         else if (EQ(n, make_int(1)))
1142                 return make_int(octet1);
1143         else
1144                 signal_simple_error("Octet number must be 0 or 1", n);
1145 }
1146
1147 DEFUN("split-char", Fsplit_char, 1, 1, 0,       /*
1148 Return list of charset and one or two position-codes of CHARACTER.
1149 */
1150       (character))
1151 {
1152         /* This function can GC */
1153         struct gcpro gcpro1, gcpro2;
1154         Lisp_Object charset = Qnil;
1155         Lisp_Object rc = Qnil;
1156         int c1, c2;
1157         Lisp_Object tmp;
1158
1159         GCPRO2(charset, rc);
1160         CHECK_CHAR_COERCE_INT(character);
1161
1162         BREAKUP_CHAR(XCHAR(character), charset, c1, c2);
1163
1164         tmp = Fget_charset(charset);
1165         if (XCHARSET_DIMENSION(tmp) == 2) {
1166                 rc = list3(XCHARSET_NAME(charset), make_int(c1), make_int(c2));
1167         } else {
1168                 rc = list2(XCHARSET_NAME(charset), make_int(c1));
1169         }
1170         UNGCPRO;
1171
1172         return rc;
1173 }
1174 \f
1175 #ifdef ENABLE_COMPOSITE_CHARS
1176 /************************************************************************/
1177 /*                     composite character functions                    */
1178 /************************************************************************/
1179
1180 Emchar lookup_composite_char(Bufbyte * str, int len)
1181 {
1182         Lisp_Object lispstr = make_string(str, len);
1183         Lisp_Object ch = Fgethash(lispstr,
1184                                   Vcomposite_char_string2char_hash_table,
1185                                   Qunbound);
1186         Emchar emch;
1187
1188         if (UNBOUNDP(ch)) {
1189                 if (composite_char_row_next >= 128)
1190                         signal_simple_error("No more composite chars available",
1191                                             lispstr);
1192                 emch =
1193                     MAKE_CHAR(Vcharset_composite, composite_char_row_next,
1194                               composite_char_col_next);
1195                 Fputhash(make_char(emch), lispstr,
1196                          Vcomposite_char_char2string_hash_table);
1197                 Fputhash(lispstr, make_char(emch),
1198                          Vcomposite_char_string2char_hash_table);
1199                 composite_char_col_next++;
1200                 if (composite_char_col_next >= 128) {
1201                         composite_char_col_next = 32;
1202                         composite_char_row_next++;
1203                 }
1204         } else
1205                 emch = XCHAR(ch);
1206         return emch;
1207 }
1208
1209 Lisp_Object composite_char_string(Emchar ch)
1210 {
1211         Lisp_Object str = Fgethash(make_char(ch),
1212                                    Vcomposite_char_char2string_hash_table,
1213                                    Qunbound);
1214         assert(!UNBOUNDP(str));
1215         return str;
1216 }
1217
1218 xxDEFUN("make-composite-char", Fmake_composite_char, 1, 1, 0,   /*
1219 Convert a string into a single composite character.
1220 The character is the result of overstriking all the characters in
1221 the string.
1222                                                                  */
1223         (string))
1224 {
1225         CHECK_STRING(string);
1226         return make_char(lookup_composite_char(XSTRING_DATA(string),
1227                                                XSTRING_LENGTH(string)));
1228 }
1229
1230 xxDEFUN("composite-char-string", Fcomposite_char_string, 1, 1, 0,       /*
1231 Return a string of the characters comprising a composite character.
1232                                                                          */
1233         (ch))
1234 {
1235         Emchar emch;
1236
1237         CHECK_CHAR(ch);
1238         emch = XCHAR(ch);
1239         if (CHAR_LEADING_BYTE(emch) != LEADING_BYTE_COMPOSITE)
1240                 signal_simple_error("Must be composite char", ch);
1241         return composite_char_string(emch);
1242 }
1243 #endif                          /* ENABLE_COMPOSITE_CHARS */
1244 \f
1245 /************************************************************************/
1246 /*                            initialization                            */
1247 /************************************************************************/
1248
1249 void syms_of_mule_charset(void)
1250 {
1251         INIT_LRECORD_IMPLEMENTATION(charset);
1252
1253         DEFSUBR(Fcharsetp);
1254         DEFSUBR(Ffind_charset);
1255         DEFSUBR(Fget_charset);
1256         DEFSUBR(Fcharset_list);
1257         DEFSUBR(Fcharset_name);
1258         DEFSUBR(Fmake_charset);
1259         DEFSUBR(Fmake_reverse_direction_charset);
1260         /*  DEFSUBR (Freverse_direction_charset); */
1261         DEFSUBR(Fcharset_from_attributes);
1262         DEFSUBR(Fcharset_short_name);
1263         DEFSUBR(Fcharset_long_name);
1264         DEFSUBR(Fcharset_description);
1265         DEFSUBR(Fcharset_dimension);
1266         DEFSUBR(Fcharset_property);
1267         DEFSUBR(Fcharset_id);
1268         DEFSUBR(Fset_charset_ccl_program);
1269         DEFSUBR(Fset_charset_registry);
1270
1271         DEFSUBR(Fmake_char);
1272         DEFSUBR(Fchar_charset);
1273         DEFSUBR(Fchar_octet);
1274         DEFSUBR(Fsplit_char);
1275
1276 #ifdef ENABLE_COMPOSITE_CHARS
1277         DEFSUBR(Fmake_composite_char);
1278         DEFSUBR(Fcomposite_char_string);
1279 #endif
1280
1281         defsymbol(&Qcharsetp, "charsetp");
1282         defsymbol(&Qregistry, "registry");
1283         defsymbol(&Qfinal, "final");
1284         defsymbol(&Qgraphic, "graphic");
1285         defsymbol(&Qdirection, "direction");
1286         defsymbol(&Qreverse_direction_charset, "reverse-direction-charset");
1287         defsymbol(&Qshort_name, "short-name");
1288         defsymbol(&Qlong_name, "long-name");
1289
1290         defsymbol(&Ql2r, "l2r");
1291         defsymbol(&Qr2l, "r2l");
1292
1293         /* Charsets, compatible with FSF 20.3
1294            Naming convention is Script-Charset[-Edition] */
1295         defsymbol(&Qascii, "ascii");
1296         defsymbol(&Qcontrol_1, "control-1");
1297         defsymbol(&Qlatin_iso8859_1, "latin-iso8859-1");
1298         defsymbol(&Qlatin_iso8859_2, "latin-iso8859-2");
1299         defsymbol(&Qlatin_iso8859_3, "latin-iso8859-3");
1300         defsymbol(&Qlatin_iso8859_4, "latin-iso8859-4");
1301         defsymbol(&Qthai_tis620, "thai-tis620");
1302         defsymbol(&Qgreek_iso8859_7, "greek-iso8859-7");
1303         defsymbol(&Qarabic_iso8859_6, "arabic-iso8859-6");
1304         defsymbol(&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1305         defsymbol(&Qkatakana_jisx0201, "katakana-jisx0201");
1306         defsymbol(&Qlatin_jisx0201, "latin-jisx0201");
1307         defsymbol(&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1308         defsymbol(&Qlatin_iso8859_9, "latin-iso8859-9");
1309         defsymbol(&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1310         defsymbol(&Qchinese_gb2312, "chinese-gb2312");
1311         defsymbol(&Qjapanese_jisx0208, "japanese-jisx0208");
1312         defsymbol(&Qkorean_ksc5601, "korean-ksc5601");
1313         defsymbol(&Qjapanese_jisx0212, "japanese-jisx0212");
1314         defsymbol(&Qchinese_cns11643_1, "chinese-cns11643-1");
1315         defsymbol(&Qchinese_cns11643_2, "chinese-cns11643-2");
1316         defsymbol(&Qchinese_big5_1, "chinese-big5-1");
1317         defsymbol(&Qchinese_big5_2, "chinese-big5-2");
1318
1319         defsymbol(&Qcomposite, "composite");
1320 }
1321
1322 void vars_of_mule_charset(void)
1323 {
1324         int i, j, k;
1325
1326         chlook = xnew_and_zero(struct charset_lookup);  /* zero for Purify. */
1327         dump_add_root_struct_ptr(&chlook, &charset_lookup_description);
1328
1329         /* Table of charsets indexed by leading byte. */
1330         for (i = 0; i < countof(chlook->charset_by_leading_byte); i++)
1331                 chlook->charset_by_leading_byte[i] = Qnil;
1332
1333         /* Table of charsets indexed by type/final-byte/direction. */
1334         for (i = 0; i < countof(chlook->charset_by_attributes); i++)
1335                 for (j = 0; j < countof(chlook->charset_by_attributes[0]); j++)
1336                         for (k = 0;
1337                              k < countof(chlook->charset_by_attributes[0][0]);
1338                              k++)
1339                                 chlook->charset_by_attributes[i][j][k] = Qnil;
1340
1341         chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1342         chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1343 }
1344
1345 void complex_vars_of_mule_charset(void)
1346 {
1347         staticpro(&Vcharset_hash_table);
1348         Vcharset_hash_table =
1349             make_lisp_hash_table(50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1350
1351         /* Predefined character sets.  We store them into variables for
1352            ease of access. */
1353
1354         staticpro(&Vcharset_ascii);
1355         Vcharset_ascii =
1356             make_charset(LEADING_BYTE_ASCII, Qascii, 1,
1357                          CHARSET_TYPE_94, 1, 0, 'B',
1358                          CHARSET_LEFT_TO_RIGHT,
1359                          build_string("ASCII"),
1360                          build_string("ASCII)"),
1361                          build_string("ASCII (ISO646 IRV)"),
1362                          build_string("\\(iso8859-[0-9]*\\|-ascii\\)"));
1363         staticpro(&Vcharset_control_1);
1364         Vcharset_control_1 =
1365             make_charset(LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1366                          CHARSET_TYPE_94, 1, 1, 0,
1367                          CHARSET_LEFT_TO_RIGHT,
1368                          build_string("C1"),
1369                          build_string("Control characters"),
1370                          build_string("Control characters 128-191"),
1371                          build_string(""));
1372         staticpro(&Vcharset_latin_iso8859_1);
1373         Vcharset_latin_iso8859_1 =
1374             make_charset(LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1375                          CHARSET_TYPE_96, 1, 1, 'A',
1376                          CHARSET_LEFT_TO_RIGHT,
1377                          build_string("Latin-1"),
1378                          build_string("ISO8859-1 (Latin-1)"),
1379                          build_string("ISO8859-1 (Latin-1)"),
1380                          build_string("iso8859-1"));
1381         staticpro(&Vcharset_latin_iso8859_2);
1382         Vcharset_latin_iso8859_2 =
1383             make_charset(LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1384                          CHARSET_TYPE_96, 1, 1, 'B',
1385                          CHARSET_LEFT_TO_RIGHT,
1386                          build_string("Latin-2"),
1387                          build_string("ISO8859-2 (Latin-2)"),
1388                          build_string("ISO8859-2 (Latin-2)"),
1389                          build_string("iso8859-2"));
1390         staticpro(&Vcharset_latin_iso8859_3);
1391         Vcharset_latin_iso8859_3 =
1392             make_charset(LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1393                          CHARSET_TYPE_96, 1, 1, 'C',
1394                          CHARSET_LEFT_TO_RIGHT,
1395                          build_string("Latin-3"),
1396                          build_string("ISO8859-3 (Latin-3)"),
1397                          build_string("ISO8859-3 (Latin-3)"),
1398                          build_string("iso8859-3"));
1399         staticpro(&Vcharset_latin_iso8859_4);
1400         Vcharset_latin_iso8859_4 =
1401             make_charset(LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1402                          CHARSET_TYPE_96, 1, 1, 'D',
1403                          CHARSET_LEFT_TO_RIGHT,
1404                          build_string("Latin-4"),
1405                          build_string("ISO8859-4 (Latin-4)"),
1406                          build_string("ISO8859-4 (Latin-4)"),
1407                          build_string("iso8859-4"));
1408         staticpro(&Vcharset_thai_tis620);
1409         Vcharset_thai_tis620 =
1410             make_charset(LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1411                          CHARSET_TYPE_96, 1, 1, 'T',
1412                          CHARSET_LEFT_TO_RIGHT,
1413                          build_string("TIS620"),
1414                          build_string("TIS620 (Thai)"),
1415                          build_string("TIS620.2529 (Thai)"),
1416                          build_string("tis620"));
1417         staticpro(&Vcharset_greek_iso8859_7);
1418         Vcharset_greek_iso8859_7 =
1419             make_charset(LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1420                          CHARSET_TYPE_96, 1, 1, 'F',
1421                          CHARSET_LEFT_TO_RIGHT,
1422                          build_string("ISO8859-7"),
1423                          build_string("ISO8859-7 (Greek)"),
1424                          build_string("ISO8859-7 (Greek)"),
1425                          build_string("iso8859-7"));
1426         staticpro(&Vcharset_arabic_iso8859_6);
1427         Vcharset_arabic_iso8859_6 =
1428             make_charset(LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1429                          CHARSET_TYPE_96, 1, 1, 'G',
1430                          CHARSET_RIGHT_TO_LEFT,
1431                          build_string("ISO8859-6"),
1432                          build_string("ISO8859-6 (Arabic)"),
1433                          build_string("ISO8859-6 (Arabic)"),
1434                          build_string("iso8859-6"));
1435         staticpro(&Vcharset_hebrew_iso8859_8);
1436         Vcharset_hebrew_iso8859_8 =
1437             make_charset(LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1438                          CHARSET_TYPE_96, 1, 1, 'H',
1439                          CHARSET_RIGHT_TO_LEFT,
1440                          build_string("ISO8859-8"),
1441                          build_string("ISO8859-8 (Hebrew)"),
1442                          build_string("ISO8859-8 (Hebrew)"),
1443                          build_string("iso8859-8"));
1444         staticpro(&Vcharset_katakana_jisx0201);
1445         Vcharset_katakana_jisx0201 =
1446             make_charset(LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1447                          CHARSET_TYPE_94, 1, 1, 'I',
1448                          CHARSET_LEFT_TO_RIGHT,
1449                          build_string("JISX0201 Kana"),
1450                          build_string("JISX0201.1976 (Japanese Kana)"),
1451                          build_string("JISX0201.1976 Japanese Kana"),
1452                          build_string("jisx0201.1976"));
1453         staticpro(&Vcharset_latin_jisx0201);
1454         Vcharset_latin_jisx0201 =
1455             make_charset(LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1456                          CHARSET_TYPE_94, 1, 0, 'J',
1457                          CHARSET_LEFT_TO_RIGHT,
1458                          build_string("JISX0201 Roman"),
1459                          build_string("JISX0201.1976 (Japanese Roman)"),
1460                          build_string("JISX0201.1976 Japanese Roman"),
1461                          build_string("jisx0201.1976"));
1462         staticpro(&Vcharset_cyrillic_iso8859_5);
1463         Vcharset_cyrillic_iso8859_5 =
1464             make_charset(LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
1465                          2, CHARSET_TYPE_96, 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
1466                          build_string("ISO8859-5"),
1467                          build_string("ISO8859-5 (Cyrillic)"),
1468                          build_string("ISO8859-5 (Cyrillic)"),
1469                          build_string("iso8859-5"));
1470         staticpro(&Vcharset_latin_iso8859_9);
1471         Vcharset_latin_iso8859_9 =
1472             make_charset(LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1473                          CHARSET_TYPE_96, 1, 1, 'M',
1474                          CHARSET_LEFT_TO_RIGHT,
1475                          build_string("Latin-5"),
1476                          build_string("ISO8859-9 (Latin-5)"),
1477                          build_string("ISO8859-9 (Latin-5)"),
1478                          build_string("iso8859-9"));
1479         staticpro(&Vcharset_japanese_jisx0208_1978);
1480         Vcharset_japanese_jisx0208_1978 =
1481             make_charset(LEADING_BYTE_JAPANESE_JISX0208_1978,
1482                          Qjapanese_jisx0208_1978, 3, CHARSET_TYPE_94X94, 2, 0,
1483                          '@', CHARSET_LEFT_TO_RIGHT,
1484                          build_string("JISX0208.1978"),
1485                          build_string("JISX0208.1978 (Japanese)"),
1486                          build_string
1487                          ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1488                          build_string("\\(jisx0208\\|jisc6226\\)\\.1978"));
1489         staticpro(&Vcharset_chinese_gb2312);
1490         Vcharset_chinese_gb2312 =
1491             make_charset(LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1492                          CHARSET_TYPE_94X94, 2, 0, 'A',
1493                          CHARSET_LEFT_TO_RIGHT,
1494                          build_string("GB2312"),
1495                          build_string("GB2312)"),
1496                          build_string("GB2312 Chinese simplified"),
1497                          build_string("gb2312"));
1498         staticpro(&Vcharset_japanese_jisx0208);
1499         Vcharset_japanese_jisx0208 =
1500             make_charset(LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1501                          CHARSET_TYPE_94X94, 2, 0, 'B',
1502                          CHARSET_LEFT_TO_RIGHT,
1503                          build_string("JISX0208"),
1504                          build_string("JISX0208.1983/1990 (Japanese)"),
1505                          build_string("JISX0208.1983/1990 Japanese Kanji"),
1506                          build_string("jisx0208.19\\(83\\|90\\)"));
1507         staticpro(&Vcharset_korean_ksc5601);
1508         Vcharset_korean_ksc5601 =
1509             make_charset(LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1510                          CHARSET_TYPE_94X94, 2, 0, 'C',
1511                          CHARSET_LEFT_TO_RIGHT,
1512                          build_string("KSC5601"),
1513                          build_string("KSC5601 (Korean"),
1514                          build_string("KSC5601 Korean Hangul and Hanja"),
1515                          build_string("ksc5601"));
1516         staticpro(&Vcharset_japanese_jisx0212);
1517         Vcharset_japanese_jisx0212 =
1518             make_charset(LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1519                          CHARSET_TYPE_94X94, 2, 0, 'D',
1520                          CHARSET_LEFT_TO_RIGHT,
1521                          build_string("JISX0212"),
1522                          build_string("JISX0212 (Japanese)"),
1523                          build_string("JISX0212 Japanese Supplement"),
1524                          build_string("jisx0212"));
1525
1526 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1527         staticpro(&Vcharset_chinese_cns11643_1);
1528         Vcharset_chinese_cns11643_1 =
1529             make_charset(LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
1530                          3, CHARSET_TYPE_94X94, 2, 0, 'G',
1531                          CHARSET_LEFT_TO_RIGHT, build_string("CNS11643-1"),
1532                          build_string("CNS11643-1 (Chinese traditional)"),
1533                          build_string("CNS 11643 Plane 1 Chinese traditional"),
1534                          build_string(CHINESE_CNS_PLANE_RE("1")));
1535         staticpro(&Vcharset_chinese_cns11643_2);
1536         Vcharset_chinese_cns11643_2 =
1537             make_charset(LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
1538                          3, CHARSET_TYPE_94X94, 2, 0, 'H',
1539                          CHARSET_LEFT_TO_RIGHT, build_string("CNS11643-2"),
1540                          build_string("CNS11643-2 (Chinese traditional)"),
1541                          build_string("CNS 11643 Plane 2 Chinese traditional"),
1542                          build_string(CHINESE_CNS_PLANE_RE("2")));
1543         staticpro(&Vcharset_chinese_big5_1);
1544         Vcharset_chinese_big5_1 =
1545             make_charset(LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1546                          CHARSET_TYPE_94X94, 2, 0, '0',
1547                          CHARSET_LEFT_TO_RIGHT,
1548                          build_string("Big5"),
1549                          build_string("Big5 (Level-1)"),
1550                          build_string
1551                          ("Big5 Level-1 Chinese traditional"),
1552                          build_string("big5"));
1553         staticpro(&Vcharset_chinese_big5_2);
1554         Vcharset_chinese_big5_2 =
1555             make_charset(LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1556                          CHARSET_TYPE_94X94, 2, 0, '1',
1557                          CHARSET_LEFT_TO_RIGHT,
1558                          build_string("Big5"),
1559                          build_string("Big5 (Level-2)"),
1560                          build_string
1561                          ("Big5 Level-2 Chinese traditional"),
1562                          build_string("big5"));
1563
1564 #ifdef ENABLE_COMPOSITE_CHARS
1565         /* #### For simplicity, we put composite chars into a 96x96 charset.
1566            This is going to lead to problems because you can run out of
1567            room, esp. as we don't yet recycle numbers. */
1568         staticpro(&Vcharset_composite);
1569         Vcharset_composite =
1570             make_charset(LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1571                          CHARSET_TYPE_96X96, 2, 0, 0,
1572                          CHARSET_LEFT_TO_RIGHT,
1573                          build_string("Composite"),
1574                          build_string("Composite characters"),
1575                          build_string("Composite characters"),
1576                          build_string(""));
1577
1578         /* #### not dumped properly */
1579         composite_char_row_next = 32;
1580         composite_char_col_next = 32;
1581
1582         Vcomposite_char_string2char_hash_table =
1583             make_lisp_hash_table(500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1584         Vcomposite_char_char2string_hash_table =
1585             make_lisp_hash_table(500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1586         staticpro(&Vcomposite_char_string2char_hash_table);
1587         staticpro(&Vcomposite_char_char2string_hash_table);
1588 #endif                          /* ENABLE_COMPOSITE_CHARS */
1589
1590 }