1 ;;; china-util.el --- utilities for Chinese
3 ;; Copyright (C) 1995,1999 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 1997 MORIOKA Tomohiko
7 ;; Keywords: mule, multilingual, Chinese
9 ;; This file is part of XEmacs.
;; XEmacs is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
16 ;; XEmacs is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with XEmacs; see the file COPYING.  If not, write to the Free
;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
;; 02111-1307, USA.
(defun setup-chinese-gb-environment-internal ()
  "Make the \"PinYin\" ITS mode map the default input map.
Sets the default value of `its:*current-map*' to the map returned by
`its:get-mode-map' for \"PinYin\"."
  (set-default 'its:*current-map* (its:get-mode-map "PinYin")))
(defun setup-chinese-gb-environment ()
  "Setup multilingual environment (MULE) for Chinese GB2312 users."
  ;; A command, so that users can run it directly with M-x.
  (interactive)
  (set-language-environment "Chinese-GB"))
(defun setup-chinese-big5-environment ()
  "Setup multilingual environment (MULE) for Chinese Big5 users."
  ;; A command, so that users can run it directly with M-x.
  (interactive)
  (set-language-environment "Chinese-BIG5"))
47 ;; (defun setup-chinese-cns-environment ()
48 ;; "Setup multilingual environment (MULE) for Chinese CNS11643 family users."
50 ;; (set-language-environment "Chinese-CNS"))
;; HZ/ZW encoding support
;; HZ is an encoding method for the Chinese character set GB2312 that
;; is widely used on the Internet.  It is very similar to the 7-bit
;; environment of ISO-2022.  The difference is that HZ uses the
;; sequences "~{" and "~}" to designate GB2312 and ASCII respectively;
;; hence, it doesn't use the ESC (0x1B) code.
;; ZW is another encoding method for the Chinese character set GB2312.
;; It encodes Chinese characters line by line, starting each such line
;; with the sequence "zW".  Like HZ, it uses only 7-bit codes.
(defvar iso2022-gb-designation "\e$A"
  "ISO-2022 escape sequence to designate GB2312.")

(defvar hz-gb-designnation "~{"
  "HZ escape sequence to designate GB2312.")

(defvar iso2022-ascii-designation "\e(B"
  "ISO-2022 escape sequence to designate ASCII.")

(defvar hz-ascii-designnation "~}"
  "HZ escape sequence to designate ASCII.")

(defvar zw-start-gb "^zW"
  "Regexp of ZW sequence to start GB2312.")

(defvar hz/zw-start-gb
  (concat hz-gb-designnation "\\|" zw-start-gb "\\|[^\0-\177]")
  "Regexp for start of GB2312 in an encoding mixture of HZ and ZW.
It matches the HZ designation `hz-gb-designnation', the ZW line
prefix `zw-start-gb', or any single character outside the range
0..127.")
(defvar decode-hz-line-continuation nil
  "Non-nil means honor the line continuation convention of HZ.
When nil, `decode-hz-region' looks for the closing \"~}\" of an HZ
sequence only up to the end of the line on which the sequence
started.  When non-nil, that search is not bounded by the line end.")
(defconst hz-set-msb-table
  (let ((str (make-string 127 0))
        (i 0))
    (while (< i 127)
      ;; Codes 0..32 (control characters and space) map to themselves.
      ;; Codes 33..126, the byte range GB2312 uses in 7-bit form, get
      ;; their most significant bit set.
      (aset str i (if (< i 33) i (+ i 128)))
      (setq i (1+ i)))
    str)
  "Translation table that sets the most significant bit of GB2312 bytes.
A string of 127 elements suitable for `translate-region': element I
is I for I below 33 and I + 128 otherwise.")
(defun decode-hz-region (beg end)
  "Decode HZ/ZW encoded text in the current region.
BEG and END delimit the text to decode; interactively they are the
region bounds.  The HZ and ZW sequences are first rewritten as 8-bit
GB2312 and the result is then decoded with the `cn-gb-2312' coding
system.
Return the length of resulting text."
  (interactive "r")
  (save-excursion
    (save-restriction
      (let (pos ch)
        (narrow-to-region beg end)

        ;; We, at first, convert HZ/ZW to `cn-gb-2312',
        ;; then decode it.

        ;; "~\n" -> "\n", "~~" -> "~"
        (goto-char (point-min))
        (while (search-forward "~" nil t)
          (setq ch (following-char))
          (cond ((= ch ?\n)
                 (delete-char -1))
                ((= ch ?~)
                 (delete-char -1)
                 ;; Step over the tilde that was kept; otherwise the
                 ;; next search finds it again and treats it as the
                 ;; start of a new escape (so "~~\n" would lose both
                 ;; tildes).
                 (forward-char 1))))

        ;; "^zW...\n" -> Chinese GB2312
        ;; "~{...~}"  -> Chinese GB2312
        (goto-char (point-min))
        ;; From here on BEG is the first position that needs decoding,
        ;; or nil when nothing was found.
        (setq beg nil)
        (while (re-search-forward hz/zw-start-gb nil t)
          (setq pos (match-beginning 0)
                ch (char-after pos))
          ;; Record the first position to start conversion.
          (or beg (setq beg pos))
          (end-of-line)
          (setq end (point))
          (unless (>= ch 128)           ; 8bit GB2312 needs no rewriting
            ;; Remove the two-character introducer ("zW" or "~{").
            (goto-char pos)
            (delete-char 2)
            (setq end (- end 2))
            (if (= ch ?z)               ; ZW -> cn-gb-2312
                (progn
                  ;; A ZW sequence always runs to the end of its line.
                  (translate-region (point) end hz-set-msb-table)
                  (goto-char end))
              ;; HZ -> cn-gb-2312: the sequence runs up to "~}", looked
              ;; for within this line unless line continuation is on.
              (if (search-forward hz-ascii-designnation
                                  (if decode-hz-line-continuation nil end)
                                  t)
                  (delete-char -2))
              (setq end (point))
              (translate-region pos (point) hz-set-msb-table))))
        (if beg
            (decode-coding-region beg end 'cn-gb-2312)))
      ;; Still narrowed here, so this is the length of the decoded text.
      (- (point-max) (point-min)))))
(defun decode-hz-buffer ()
  "Decode HZ/ZW encoded text in the current buffer."
  ;; A command, so that users can run it directly with M-x.
  (interactive)
  (decode-hz-region (point-min) (point-max)))
(defun encode-hz-region (beg end)
  "Encode the text in the current region to HZ.
BEG and END delimit the text to encode; interactively they are the
region bounds.  The text is encoded with the `iso-2022-7bit' coding
system and the ISO-2022 designations are then replaced by their HZ
counterparts.
Return the length of resulting text."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)

      ;; "~" -> "~~"
      (goto-char (point-min))
      (while (search-forward "~" nil t)
        (insert ?~))

      ;; Chinese GB2312 -> "~{...~}"
      (goto-char (point-min))
      (if (re-search-forward "\\cc" nil t)
          (let ((pos (match-beginning 0)))
            (goto-char pos)
            ;; Everything before the first Chinese character is left
            ;; as it is.
            (encode-coding-region pos (point-max) 'iso-2022-7bit)
            ;; Swap each ISO-2022 designation for the HZ one.  Using
            ;; `replace-match' avoids hard-coding the escape length.
            (goto-char pos)
            (while (search-forward iso2022-gb-designation nil t)
              (replace-match hz-gb-designnation t t))
            (goto-char pos)
            (while (search-forward iso2022-ascii-designation nil t)
              (replace-match hz-ascii-designnation t t))))
      ;; Still narrowed here, so this is the length of the encoded text.
      (- (point-max) (point-min)))))
(defun encode-hz-buffer ()
  "Encode the text in the current buffer to HZ."
  ;; A command, so that users can run it directly with M-x.
  (interactive)
  (encode-hz-region (point-min) (point-max)))
183 (provide 'china-util)
185 ;;; china-util.el ends here