1 ;;; thai-xtis-util.el --- utilities for Thai (for XTIS).
3 ;; Copyright (C) 1999 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 1999 NECTEC, Thai.
7 ;; Author: TAKAHASHI Naoto <ntakahas@etl.go.jp>
8 ;; Ken'ichi HANDA <handa@etl.go.jp>
9 ;; Virach Sornlertlamvanich <virach@links.nectec.or.th>
10 ;; MORIOKA Tomohiko <tomo@etl.go.jp>
12 ;; Keywords: mule, multilingual, Thai, XTIS
14 ;; This file is part of XEmacs.
16 ;; XEmacs is free software; you can redistribute it and/or modify it
17 ;; under the terms of the GNU General Public License as published by
18 ;; the Free Software Foundation; either version 2, or (at your option)
21 ;; XEmacs is distributed in the hope that it will be useful, but
22 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 ;; General Public License for more details.
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with XEmacs; see the file COPYING. If not, write to the Free
28 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
33 ;; For Thai, the pre-composed character set proposed by
34 ;; Virach Sornlertlamvanich <virach@links.nectec.or.th> is supported.
41 ;; (defun setup-thai-xtis-environment ()
42 ;; "Setup multilingual environment for Thai-XTIS."
44 ;; (set-language-environment "Thai-XTIS"))
47 ;; (defun exit-thai-xtis-environment ()
48 ;; "Exit Thai-XTIS environment."
49 ;; ;; (thai-xtis-text-mode nil)
52 ;;; Utilities for ThaiText minor mode
54 ;; Generic character for Thai character set.
55 (defvar thai-xtis-generic-char
56 (if (featurep 'xemacs)
58 (make-char 'thai-xtis)))
60 ;; Regular expression matching any single Thai character.
61 (defvar thai-xtis-char-regexp "\\cx")
63 (defvar thai-xtis-text-mode nil "Non-nil if using Thai text minor mode.")
64 (make-variable-buffer-local 'thai-xtis-text-mode)
66 (defvar thai-xtis-text-mode-map
67 (let ((map (make-sparse-keymap)))
68 (define-key map "\M-f" 'thai-xtis-forward-word)
69 (define-key map "\M-b" 'thai-xtis-backward-word)
70 (define-key map "\M-d" 'thai-xtis-kill-word)
71 (define-key map "\M-\177" 'thai-xtis-backward-kill-word)
72 (define-key map "\M-t" 'thai-xtis-transpose-words)
73 (cond ((featurep 'xemacs)
74 (define-key map [(meta backspace)] 'thai-xtis-backward-kill-word)
75 (define-key map [(meta delete)] 'thai-xtis-backward-kill-word)
76 (define-key map [(meta right)] 'thai-xtis-forward-word)
77 (define-key map [(meta left)] 'thai-xtis-backward-word)
78 (define-key map [(control right)] 'thai-xtis-forward-word)
79 (define-key map [(control left)] 'thai-xtis-backward-word)
80 (define-key map [(control delete)] 'thai-xtis-backward-kill-word)
83 (define-key map [M-right] 'thai-xtis-forward-word)
84 (define-key map [M-left] 'thai-xtis-backward-word)
85 (define-key map [C-right] 'thai-xtis-forward-word)
86 (define-key map [C-left] 'thai-xtis-backward-word)
87 (define-key map [C-delete] 'thai-xtis-backward-kill-word)
90 ;; Character base operations.
91 (define-key map "\177" 'thai-xtis-backward-delete-char)
92 (define-key map [backspace] 'thai-xtis-backward-delete-char)
94 "Keymap for Thai Text minor mode.")
96 (defvar thai-xtis-prev-auto-fill-function nil)
97 (make-variable-buffer-local 'thai-xtis-prev-auto-fill-function)
99 (defvar thai-xtis-prev-normal-auto-fill-function nil)
100 (make-variable-buffer-local 'thai-xtis-prev-normal-auto-fill-function)
103 (defun thai-xtis-text-mode (&optional arg)
104 "Minor mode for Thai text that pays attention to word segmentation.
106 In this mode, word-oriented commands (e.g forward-word) and text
107 filling commands (e.g. fill-paragraph) recognize Thai word boundaries
108 within a sequence of Thai characters."
109 (interactive (list (not thai-xtis-text-mode)))
110 (setq thai-xtis-text-mode arg)
111 (if thai-xtis-text-mode
113 ;; Setup ThaiText mode.
114 (make-local-variable 'auto-fill-chars)
115 ;; (setq auto-fill-chars (copy-sequence auto-fill-chars))
116 ;; (aset auto-fill-chars thai-xtis-generic-char t)
117 (setq thai-xtis-prev-auto-fill-function 'auto-fill-function)
118 (make-local-variable 'auto-fill-function)
119 (setq auto-fill-function 'thai-xtis-do-auto-fill)
120 (setq thai-xtis-prev-normal-auto-fill-function
121 'normal-auto-fill-function)
122 (setq normal-auto-fill-function 'thai-xtis-do-auto-fill)
123 (make-local-variable 'sentence-end-without-period)
124 (setq sentence-end-without-period t)
125 (set-category-table (copy-category-table))
126 ;; (modify-category-entry thai-xtis-generic-char ?|)
127 (put-charset-property 'thai-xtis 'fill-find-break-point-function
128 'thai-xtis-find-break-point)
129 (put-charset-property 'thai-xtis 'nospace-between-words t)
130 (make-local-variable 'before-change-functions)
131 (setq before-change-functions
132 (cons 'thai-xtis-wordseg-overlay-modification-function
133 before-change-functions))
135 (kill-local-variable 'auto-fill-chars)
136 (kill-local-variable 'sentence-end-without-period)
137 (kill-local-variable 'before-change-functions)
138 (setq auto-fill-function thai-xtis-prev-auto-fill-function)
139 (set-category-table (standard-category-table))
140 (put-charset-property 'thai-xtis 'fill-find-break-point-function nil)
141 (put-charset-property 'thai-xtis 'nospace-between-words nil)
143 (force-mode-line-update))
145 (cond ((featurep 'xemacs)
146 (add-minor-mode 'thai-xtis-text-mode
148 thai-xtis-text-mode-map
150 'thai-xtis-text-mode)
154 (set-alist 'minor-mode-alist
157 (set-alist 'minor-mode-map-alist
159 thai-xtis-text-mode-map)
162 ;;; Thai wordseg program interface.
164 (defvar thai-xtis-wordseg-program
165 "/usr/local/bin/wordseg"
166 "*Program name of Thai word segmentor.
167 This program reads a Thai word from stdin,
168 and writes segmented words (separated by a space) to stdout.")
170 (defvar thai-xtis-wordseg-data "/usr/local/lib/wordseg"
171 "*Directory of data used by `thai-xtis-wordseg-program'.")
173 (defvar thai-xtis-wordseg-args (list "mule" "-d" thai-xtis-wordseg-data)
174 "List of arguments for the program `thai-xtis-wordseg-program'.")
176 (defconst thai-xtis-wordseg-service 6750
177 "Service name of port number for Thai word segmentor network service.
178 If a program specified in `thai-xtis-wordseg-program' is not available
179 on your machine, this service will be used.")
181 (defvar thai-xtis-wordseg-server "localhost"
182 "*Host name for Thai word segmentor network service.")
184 (defvar thai-xtis-wordseg-coding-system
186 "Coding system used to communicate with `thai-xtis-wordseg-program'.")
189 (defvar thai-xtis-wordseg-proc nil)
190 ;; String to accumulate data sent from wordseg.
191 (defvar thai-xtis-wordseg-buf nil)
192 ;; Flag to tell that data sent from wordseg is ready in
193 ;; thai-xtis-wordseg-buf.
194 (defvar thai-xtis-wordseg-ready nil)
196 ;; Function to call when data from wordseg arrives at Emacs.
197 (defun thai-xtis-wordseg-filter (proc str)
198 (setq thai-xtis-wordseg-buf (concat thai-xtis-wordseg-buf str))
199 (if (string-match "\n" thai-xtis-wordseg-buf)
200 (setq thai-xtis-wordseg-ready t)))
202 (defun thai-xtis-word-segment (str &optional stringp)
203 "Segment STR by Thai words.
204 Return a list of word starting positions.
205 The last element of the list is the ending position of the last word.
206 If optional arg STRINGP is non-nil, return a string of words in Thai
207 separated by `|' (vertical bar)."
209 (let ((status (and thai-xtis-wordseg-proc
210 (process-status thai-xtis-wordseg-proc))))
211 (if (not (memq status '(run open)))
212 (let ((coding-system-for-read 'binary)
213 (coding-system-for-write 'binary))
214 (setq thai-xtis-wordseg-proc
215 (if (file-executable-p thai-xtis-wordseg-program)
216 (apply 'start-process "wordseg" nil
217 thai-xtis-wordseg-program
218 thai-xtis-wordseg-args)
219 (open-network-stream "wordseg" nil
220 thai-xtis-wordseg-server
221 thai-xtis-wordseg-service)))
222 (if (not (memq (process-status thai-xtis-wordseg-proc)
224 (error "Failed to run %s" thai-xtis-wordseg-program))
225 (process-kill-without-query thai-xtis-wordseg-proc)
226 (set-process-filter thai-xtis-wordseg-proc
227 'thai-xtis-wordseg-filter)
228 ;; For unknown reason, we must wait for a while before
229 ;; sending Thai text to "wordseg" program.
232 (setq thai-xtis-wordseg-buf "" thai-xtis-wordseg-ready nil)
233 (process-send-string thai-xtis-wordseg-proc
234 (concat (encode-coding-string str 'tis-620) "\n"))
235 (while (not thai-xtis-wordseg-ready)
236 (accept-process-output thai-xtis-wordseg-proc))
237 (setq thai-xtis-wordseg-buf
238 (decode-coding-string thai-xtis-wordseg-buf
239 thai-xtis-wordseg-coding-system))
241 (substring thai-xtis-wordseg-buf 0 -2)
245 (while (setq idx (string-match "|" thai-xtis-wordseg-buf idx))
246 (setq segments (cons (- idx count) segments)
249 (nreverse segments))))))
251 ;; Delete all overlays in between FROM and TO which have
252 ;; `thai-xtis-wordseg' property.
253 (defun thai-xtis-delete-wordseg-overlay (from to)
254 (let ((overlays (overlays-in from to)))
256 (if (overlay-get (car overlays) 'thai-xtis-wordseg)
257 (delete-overlay (car overlays)))
258 (setq overlays (cdr overlays)))))
260 ;; A function to call when a text within or adjacent to a Thai wordseg
261 ;; overlay is changed.
262 (defun thai-xtis-wordseg-overlay-modification-function (from to)
263 (let ((overlays (append (overlays-at from) (overlays-at to))))
265 (if (overlay-get (car overlays) 'thai-xtis-wordseg)
266 (delete-overlay (car overlays)))
267 (setq overlays (cdr overlays)))))
269 ;; Return Thai wordseg overlay at POS.
270 (defun thai-xtis-get-wordseg-overlay (pos)
271 (let ((overlays (overlays-at pos))
274 (if (overlay-get (car overlays) 'thai-xtis-wordseg)
275 (setq overlay (car overlays)
279 ;; Make a wordseg overlay on the region FROM and TO and return it.
280 ;; SEGMENTS contains word segmentation information. It is set in
281 ;; `thai-xtis-wordseg' property of the overlay.
282 (defun thai-xtis-put-wordseg-overlay (from to segments)
283 (let ((overlay (make-overlay from to)))
284 (overlay-put overlay 'thai-xtis-wordseg segments)
285 (overlay-put overlay 'evaporate t)
286 ;;(overlay-put overlay 'modification-hooks
287 ;;(list 'thai-xtis-wordseg-overlay-modification-function))
288 ;;(overlay-put overlay 'insert-in-front-hooks
289 ;;(list 'thai-xtis-wordseg-overlay-modification-function))
290 ;;(overlay-put overlay 'insert-behind-hooks
291 ;;(list 'thai-xtis-wordseg-overlay-modification-function))
294 ;; Make wordseg overlays on all Thai character sequences in the region
296 (defun thai-xtis-set-wordseg-info-region (from to)
297 (thai-xtis-delete-wordseg-overlay from to)
301 (let ((regexp (concat thai-xtis-char-regexp "+"))
305 (re-search-forward regexp nil t))
306 (setq from (match-beginning 0)
309 segments (thai-xtis-word-segment (match-string 0)))
310 (thai-xtis-put-wordseg-overlay from (if (< end (point-max)) (1+ end) end)
313 ;; Return a list of word segmented positions at or near POS.
314 (defun thai-xtis-wordsegs-at (pos)
315 (let ((overlay (thai-xtis-get-wordseg-overlay pos)))
318 (while (and (not (bobp))
319 (eq (char-charset (preceding-char)) 'thai-xtis))
321 (thai-xtis-set-wordseg-info-region (point) pos)
322 (setq overlay (thai-xtis-get-wordseg-overlay pos))))
324 (let ((head (overlay-start overlay))
325 (segments (overlay-get overlay 'thai-xtis-wordseg)))
326 (mapcar (function (lambda (x) (+ head x))) segments)))))
328 (defun thai-xtis-wordseg-info (pos)
329 (let ((segments (thai-xtis-wordsegs-at pos)))
331 (< pos (car (last segments))))
332 (let ((from (car segments)))
333 (while (<= (car segments) pos)
334 (setq from (car segments) segments (cdr segments)))
335 (cons from (car segments))))))
337 ;; Move point forward to the next word boundary or to LIMIT. If LIMIT
338 ;; is before point, move point backward to the previous word boundary.
339 (defun thai-xtis-search-next-wordseg (limit &optional inhibit-limit)
344 (if (and (re-search-forward "\\sw" limit 'move)
347 (looking-at thai-xtis-char-regexp)))
349 (if (and (re-search-backward "\\sw" limit 'move)
350 (looking-at thai-xtis-char-regexp))
353 (let ((segments (thai-xtis-wordsegs-at (point))))
358 (if (looking-at (format "\\c%c+" ?t))
359 (setq to (match-end 0))
362 (if (re-search-backward (format "\\C%c" ?t)
363 (if (< limit orig) limit) 'move)
364 (setq from (1+ (point)))
366 (thai-xtis-set-wordseg-info-region from to))
367 (setq segments (thai-xtis-wordsegs-at (point)))))
373 (setq pos (car segments))
375 (while (< (car l) (point))
376 (setq pos (car l) l (cdr l))))
377 (while (<= (car l) (point))
384 ;;; Thai text filling programs.
386 ;; Property `fill-find-break-point-function' of Thai charset.
387 (defun thai-xtis-find-break-point (limit)
388 (if (and thai-xtis-text-mode
389 (looking-at thai-xtis-char-regexp))
390 (thai-xtis-search-next-wordseg limit)))
392 (defvar thai-xtis-auto-fill-delay-column 8
393 "How many columns right of `fill-column' auto filling should be delayed.
394 In Auto Fill mode, when you type a Thai character beyond fill-column
395 plus this value, automatic line-wrapping happens.
397 This delay of automatic line-wrapping is to get more accurate word
398 segmentation info from `thai-xtis-wordseg-program'.")
400 (defun thai-xtis-do-auto-fill ()
401 "Substitution for the function `do-auto-fill' in Thai Text mode."
402 (if (and (not (memq (preceding-char) '(? ?\n ?\t)))
403 (< (current-column) (+ fill-column thai-xtis-auto-fill-delay-column)))
407 ;;; Word base operations.
409 (defun thai-xtis-forward-word (arg)
410 "Substitution for the command `forward-word' in Thai Text minor mode."
413 (while (and (not (eobp))
414 (not (or (looking-at "\\w")
415 (looking-at thai-xtis-char-regexp))))
419 (if (looking-at thai-xtis-char-regexp)
420 (thai-xtis-search-next-wordseg (point-max))
422 (thai-xtis-forward-word (1- arg))))
424 (while (and (not (bobp))
428 (or (looking-at "\\w")
429 (looking-at thai-xtis-char-regexp))))))
432 (if (looking-at thai-xtis-char-regexp)
435 (thai-xtis-search-next-wordseg (point-min)))
438 (thai-xtis-forward-word (1+ arg))))))
440 (defun thai-xtis-backward-word (arg)
441 "Substitution for the command `backward-word' in Thai Text minor mode."
443 (thai-xtis-forward-word (- arg)))
445 (defun thai-xtis-kill-word (arg)
446 "Substitution for the command `kill-word' in Thai Text minor mode."
449 (thai-xtis-forward-word arg)
450 (kill-region pos (point))))
452 (defun thai-xtis-backward-kill-word (arg)
453 "Substitution for the command `backward-kill-word' in Thai Text minor mode."
455 (thai-xtis-kill-word (- arg)))
457 (defun thai-xtis-transpose-words (arg)
458 "Substitution for the command `transpose-words' in Thai Text minor mode."
460 (transpose-subr 'thai-xtis-forward-word arg))
462 ;; Character base operations.
464 (defsubst thai-xtis-char-tone (char)
465 (logand (char-int char) 7)
468 (defsubst thai-xtis-clear-char-tone (char)
469 (int-char (logxor (logior (char-int char) 7) 7))
472 (defsubst thai-xtis-char-verbal (char)
473 (logand (char-int char) 120) ; #x78
476 (defsubst thai-xtis-clear-char-verbal (char)
478 (logxor (logior (char-int char) 120) 120)
482 (defun thai-xtis-backward-delete-char (arg)
483 "Delete backward one character each, used in Thai text only.
484 A vowel sign or a tone mark is considered as a character."
487 (let ((chr (char-before)))
488 (cond ((eq (char-charset chr) 'thai-xtis)
490 (let ((tone (thai-xtis-char-tone chr)))
492 (thai-xtis-clear-char-tone chr)
493 (let ((verbal (thai-xtis-char-verbal chr)))
494 (if (> verbal 48) ; #x30
495 (thai-xtis-clear-char-verbal chr)
497 (backward-delete-char 1)
503 (backward-delete-char 1)
509 (provide 'thai-xtis-util)
511 ;; thai-xtis-util.el ends here.