;;; thai-xtis-util.el --- utilities for Thai (for XTIS). ;; Copyright (C) 1999 Electrotechnical Laboratory, JAPAN. ;; Licensed to the Free Software Foundation. ;; Copyright (C) 1999 NECTEC, Thai. ;; Author: TAKAHASHI Naoto ;; Ken'ichi HANDA ;; Virach Sornlertlamvanich ;; MORIOKA Tomohiko ;; Keywords: mule, multilingual, Thai, XTIS ;; This file is part of XEmacs. ;; XEmacs is free software; you can redistribute it and/or modify it ;; under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; XEmacs is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with XEmacs; see the file COPYING. If not, write to the Free ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ;; 02111-1307, USA. ;;; Commentary: ;; For Thai, the pre-composed character set proposed by ;; Virach Sornlertlamvanich is supported. ;;; Code: (require 'overlay) ;;;###autoload ;; (defun setup-thai-xtis-environment () ;; "Setup multilingual environment for Thai-XTIS." ;; (interactive) ;; (set-language-environment "Thai-XTIS")) ;;;###autoload ;; (defun exit-thai-xtis-environment () ;; "Exit Thai-XTIS environment." ;; ;; (thai-xtis-text-mode nil) ;; ) ;;; Utilities for ThaiText minor mode ;; Generic character for Thai character set. (defvar thai-xtis-generic-char (if (featurep 'xemacs) 'thai-xtis (make-char 'thai-xtis))) ;; Regular expression matching any single Thai character. (defvar thai-xtis-char-regexp "\\cx") (defvar thai-xtis-text-mode nil "Non-nil if using Thai text minor mode.") (make-variable-buffer-local 'thai-xtis-text-mode) (defvar thai-xtis-text-mode-map (let ((map (make-sparse-keymap))) (define-key map "\M-f" 'thai-xtis-forward-word) (define-key map "\M-b" 'thai-xtis-backward-word) (define-key map "\M-d" 'thai-xtis-kill-word) (define-key map "\M-\177" 'thai-xtis-backward-kill-word) (define-key map "\M-t" 'thai-xtis-transpose-words) (cond ((featurep 'xemacs) (define-key map [(meta backspace)] 'thai-xtis-backward-kill-word) (define-key map [(meta delete)] 'thai-xtis-backward-kill-word) (define-key map [(meta right)] 'thai-xtis-forward-word) (define-key map [(meta left)] 'thai-xtis-backward-word) (define-key map [(control right)] 'thai-xtis-forward-word) (define-key map [(control left)] 'thai-xtis-backward-word) (define-key map [(control delete)] 'thai-xtis-backward-kill-word) ) (t (define-key map [M-right] 'thai-xtis-forward-word) (define-key map [M-left] 'thai-xtis-backward-word) (define-key map [C-right] 'thai-xtis-forward-word) (define-key map [C-left] 'thai-xtis-backward-word) (define-key map [C-delete] 'thai-xtis-backward-kill-word) )) ;; Character base operations. (define-key map "\177" 'thai-xtis-backward-delete-char) (define-key map [backspace] 'thai-xtis-backward-delete-char) map) "Keymap for Thai Text minor mode.") (defvar thai-xtis-prev-auto-fill-function nil) (make-variable-buffer-local 'thai-xtis-prev-auto-fill-function) (defvar thai-xtis-prev-normal-auto-fill-function nil) (make-variable-buffer-local 'thai-xtis-prev-normal-auto-fill-function) ;;;###autoload (defun thai-xtis-text-mode (&optional arg) "Minor mode for Thai text that pays attention to word segmentation. In this mode, word-oriented commands (e.g forward-word) and text filling commands (e.g. fill-paragraph) recognize Thai word boundaries within a sequence of Thai characters." (interactive (list (not thai-xtis-text-mode))) (setq thai-xtis-text-mode arg) (if thai-xtis-text-mode (progn ;; Setup ThaiText mode. (make-local-variable 'auto-fill-chars) ;; (setq auto-fill-chars (copy-sequence auto-fill-chars)) ;; (aset auto-fill-chars thai-xtis-generic-char t) (setq thai-xtis-prev-auto-fill-function 'auto-fill-function) (make-local-variable 'auto-fill-function) (setq auto-fill-function 'thai-xtis-do-auto-fill) (setq thai-xtis-prev-normal-auto-fill-function 'normal-auto-fill-function) (setq normal-auto-fill-function 'thai-xtis-do-auto-fill) (make-local-variable 'sentence-end-without-period) (setq sentence-end-without-period t) (set-category-table (copy-category-table)) ;; (modify-category-entry thai-xtis-generic-char ?|) (put-charset-property 'thai-xtis 'fill-find-break-point-function 'thai-xtis-find-break-point) (put-charset-property 'thai-xtis 'nospace-between-words t) (make-local-variable 'before-change-functions) (setq before-change-functions (cons 'thai-xtis-wordseg-overlay-modification-function before-change-functions)) ) (kill-local-variable 'auto-fill-chars) (kill-local-variable 'sentence-end-without-period) (kill-local-variable 'before-change-functions) (setq auto-fill-function thai-xtis-prev-auto-fill-function) (set-category-table (standard-category-table)) (put-charset-property 'thai-xtis 'fill-find-break-point-function nil) (put-charset-property 'thai-xtis 'nospace-between-words nil) ) (force-mode-line-update)) (cond ((featurep 'xemacs) (add-minor-mode 'thai-xtis-text-mode " ThaiText" thai-xtis-text-mode-map nil 'thai-xtis-text-mode) ) (t (require 'alist) (set-alist 'minor-mode-alist 'thai-xtis-text-mode '(" ThaiText")) (set-alist 'minor-mode-map-alist 'thai-xtis-text-mode thai-xtis-text-mode-map) )) ;;; Thai wordseg program interface. (defvar thai-xtis-wordseg-program "/usr/local/bin/wordseg" "*Program name of Thai word segmentor. This program reads a Thai word from stdin, and writes segmented words (separated by a space) to stdout.") (defvar thai-xtis-wordseg-data "/usr/local/lib/wordseg" "*Directory of data used by `thai-xtis-wordseg-program'.") (defvar thai-xtis-wordseg-args (list "mule" "-d" thai-xtis-wordseg-data) "List of arguments for the program `thai-xtis-wordseg-program'.") (defconst thai-xtis-wordseg-service 6750 "Service name of port number for Thai word segmentor network service. If a program specified in `thai-xtis-wordseg-program' is not available on your machine, this service will be used.") (defvar thai-xtis-wordseg-server "localhost" "*Host name for Thai word segmentor network service.") (defvar thai-xtis-wordseg-coding-system 'tis-620 "Coding system used to communicate with `thai-xtis-wordseg-program'.") ;; Wordseg process. (defvar thai-xtis-wordseg-proc nil) ;; String to accumulate data sent from wordseg. (defvar thai-xtis-wordseg-buf nil) ;; Flag to tell that data sent from wordseg is ready in ;; thai-xtis-wordseg-buf. (defvar thai-xtis-wordseg-ready nil) ;; Function to call when data from wordseg arrives at Emacs. (defun thai-xtis-wordseg-filter (proc str) (setq thai-xtis-wordseg-buf (concat thai-xtis-wordseg-buf str)) (if (string-match "\n" thai-xtis-wordseg-buf) (setq thai-xtis-wordseg-ready t))) (defun thai-xtis-word-segment (str &optional stringp) "Segment STR by Thai words. Return a list of word starting positions. The last element of the list is the ending position of the last word. If optional arg STRINGP is non-nil, return a string of words in Thai separated by `|' (vertical bar)." (save-match-data (let ((status (and thai-xtis-wordseg-proc (process-status thai-xtis-wordseg-proc)))) (if (not (memq status '(run open))) (let ((coding-system-for-read 'binary) (coding-system-for-write 'binary)) (setq thai-xtis-wordseg-proc (if (file-executable-p thai-xtis-wordseg-program) (apply 'start-process "wordseg" nil thai-xtis-wordseg-program thai-xtis-wordseg-args) (open-network-stream "wordseg" nil thai-xtis-wordseg-server thai-xtis-wordseg-service))) (if (not (memq (process-status thai-xtis-wordseg-proc) '(run open))) (error "Failed to run %s" thai-xtis-wordseg-program)) (process-kill-without-query thai-xtis-wordseg-proc) (set-process-filter thai-xtis-wordseg-proc 'thai-xtis-wordseg-filter) ;; For unknown reason, we must wait for a while before ;; sending Thai text to "wordseg" program. (sit-for 0 300) )) (setq thai-xtis-wordseg-buf "" thai-xtis-wordseg-ready nil) (process-send-string thai-xtis-wordseg-proc (concat (encode-coding-string str 'tis-620) "\n")) (while (not thai-xtis-wordseg-ready) (accept-process-output thai-xtis-wordseg-proc)) (setq thai-xtis-wordseg-buf (decode-coding-string thai-xtis-wordseg-buf thai-xtis-wordseg-coding-system)) (if stringp (substring thai-xtis-wordseg-buf 0 -2) (let ((idx 0) (count 0) (segments (list 0))) (while (setq idx (string-match "|" thai-xtis-wordseg-buf idx)) (setq segments (cons (- idx count) segments) count (1+ count) idx (1+ idx))) (nreverse segments)))))) ;; Delete all overlays in between FROM and TO which have ;; `thai-xtis-wordseg' property. (defun thai-xtis-delete-wordseg-overlay (from to) (let ((overlays (overlays-in from to))) (while overlays (if (overlay-get (car overlays) 'thai-xtis-wordseg) (delete-overlay (car overlays))) (setq overlays (cdr overlays))))) ;; A function to call when a text within or adjacent to a Thai wordseg ;; overlay is changed. (defun thai-xtis-wordseg-overlay-modification-function (from to) (let ((overlays (append (overlays-at from) (overlays-at to)))) (while overlays (if (overlay-get (car overlays) 'thai-xtis-wordseg) (delete-overlay (car overlays))) (setq overlays (cdr overlays))))) ;; Return Thai wordseg overlay at POS. (defun thai-xtis-get-wordseg-overlay (pos) (let ((overlays (overlays-at pos)) overlay) (while overlays (if (overlay-get (car overlays) 'thai-xtis-wordseg) (setq overlay (car overlays) overlays nil))) overlay)) ;; Make a wordseg overlay on the region FROM and TO and return it. ;; SEGMENTS contains word segmentation information. It is set in ;; `thai-xtis-wordseg' property of the overlay. (defun thai-xtis-put-wordseg-overlay (from to segments) (let ((overlay (make-overlay from to))) (overlay-put overlay 'thai-xtis-wordseg segments) (overlay-put overlay 'evaporate t) ;;(overlay-put overlay 'modification-hooks ;;(list 'thai-xtis-wordseg-overlay-modification-function)) ;;(overlay-put overlay 'insert-in-front-hooks ;;(list 'thai-xtis-wordseg-overlay-modification-function)) ;;(overlay-put overlay 'insert-behind-hooks ;;(list 'thai-xtis-wordseg-overlay-modification-function)) overlay)) ;; Make wordseg overlays on all Thai character sequences in the region ;; FROM and TO. (defun thai-xtis-set-wordseg-info-region (from to) (thai-xtis-delete-wordseg-overlay from to) (save-excursion (save-match-data (goto-char from) (let ((regexp (concat thai-xtis-char-regexp "+")) (continue t) end segments) (while (and continue (re-search-forward regexp nil t)) (setq from (match-beginning 0) end (point) continue (< end to) segments (thai-xtis-word-segment (match-string 0))) (thai-xtis-put-wordseg-overlay from (if (< end (point-max)) (1+ end) end) segments)))))) ;; Return a list of word segmented positions at or near POS. (defun thai-xtis-wordsegs-at (pos) (let ((overlay (thai-xtis-get-wordseg-overlay pos))) (or overlay (save-excursion (while (and (not (bobp)) (eq (char-charset (preceding-char)) 'thai-xtis)) (forward-char -1)) (thai-xtis-set-wordseg-info-region (point) pos) (setq overlay (thai-xtis-get-wordseg-overlay pos)))) (if overlay (let ((head (overlay-start overlay)) (segments (overlay-get overlay 'thai-xtis-wordseg))) (mapcar (function (lambda (x) (+ head x))) segments))))) (defun thai-xtis-wordseg-info (pos) (let ((segments (thai-xtis-wordsegs-at pos))) (if (and segments (< pos (car (last segments)))) (let ((from (car segments))) (while (<= (car segments) pos) (setq from (car segments) segments (cdr segments))) (cons from (car segments)))))) ;; Move point forward to the next word boundary or to LIMIT. If LIMIT ;; is before point, move point backward to the previous word boundary. (defun thai-xtis-search-next-wordseg (limit &optional inhibit-limit) (save-match-data (let ((orig (point)) result) (if (> limit orig) (if (and (re-search-forward "\\sw" limit 'move) (progn (forward-char -1) (looking-at thai-xtis-char-regexp))) (setq result t)) (if (and (re-search-backward "\\sw" limit 'move) (looking-at thai-xtis-char-regexp)) (setq result t))) (if result (let ((segments (thai-xtis-wordsegs-at (point)))) (or segments (let (from to) (save-excursion (forward-char 1) (if (looking-at (format "\\c%c+" ?t)) (setq to (match-end 0)) (setq to (point))) (forward-char -1) (if (re-search-backward (format "\\C%c" ?t) (if (< limit orig) limit) 'move) (setq from (1+ (point))) (setq from (point))) (thai-xtis-set-wordseg-info-region from to)) (setq segments (thai-xtis-wordsegs-at (point))))) (let (;; (point) (l segments) pos) (if (< limit orig) (progn (setq pos (car segments)) (forward-char 1) (while (< (car l) (point)) (setq pos (car l) l (cdr l)))) (while (<= (car l) (point)) (setq l (cdr l))) (setq pos (car l))) (goto-char pos))) (goto-char orig) nil)))) ;;; Thai text filling programs. ;; Property `fill-find-break-point-function' of Thai charset. (defun thai-xtis-find-break-point (limit) (if (and thai-xtis-text-mode (looking-at thai-xtis-char-regexp)) (thai-xtis-search-next-wordseg limit))) (defvar thai-xtis-auto-fill-delay-column 8 "How many columns right of `fill-column' auto filling should be delayed. In Auto Fill mode, when you type a Thai character beyond fill-column plus this value, automatic line-wrapping happens. This delay of automatic line-wrapping is to get more accurate word segmentation info from `thai-xtis-wordseg-program'.") (defun thai-xtis-do-auto-fill () "Substitution for the function `do-auto-fill' in Thai Text mode." (if (and (not (memq (preceding-char) '(? ?\n ?\t))) (< (current-column) (+ fill-column thai-xtis-auto-fill-delay-column))) nil (do-auto-fill))) ;;; Word base operations. (defun thai-xtis-forward-word (arg) "Substitution for the command `forward-word' in Thai Text minor mode." (interactive "p") (cond ((> arg 0) (while (and (not (eobp)) (not (or (looking-at "\\w") (looking-at thai-xtis-char-regexp)))) (forward-char 1)) (if (eobp) nil (if (looking-at thai-xtis-char-regexp) (thai-xtis-search-next-wordseg (point-max)) (forward-word 1)) (thai-xtis-forward-word (1- arg)))) ((< arg 0) (while (and (not (bobp)) (progn (forward-char -1) (not (or (looking-at "\\w") (looking-at thai-xtis-char-regexp)))))) (if (bolp) nil (if (looking-at thai-xtis-char-regexp) (progn (forward-char 1) (thai-xtis-search-next-wordseg (point-min))) (forward-char 1) (forward-word -1)) (thai-xtis-forward-word (1+ arg)))))) (defun thai-xtis-backward-word (arg) "Substitution for the command `backward-word' in Thai Text minor mode." (interactive "p") (thai-xtis-forward-word (- arg))) (defun thai-xtis-kill-word (arg) "Substitution for the command `kill-word' in Thai Text minor mode." (interactive "*p") (let ((pos (point))) (thai-xtis-forward-word arg) (kill-region pos (point)))) (defun thai-xtis-backward-kill-word (arg) "Substitution for the command `backward-kill-word' in Thai Text minor mode." (interactive "*p") (thai-xtis-kill-word (- arg))) (defun thai-xtis-transpose-words (arg) "Substitution for the command `transpose-words' in Thai Text minor mode." (interactive "*p") (transpose-subr 'thai-xtis-forward-word arg)) ;; Character base operations. (defsubst thai-xtis-char-tone (char) (logand (char-int char) 7) ) (defsubst thai-xtis-clear-char-tone (char) (int-char (logxor (logior (char-int char) 7) 7)) ) (defsubst thai-xtis-char-verbal (char) (logand (char-int char) 120) ; #x78 ) (defsubst thai-xtis-clear-char-verbal (char) (int-char (logior (logxor (logior (char-int char) 120) 120) 48)) ; #x30 ) (defun thai-xtis-backward-delete-char (arg) "Delete backward one character each, used in Thai text only. A vowel sign or a tone mark is considered as a character." (interactive "p") (while (> arg 0) (let ((chr (char-before))) (cond ((eq (char-charset chr) 'thai-xtis) (setq chr (let ((tone (thai-xtis-char-tone chr))) (if (> tone 0) (thai-xtis-clear-char-tone chr) (let ((verbal (thai-xtis-char-verbal chr))) (if (> verbal 48) ; #x30 (thai-xtis-clear-char-verbal chr) ))))) (backward-delete-char 1) (if chr (insert chr) ) ) (t (backward-delete-char 1) ))) (setq arg (1- arg)) )) ;;; (provide 'thai-xtis-util) ;; thai-xtis-util.el ends here.