cgit.sxemacs.org Git - packages/blob - mule-packages/latin-unity/latin-unity-vars.el

   1 ;;; latin-unity-vars.el --- Common variables and objects of latin-unity
   2
   3 ;; Copyright (C) 2002 Free Software Foundation, Inc
   4
   5 ;; Author: Stephen J. Turnbull
   6 ;; Keywords: mule, charsets
   7 ;; Created: 2002 January 26
   8 ;; Last-modified: 2002 March 23
   9
  10 ;; This file is part of XEmacs.
  11
  12 ;; XEmacs is free software; you can redistribute it and/or modify
  13 ;; it under the terms of the GNU General Public License as published by
  14 ;; the Free Software Foundation; either version 2, or (at your option)
  15 ;; any later version.
  16
  17 ;; XEmacs is distributed in the hope that it will be useful,
  18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 ;; GNU General Public License for more details.
  21
  22 ;; You should have received a copy of the GNU General Public License
  23 ;; along with XEmacs; see the file COPYING.  If not, write to the
  24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  25 ;; Boston, MA 02111-1307, USA.
  26
  27
  28 ;;; Commentary:
  29
  30 ;; Mule bogusly considers the various ISO-8859 extended character sets
  31 ;; as disjoint, when ISO 8859 itself clearly considers them to be subsets
  32 ;; of a larger character set.  This library provides functions which
  33 ;; determine the list of coding systems which can encode all of the
  34 ;; characters in the buffer.
  35
  36 ;;; Code:
  37
  38 (provide 'latin-unity-vars)
  39
  40 ;; Load the latin{7,8,9,10} language environments, character sets, and
  41 ;; coding systems.
  42 (require 'latin-euro-standards)
  43
  44 ;;; User customization is in latin-unity.el
  45
  46 ;; latin-unity-equivalence-table
  47 ;; could speed things up a tiny bit by splitting out the bit-vector, but
  48 ;; this is constant-time (a char-table ref plus an aref)
  49 (defvar latin-unity-equivalences (make-char-table 'generic)
  50   "Char-table of Latin character equivalence vectors.
  51
  52 Each vector takes integral elements (or nil, meaning void).  The zero-th
  53 element is interpreted as the bit vector representation of the set of
  54 character sets that can represent the character.  A nil value will cause
  55 an error if accessed, so probably zero should be used instead.  The next
  56 (length latin-unity-character-sets) are the mapping of the char-table
  57 index to code points in the other character sets.  The last is the
  58 Unicode code point.
  59
  60 Note that because this is a char-table, many characters will refer to
  61 the same vector.  Thus whenever updating a character's value, you must
  62 use `copy-sequence', or there will be side-effects.
  63
  64 The table is actually loaded from latin-unity-tables.el.")
  65
  66
  67 ;;; Variables
  68
;; Debugging switch.  Nothing in this file reads it, and the `latin-unity'
;; customization group it is filed under is not defined in this file
;; (presumably in latin-unity.el -- confirm).
(defcustom latin-unity-debug nil
  "If non-nil, make file write operations as slow as molasses.
If there were bugs, this might help find them, but there aren't. ;^)"
  :type 'boolean
  :group 'latin-unity)
  74
;; Not referenced anywhere in this file; presumably used by latin-unity.el
;; when it reports a coding system conflict -- confirm against the caller.
(defvar latin-unity-help-buffer " *Coding system conflict*"
  "Name of the latin-unity help buffer.")
  76
  77 (defconst latin-unity-coding-systems
  78   (let (lucs)
  79     (mapc (lambda (x)
  80             (when (find-coding-system x)
  81               (setq lucs (cons x lucs))))
  82           '(iso-8859-1 iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-9
  83             iso-8859-10 iso-8859-13 iso-8859-14 iso-8859-15 iso-8859-16))
  84     (nreverse lucs))
  85   "List of coding systems \"unified\" by latin-unity.
  86
  87 Cf. `latin-unity-character-sets'.")
  88
  89 (defconst latin-unity-character-sets
  90   (let (lucs)
  91     (mapc (lambda (x)
  92             (when (find-charset x)
  93               (setq lucs (cons x lucs))))
  94           '(latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
  95             latin-iso8859-9 latin-iso8859-10 latin-iso8859-13 latin-iso8859-14
  96             latin-iso8859-15 latin-iso8859-16
  97             ;; above are all GR sets, below are normally GL
  98             ascii latin-jisx0201))
  99     (nreverse lucs))
 100   "List of character sets \"unified\" by latin-unity.
 101
 102 \"Unified\" is a misnomer, since actually these character sets are all
 103 subsets of a larger set.  Characters which are identified by these
 104 library are actually the same characters according to ISO 8859.  The
 105 exception is the Japanese JIS X 0201 left half (JIS Roman), which is
 106 controversial.  It will by default be identified with ASCII, but also
 107 may take values elsewhere according to user preference.  (Unimplemented.)
 108
 109 The ISO 8859 character sets are actually Latin-1 to Latin-10, the right
 110 halves of the ISO 8859 Latin character sets.
 111
 112 ASCII and Unicode are treated implicitly.  All of the listed character
 113 sets are the GR of a coded character set that supports ASCII, except
 114 for JIS Roman.  Whether JIS Roman is considered to be identical to
 115 ASCII, or a slight revision, depends on user preference.  Unicode is a
 116 \"universal\" character set which is always a \"safe\" encoding for
 117 streams that receive buffer contents.")
 118
(defvar latin-unity-ascii-and-jis-roman "\000-\177"
  "Skip-chars set defining the ASCII characters also in JIS Roman.

The default is the whole range from octal 000 through octal 177.

#### This default treats JIS Roman as identical to ASCII, which is not
consistent with the equivalence table.")
 124
 125 (defconst latin-unity-cset-codesys-alist
 126   (let (lucs)
 127     (mapc (lambda (x)
 128             (when (find-coding-system (cdr x))
 129               (setq lucs (cons x lucs))))
 130           '((latin-iso8859-1  . iso-8859-1)
 131             (latin-iso8859-2  . iso-8859-2)
 132             (latin-iso8859-3  . iso-8859-3)
 133             (latin-iso8859-4  . iso-8859-4)
 134             (latin-iso8859-9  . iso-8859-9)
 135             (latin-iso8859-10 . iso-8859-10)
 136             (latin-iso8859-13 . iso-8859-13)
 137             (latin-iso8859-14 . iso-8859-14)
 138             (latin-iso8859-15 . iso-8859-15)
 139             (latin-iso8859-16 . iso-8859-16)
 140             ;; the following mappings are bogus, the RightThang not clear
 141             (ascii            . iso-8859-1)     ; any will do
 142             (latin-jisx0201   . jisx0201)))     ; doesn't currently exist
 143     (nreverse lucs))
 144   "Map Latin charsets to corresponding coding systems or classes.")
 145
 146 ;; bit vectors for checking the feasible character sets
 147
 148 (defconst latin-unity-all-flags
 149   (lognot (lsh (lognot 0) (length latin-unity-character-sets)))
 150   "Bit vector representing the set of all Latin character sets.")
 151
 152 ;; put the character set indicies and flag bits in reasonable places
 153 (defconst latin-unity-non-latin-bit-flag
 154   (let ((index 1) (bit 1))
 155     (if (> (length latin-unity-character-sets) 25)
 156         (error "representation too small to support so many charsets!"))
 157     (mapcar (lambda (cs)
 158               (put cs 'latin-unity-flag-bit bit)
 159               (put cs 'latin-unity-index index)
 160               (setq bit (lsh bit 1)
 161                     index (1+ index)))
 162             latin-unity-character-sets)
 163     bit)
 164   "A bit-flag indicating charsets not handled by latin-unity.")
 165
 166 ;;; end of latin-unity-vars.