Lisp_Object Qno_iso6429;
Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion;
Lisp_Object Qescape_quoted;
-Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
+Lisp_Object Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
#endif
Lisp_Object Qencode, Qdecode;
switch (type) {
default:
abort();
+ return Qnil;
case EOL_LF:
return Qlf;
case EOL_CRLF:
/* Checks that aliasee names a coding-system */
real_coding_system = Fget_coding_system(aliasee);
+ SXE_SET_UNUSED(real_coding_system);
/* Check for coding system alias loops */
if (EQ(alias, aliasee))
switch (XCODING_SYSTEM_TYPE(tmp)) {
default:
abort();
+ return Qnil;
case CODESYS_AUTODETECT:
return Qundecided;
#ifdef MULE
Returns:
0 == st->eol_type is EOL_AUTODETECT and/or more than coding category
- is present in st->mask
+ is present in st->mask
1 == definitive answers are here for both st->eol_type and st->mask
*/
the first one that is allowed. */
for (i = 0; i < CODING_CATEGORY_LAST; i++) {
cat = fcd->coding_category_by_priority[i];
+ if (cat<0)
+ continue;
if ((mask & (1 << cat)) &&
!NILP(fcd->coding_category_system[cat]))
break;
/* number of leading lines to check for a coding cookie */
#define LINES_TO_CHECK 2
+
void
-determine_real_coding_system(lstream_t stream, Lisp_Object * codesys_in_out,
- eol_type_t * eol_type_in_out)
+autodetect_real_coding_system(lstream_t stream, Lisp_Object * codesys_in_out,
+ eol_type_t * eol_type_in_out);
+
+void
+autodetect_real_coding_system(lstream_t stream, Lisp_Object * codesys_in_out,
+ eol_type_t * eol_type_in_out)
{
+ static const char mime_name_valid_chars[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789"
+ "!$%&*+-.^_{|}~";
+
struct detection_state decst;
- if (*eol_type_in_out == EOL_AUTODETECT)
- *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE(*codesys_in_out);
+ Extbyte buf[4096];
+ Lisp_Object coding_system = Qnil;
+ Extbyte *p = NULL,
+ *scan_end = NULL;
+ int lines_checked = 0;
+ Lstream_data_count nread =
+ Lstream_read(stream, buf, sizeof(buf));
+
+ if (nread < 0)
+ /* Nothing more can be done here */
+ return;
xzero(decst);
decst.eol_type = *eol_type_in_out;
decst.mask = ~0;
- /* If autodetection is called for, do it now. */
- if (XCODING_SYSTEM_TYPE(*codesys_in_out) == CODESYS_AUTODETECT
- || *eol_type_in_out == EOL_AUTODETECT) {
- Extbyte buf[4096];
- Lisp_Object coding_system = Qnil;
- Extbyte *p;
- Lstream_data_count nread =
- Lstream_read(stream, buf, sizeof(buf));
- Extbyte *scan_end;
- int lines_checked = 0;
-
- /* Look for initial "-*-"; mode line prefix */
- for (p = buf,
- scan_end = buf + nread - LENGTH("-*-coding:?-*-");
- p <= scan_end && lines_checked < LINES_TO_CHECK; p++)
- if (*p == '-' && *(p + 1) == '*' && *(p + 2) == '-') {
- Extbyte *local_vars_beg = p + 3;
- /* Look for final "-*-"; mode line suffix */
- for (p = local_vars_beg,
- scan_end = buf + nread - LENGTH("-*-");
- p <= scan_end
- && lines_checked < LINES_TO_CHECK; p++)
- if (*p == '-' && *(p + 1) == '*'
- && *(p + 2) == '-') {
- Extbyte *suffix = p;
- /* Look for "coding:" */
- for (p = local_vars_beg,
- scan_end =
- suffix -
- LENGTH("coding:?");
- p <= scan_end; p++)
- if (memcmp
- ("coding:", p,
- LENGTH("coding:"))
- == 0
- && (p ==
- local_vars_beg
- || (*(p - 1) ==
- ' '
- || *(p -
- 1) ==
- '\t'
- || *(p -
- 1) ==
- ';'))) {
- Extbyte save;
- int n;
- p += LENGTH
- ("coding:");
- while (*p == ' '
- || *p ==
- '\t')
- p++;
-
- /* Get coding system name */
- save = *suffix;
- *suffix = '\0';
- /* Characters valid in a MIME charset name (rfc 1521),
- and in a Lisp symbol name. */
- n = strspn((char
- *)p,
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789"
- "!$%&*+-.^_{|}~");
- *suffix = save;
- if (n > 0) {
- save =
- p
- [n];
- p[n] =
- '\0';
- coding_system
- =
- Ffind_coding_system
- (intern
- ((char *)p));
- p[n] =
- save;
- }
- break;
- }
- break;
- }
- /* #### file must use standard EOLs or we miss 2d line */
- /* #### not to mention this is broken for UTF-16 DOS files */
- else if (*p == '\n' || *p == '\r') {
- lines_checked++;
- /* skip past multibyte (DOS) newline */
- if (*p == '\r'
- && *(p + 1) == '\n')
- p++;
- }
- break;
- }
- /* #### file must use standard EOLs or we miss 2d line */
- /* #### not to mention this is broken for UTF-16 DOS files */
- else if (*p == '\n' || *p == '\r') {
+
+ /* Look for initial "-*-"; mode line prefix */
+ for (p = buf, scan_end = buf + nread - LENGTH("-*-coding:?-*-");
+ p <= scan_end && lines_checked < LINES_TO_CHECK; p++) {
+ Extbyte *local_vars_beg = p + 3;
+
+ if (*p == '\n' || *p == '\r') {
+ /* file must use standard EOLs or we miss 2d
+ line not to mention this is broken for
+ UTF-16 DOS files */
+ lines_checked++;
+ /* skip past multibyte (DOS) newline */
+ if (*p == '\r' && *(p + 1) == '\n')
+ p++;
+ continue;
+ }
+ if (*p != '-' || *(p + 1) != '*' || *(p + 2) != '-') {
+ continue;
+ }
+
+ /* Look for final "-*-"; mode line suffix */
+ for (p = local_vars_beg, scan_end = buf + nread - LENGTH("-*-");
+ p <= scan_end && lines_checked < LINES_TO_CHECK; p++) {
+ Extbyte *suffix = p;
+ if (*p == '\n' || *p == '\r') {
+ /* file must use standard EOLs or we
+ miss 2d line not to mention this is
+ broken for UTF-16 DOS files */
lines_checked++;
/* skip past multibyte (DOS) newline */
- if (*p == '\r' && *(p + 1) == '\n')
+ if (*p == '\r'
+ && *(p + 1) == '\n')
p++;
+ continue;
+ }
+ if (*p != '-' || *(p + 1) != '*' || *(p + 2) != '-') {
+ continue;
}
- if (NILP(coding_system))
- do {
- if (detect_coding_type(&decst, buf, nread,
- XCODING_SYSTEM_TYPE
- (*codesys_in_out)
- != CODESYS_AUTODETECT))
- break;
- nread = Lstream_read(stream, buf, sizeof(buf));
- if (nread == 0)
- break;
+ /* Look for "coding:" */
+ for (p = local_vars_beg, scan_end = suffix - LENGTH("coding:?");
+ p <= scan_end; p++) {
+ Extbyte save;
+ int n;
+
+ if (memcmp("coding:", p, LENGTH("coding:")) != 0) {
+ continue;
+ }
+ if (p != local_vars_beg && strchr(" \t;", *p) == NULL ) {
+ continue;
+ }
+ p += LENGTH("coding:");
+ while (*p == ' ' || *p == '\t') {
+ p++;
+ }
+
+ /* Get coding system name */
+ save = *suffix;
+ *suffix = '\0';
+ /* Characters valid in a MIME charset
+ name (rfc 1521), and in a Lisp
+ symbol name. */
+ n = strspn((char *)p, mime_name_valid_chars);
+ *suffix = save;
+ if (n > 0) {
+ save = p[n];
+ p[n] = '\0';
+ coding_system = Ffind_coding_system(
+ intern((char *)p));
+ p[n] = save;
+ }
+ break;
}
- while (1);
+ /* Try " coding:" */
+ if (NILP(coding_system)) {
+ for (p = local_vars_beg, scan_end = suffix - LENGTH(" coding:?");
+ p <= scan_end; p++) {
+ Extbyte save;
+ int n;
+
+ if (memcmp(" coding:", p, LENGTH(" coding:")) != 0) {
+ continue;
+ }
+ if (p != local_vars_beg && strchr(" \t;", *p) == NULL ) {
+ continue;
+ }
+ p += LENGTH(" coding:");
+ while (*p == ' ' || *p == '\t') {
+ p++;
+ }
- else if (XCODING_SYSTEM_TYPE(*codesys_in_out) ==
- CODESYS_AUTODETECT
- && XCODING_SYSTEM_EOL_TYPE(coding_system) ==
- EOL_AUTODETECT)
- do {
- if (detect_coding_type(&decst, buf, nread, 1))
- break;
- nread = Lstream_read(stream, buf, sizeof(buf));
- if (!nread)
+ /* Get coding system name */
+ save = *suffix;
+ *suffix = '\0';
+ /* Characters valid in a MIME charset
+ name (rfc 1521), and in a Lisp
+ symbol name. */
+ n = strspn((char *)p, mime_name_valid_chars);
+ *suffix = save;
+ if (n > 0) {
+ save = p[n];
+ p[n] = '\0';
+ coding_system = Ffind_coding_system(
+ intern((char *)p));
+ p[n] = save;
+ }
break;
+ }
}
- while (1);
-
- *eol_type_in_out = decst.eol_type;
- if (XCODING_SYSTEM_TYPE(*codesys_in_out) == CODESYS_AUTODETECT) {
- if (NILP(coding_system))
- *codesys_in_out =
- coding_system_from_mask(decst.mask);
- else
- *codesys_in_out = coding_system;
+ break;
+ }
+ break;
+ }
+ if (NILP(coding_system)) {
+ do {
+ if (detect_coding_type(&decst, buf, nread,
+ XCODING_SYSTEM_TYPE(*codesys_in_out)
+ != CODESYS_AUTODETECT))
+ break;
+ nread = Lstream_read(stream, buf, sizeof(buf));
+ if (nread == 0)
+ break;
+ }
+ while (1);
+ } else if (XCODING_SYSTEM_TYPE(*codesys_in_out) == CODESYS_AUTODETECT
+ && XCODING_SYSTEM_EOL_TYPE(coding_system) == EOL_AUTODETECT) {
+ do {
+ if (detect_coding_type(&decst, buf, nread, 1))
+ break;
+ nread = Lstream_read(stream, buf, sizeof(buf));
+ if (!nread)
+ break;
}
+ while (1);
+ }
+ *eol_type_in_out = decst.eol_type;
+ if (XCODING_SYSTEM_TYPE(*codesys_in_out) == CODESYS_AUTODETECT) {
+ if (NILP(coding_system))
+ *codesys_in_out =
+ coding_system_from_mask(decst.mask);
+ else
+ *codesys_in_out = coding_system;
+ }
+}
+
+void
+determine_real_coding_system(lstream_t stream, Lisp_Object * codesys_in_out,
+ eol_type_t * eol_type_in_out)
+{
+
+ if (*eol_type_in_out == EOL_AUTODETECT)
+ *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE(*codesys_in_out);
+
+
+ /* If autodetection is called for, do it now. */
+ if (XCODING_SYSTEM_TYPE(*codesys_in_out) == CODESYS_AUTODETECT
+ || *eol_type_in_out == EOL_AUTODETECT) {
+ autodetect_real_coding_system(stream, codesys_in_out,
+ eol_type_in_out);
}
/* If we absolutely can't determine the EOL type, just assume LF. */
<GETA MARK> (U+3013) of JIS X 0208, which means correct character
is not found, instead.
#### do something more appropriate (use blob?)
- Danger, Will Robinson! Data loss. Should we signal user? */
+ Danger, Will Robinson! Data loss. Should we signal user? */
static void decode_ucs4(unsigned long ch, unsigned_char_dynarr * dst)
{
Lisp_Object chr = ucs_to_char(ch);
defsymbol(&Qinput_charset_conversion, "input-charset-conversion");
defsymbol(&Qoutput_charset_conversion, "output-charset-conversion");
- defsymbol(&Qshort, "short");
defsymbol(&Qno_ascii_eol, "no-ascii-eol");
defsymbol(&Qno_ascii_cntl, "no-ascii-cntl");
defsymbol(&Qseven, "seven");