X-Git-Url: http://cgit.sxemacs.org/?p=sxemacs;a=blobdiff_plain;f=src%2Fmule%2Ffile-coding.c;h=496afdb35b7ec0a28fcd98845e8f10baacc70957;hp=eeb28a18cb4f6787382e93b59f081a7699ab9c10;hb=31e8a7f7af30a232a29957cab97bd1f892df3d21;hpb=580496d4cb0a95b14964b76c4fde232b8795d54b diff --git a/src/mule/file-coding.c b/src/mule/file-coding.c index eeb28a1..496afdb 100644 --- a/src/mule/file-coding.c +++ b/src/mule/file-coding.c @@ -99,7 +99,7 @@ Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output; Lisp_Object Qno_iso6429; Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion; Lisp_Object Qescape_quoted; -Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift; +Lisp_Object Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift; #endif Lisp_Object Qencode, Qdecode; @@ -463,6 +463,7 @@ static Lisp_Object eol_type_to_symbol(eol_type_t type) switch (type) { default: abort(); + break; case EOL_LF: return Qlf; case EOL_CRLF: @@ -1330,6 +1331,7 @@ Return the type of CODING-SYSTEM. switch (XCODING_SYSTEM_TYPE(tmp)) { default: abort(); + break; case CODESYS_AUTODETECT: return Qundecided; #ifdef MULE @@ -1733,7 +1735,7 @@ detect_eol_type(struct detection_state *st, const Extbyte * src, Returns: 0 == st->eol_type is EOL_AUTODETECT and/or more than coding category - is present in st->mask + is present in st->mask 1 == definitive answers are here for both st->eol_type and st->mask */ @@ -1846,157 +1848,158 @@ static Lisp_Object coding_system_from_mask(int mask) /* number of leading lines to check for a coding cookie */ #define LINES_TO_CHECK 2 + void -determine_real_coding_system(lstream_t stream, Lisp_Object * codesys_in_out, +autodetect_real_coding_system(lstream_t stream, Lisp_Object * codesys_in_out, eol_type_t * eol_type_in_out) { + static const char mime_name_valid_chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789" + "!$%&*+-.^_{|}~"; + struct detection_state decst; - if (*eol_type_in_out == EOL_AUTODETECT) - *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE(*codesys_in_out); + Extbyte buf[4096]; + Lisp_Object coding_system = Qnil; + Extbyte *p = NULL, + *scan_end = NULL; + int lines_checked = 0; + Lstream_data_count nread = + Lstream_read(stream, buf, sizeof(buf)); + + if (nread < 0) + /* Nothing more can be done here */ + return; xzero(decst); decst.eol_type = *eol_type_in_out; decst.mask = ~0; - /* If autodetection is called for, do it now. */ - if (XCODING_SYSTEM_TYPE(*codesys_in_out) == CODESYS_AUTODETECT - || *eol_type_in_out == EOL_AUTODETECT) { - Extbyte buf[4096]; - Lisp_Object coding_system = Qnil; - Extbyte *p; - Lstream_data_count nread = - Lstream_read(stream, buf, sizeof(buf)); - Extbyte *scan_end; - int lines_checked = 0; - - /* Look for initial "-*-"; mode line prefix */ - for (p = buf, - scan_end = buf + nread - LENGTH("-*-coding:?-*-"); - p <= scan_end && lines_checked < LINES_TO_CHECK; p++) - if (*p == '-' && *(p + 1) == '*' && *(p + 2) == '-') { - Extbyte *local_vars_beg = p + 3; - /* Look for final "-*-"; mode line suffix */ - for (p = local_vars_beg, - scan_end = buf + nread - LENGTH("-*-"); - p <= scan_end - && lines_checked < LINES_TO_CHECK; p++) - if (*p == '-' && *(p + 1) == '*' - && *(p + 2) == '-') { - Extbyte *suffix = p; - /* Look for "coding:" */ - for (p = local_vars_beg, - scan_end = - suffix - - LENGTH("coding:?"); - p <= scan_end; p++) - if (memcmp - ("coding:", p, - LENGTH("coding:")) - == 0 - && (p == - local_vars_beg - || (*(p - 1) == - ' ' - || *(p - - 1) == - '\t' - || *(p - - 1) == - ';'))) { - Extbyte save; - int n; - p += LENGTH - ("coding:"); - while (*p == ' ' - || *p == - '\t') - p++; - - /* Get coding system name */ - save = *suffix; - *suffix = '\0'; - /* Characters valid in a MIME charset name (rfc 1521), - and in a Lisp symbol name. */ - n = strspn((char - *)p, - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789" - "!$%&*+-.^_{|}~"); - *suffix = save; - if (n > 0) { - save = - p - [n]; - p[n] = - '\0'; - coding_system - = - Ffind_coding_system - (intern - ((char *)p)); - p[n] = - save; - } - break; - } - break; - } - /* #### file must use standard EOLs or we miss 2d line */ - /* #### not to mention this is broken for UTF-16 DOS files */ - else if (*p == '\n' || *p == '\r') { - lines_checked++; - /* skip past multibyte (DOS) newline */ - if (*p == '\r' - && *(p + 1) == '\n') - p++; - } - break; - } - /* #### file must use standard EOLs or we miss 2d line */ - /* #### not to mention this is broken for UTF-16 DOS files */ - else if (*p == '\n' || *p == '\r') { + + /* Look for initial "-*-"; mode line prefix */ + for (p = buf, scan_end = buf + nread - LENGTH("-*-coding:?-*-"); + p <= scan_end && lines_checked < LINES_TO_CHECK; p++) { + Extbyte *local_vars_beg = p + 3; + + if (*p == '\n' || *p == '\r') { + /* file must use standard EOLs or we miss 2d + line not to mention this is broken for + UTF-16 DOS files */ + lines_checked++; + /* skip past multibyte (DOS) newline */ + if (*p == '\r' && *(p + 1) == '\n') + p++; + continue; + } + if (*p != '-' || *(p + 1) != '*' || *(p + 2) != '-') { + continue; + } + + /* Look for final "-*-"; mode line suffix */ + for (p = local_vars_beg, scan_end = buf + nread - LENGTH("-*-"); + p <= scan_end && lines_checked < LINES_TO_CHECK; p++) { + Extbyte *suffix = p; + if (*p == '\n' || *p == '\r') { + /* file must use standard EOLs or we + miss 2d line not to mention this is + broken for UTF-16 DOS files */ lines_checked++; /* skip past multibyte (DOS) newline */ - if (*p == '\r' && *(p + 1) == '\n') + if (*p == '\r' + && *(p + 1) == '\n') p++; + continue; } - - if (NILP(coding_system)) - do { - if (detect_coding_type(&decst, buf, nread, - XCODING_SYSTEM_TYPE - (*codesys_in_out) - != CODESYS_AUTODETECT)) - break; - nread = Lstream_read(stream, buf, sizeof(buf)); - if (nread == 0) - break; + if (*p != '-' || *(p + 1) != '*' || *(p + 2) != '-') { + continue; } - while (1); - else if (XCODING_SYSTEM_TYPE(*codesys_in_out) == - CODESYS_AUTODETECT - && XCODING_SYSTEM_EOL_TYPE(coding_system) == - EOL_AUTODETECT) - do { - if (detect_coding_type(&decst, buf, nread, 1)) - break; - nread = Lstream_read(stream, buf, sizeof(buf)); - if (!nread) - break; - } - while (1); + /* Look for "coding:" */ + for (p = local_vars_beg, scan_end = suffix - LENGTH("coding:?"); + p <= scan_end; p++) { + Extbyte save; + int n; - *eol_type_in_out = decst.eol_type; - if (XCODING_SYSTEM_TYPE(*codesys_in_out) == CODESYS_AUTODETECT) { - if (NILP(coding_system)) - *codesys_in_out = - coding_system_from_mask(decst.mask); - else - *codesys_in_out = coding_system; + if (memcmp("coding:", p, LENGTH("coding:")) != 0) { + continue; + } + if (p != local_vars_beg && strchr(" \t;", *p) == NULL ) { + continue; + } + p += LENGTH("coding:"); + while (*p == ' ' || *p == '\t') { + p++; + } + + /* Get coding system name */ + save = *suffix; + *suffix = '\0'; + /* Characters valid in a MIME charset + name (rfc 1521), and in a Lisp + symbol name. */ + n = strspn((char *)p, mime_name_valid_chars); + *suffix = save; + if (n > 0) { + save = p[n]; + p[n] = '\0'; + coding_system = Ffind_coding_system( + intern((char *)p)); + p[n] = save; + } + break; + } + break; } + break; + } + if (NILP(coding_system)) { + do { + if (detect_coding_type(&decst, buf, nread, + XCODING_SYSTEM_TYPE(*codesys_in_out) + != CODESYS_AUTODETECT)) + break; + nread = Lstream_read(stream, buf, sizeof(buf)); + if (nread == 0) + break; + } + while (1); + } else if (XCODING_SYSTEM_TYPE(*codesys_in_out) == CODESYS_AUTODETECT + && XCODING_SYSTEM_EOL_TYPE(coding_system) == EOL_AUTODETECT) { + do { + if (detect_coding_type(&decst, buf, nread, 1)) + break; + nread = Lstream_read(stream, buf, sizeof(buf)); + if (!nread) + break; + } + while (1); + } + *eol_type_in_out = decst.eol_type; + if (XCODING_SYSTEM_TYPE(*codesys_in_out) == CODESYS_AUTODETECT) { + if (NILP(coding_system)) + *codesys_in_out = + coding_system_from_mask(decst.mask); + else + *codesys_in_out = coding_system; + } +} + +void +determine_real_coding_system(lstream_t stream, Lisp_Object * codesys_in_out, + eol_type_t * eol_type_in_out) +{ + + if (*eol_type_in_out == EOL_AUTODETECT) + *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE(*codesys_in_out); + + + /* If autodetection is called for, do it now. */ + if (XCODING_SYSTEM_TYPE(*codesys_in_out) == CODESYS_AUTODETECT + || *eol_type_in_out == EOL_AUTODETECT) { + autodetect_real_coding_system(stream, codesys_in_out, + eol_type_in_out); } /* If we absolutely can't determine the EOL type, just assume LF. */ @@ -3574,7 +3577,7 @@ Return the UCS code (a positive integer) corresponding to CHARACTER. (U+3013) of JIS X 0208, which means correct character is not found, instead. #### do something more appropriate (use blob?) - Danger, Will Robinson! Data loss. Should we signal user? */ + Danger, Will Robinson! Data loss. Should we signal user? */ static void decode_ucs4(unsigned long ch, unsigned_char_dynarr * dst) { Lisp_Object chr = ucs_to_char(ch); @@ -5700,7 +5703,6 @@ void syms_of_file_coding(void) defsymbol(&Qinput_charset_conversion, "input-charset-conversion"); defsymbol(&Qoutput_charset_conversion, "output-charset-conversion"); - defsymbol(&Qshort, "short"); defsymbol(&Qno_ascii_eol, "no-ascii-eol"); defsymbol(&Qno_ascii_cntl, "no-ascii-cntl"); defsymbol(&Qseven, "seven");