Coverity fixes from Nelson
[sxemacs] / lib-src / make-docfile.c
1 /* Generate doc-string file for SXEmacs from source files.
2    Copyright (C) 1985, 1986, 1992, 1993, 1994 Free Software Foundation, Inc.
3    Copyright (C) 1995 Board of Trustees, University of Illinois.
4    Copyright (C) 1998, 1999 J. Kean Johnston.
5    Copyright (C) 2004 Steve Youngs.
6
7 This file is part of SXEmacs.
8
9 SXEmacs is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13
14 SXEmacs is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
21
22 /* Synched up with: FSF 19.30. */
23
24 /* The arguments given to this program are all the C and Lisp source files
25  of SXEmacs.  .elc and .el and .c files are allowed.
26  A .o file can also be specified; the .c file it was made from is used.
27  This helps the makefile pass the correct list of files.
28
29  The results, which go to standard output or to a file
30  specified with -a or -o (-a to append, -o to start from nothing),
31  are entries containing function or variable names and their documentation.
32  Each entry starts with a ^_ character.
33  Then comes F for a function or V for a variable.
34  Then comes the function or variable name, terminated with a newline.
35  Then comes the documentation for that function or variable.
36
37  Added 19.15/20.1:  `-i site-packages' allow installer to dump extra packages
38  without modifying Makefiles, etc.
39  */
40
41 #define NO_SHORTNAMES           /* Tell config not to load remap.h */
42 #include <config.h>
43
44 #include <stdio.h>
45 #include <errno.h>
46 #if __STDC__ || defined(STDC_HEADERS)
47 #include <stdlib.h>
48 #ifdef HAVE_UNISTD_H
49 #include <unistd.h>
50 #endif
51 #include <string.h>
52 #include <ctype.h>
53 #endif
54
55 #include <sys/param.h>
56
57 /* How long can a source filename be in DOC (including "\037S" at the start
58     and "\n" at the end) ? */
59 #define DOC_MAX_FILENAME_LENGTH 2048
60 #define IS_DIRECTORY_SEP(arg) ('/' == arg)
61
62 /* Can't use the system assert on OS X, it can't find a definition for
63    __eprintf on linking */
64 #define assert(x) ((x) ? (void) 0 : (void) abort ())
65
66 #define READ_TEXT "r"
67 #define READ_BINARY "r"
68 #define WRITE_BINARY "w"
69 #define APPEND_BINARY "a"
70
71 /* Stdio stream for output to the DOC file.  */
72 static FILE *outfile;
73 static char *modname = NULL;
74
75 enum {
76         el_file,
77         elc_file,
78         c_file
79 } Current_file_type;
80
81 static void put_filename (const char *filename);
82 static int scan_file(const char *filename);
83 static int read_c_string(FILE *, int, int);
84 static void write_c_args(FILE * out, const char *func, char *buf, int minargs,
85                          int maxargs);
86 static int scan_c_file(const char *filename, const char *mode);
87 static void skip_white(FILE *);
88 static void read_lisp_symbol(FILE *, char *);
89 static int scan_lisp_file(const char *filename, const char *mode);
90
91 #define C_IDENTIFIER_CHAR_P(c) \
92  (('A' <= c && c <= 'Z') || \
93   ('a' <= c && c <= 'z') || \
94   ('0' <= c && c <= '9') || \
95   (c == '_'))
96
97 /* Name this program was invoked with.  */
98 char *progname;
99
100 /* Set to 1 if this was invoked by ellcc */
101 int ellcc = 0;
102
103 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
104
105 static void error(const char *s1, const char *s2)
106 {
107         fprintf(stderr, "%s: ", progname);
108         fprintf(stderr, s1, s2);
109         fprintf(stderr, "\n");
110 }
111
112 /* Print error message and exit.  */
113
114 static void fatal(const char *s1, const char *s2)
115 {
116         error(s1, s2);
117         exit(1);
118 }
119
120 /* Like malloc but get fatal error if memory is exhausted.  */
121
122 static long *xmalloc(unsigned int size)
123 {
124         long *result = (long *)malloc(size);
125         if (result == NULL)
126                 fatal("virtual memory exhausted", 0);
127         return result;
128 }
129
130 static char *next_extra_elc(char *extra_elcs)
131 {
132         static FILE *fp = NULL;
133         static char line_buf[BUFSIZ];
134         char *p = line_buf + 1;
135
136         if (!fp) {
137                 if (!extra_elcs) {
138                         return NULL;
139                 } else if (!(fp = fopen(extra_elcs, READ_BINARY))) {
140                         /* It is not an error if this file doesn't exist. */
141                         /*fatal("error opening site package file list", 0); */
142                         return NULL;
143                 }
144                 if(!fgets(line_buf, BUFSIZ, fp)) {
145                         fclose(fp);
146                         fp = NULL;
147                         return NULL;
148                 }
149         }
150
151       again:
152         if (!fgets(line_buf, BUFSIZ, fp)) {
153                 fclose(fp);
154                 fp = NULL;
155                 return NULL;
156         }
157         line_buf[0] = '\0';
158         if (strlen(p) <= 2 || strlen(p) >= (BUFSIZ - 5)) {
159                 /* reject too short or too long lines */
160                 goto again;
161         }
162         p[strlen(p) - 2] = '\0';
163         strcat(p, ".elc");
164
165         return p;
166 }
167
168 static void
169 write_doc_header(void)
170 {
171         char *tmp, *modout = strdup(modname), *modoutC;
172         size_t modsz;
173
174         if ((tmp = strrchr(modout, '.')) != NULL) {
175                 *tmp = '\0';
176                 tmp = strrchr(modout, '.');
177                 if (tmp != NULL) {
178                         *tmp = '\0';
179                 }
180         }
181         /* the same for modoutC */
182         modoutC = strdup(modout);
183         modsz = strlen(modoutC);
184
185         for (size_t i = 0; i < modsz; i++) {
186                 /* for the C version we have to convert any non-char to _ */
187                 if (!isdigit(modoutC[i]) && !isalpha(modoutC[i])) {
188                         modoutC[i] = '_';
189                 }
190         }
191
192         fprintf(outfile, "/* DO NOT EDIT - AUTOMATICALLY GENERATED */\n\n");
193         fprintf(outfile, "#include <emodules-ng.h>\n\n");
194
195 #if 0
196         fprintf(outfile, "const char *emodule_name = \"%s\";\n", modoutC);
197         fprintf(outfile, "const char *emodule_title = "
198                 "\"SXEmacs module: %s\";\n", modout);
199 #endif
200         /* declare and start the LTX_docs() block */
201         fprintf(outfile, "\n\nextern void %s_LTX_docs(void);\n", modoutC);
202         fprintf(outfile, "\nvoid\n%s_LTX_docs(void)\n", modoutC);
203 }
204
205 \f
206 int main(int argc, char **argv)
207 {
208         int i;
209         int err_count = 0;
210         char *extra_elcs = NULL;
211
212         progname = argv[0];
213
214         outfile = stdout;
215
216         /* If first two args are -o FILE, output to FILE.  */
217         for (i = 1; i < argc - 1;) {
218                 if (!strcmp(argv[i], "-o")) {
219                         outfile = fopen(argv[++i], WRITE_BINARY);
220                 }
221                 if (!strcmp(argv[i], "-a")) {
222                         outfile = fopen(argv[++i], APPEND_BINARY);
223                 }
224                 if (!strcmp(argv[i], "-E")) {
225                         if (modname == NULL) {
226                                 modname = strdup(argv[i+1]);
227                         }
228                         ellcc = 1;
229                         /*outfile = fopen(argv[++i], APPEND_BINARY);*/
230                         outfile = fopen(argv[++i], WRITE_BINARY);
231                 }
232                 if (!strcmp(argv[i], "-d")) {
233                         if (chdir(argv[++i]) < 0)
234                                 fatal("Could not change to directory ",argv[i]);
235                 }
236
237                 if (!strcmp(argv[i], "-i")) {
238                         extra_elcs = argv[++i];
239                 }
240
241                 if (!strcmp(argv[i], "--modname") || !strcmp(argv[i], "-m")) {
242                         modname = strdup(argv[++i]);
243                 }
244                 i++;
245         }
246         if (outfile == 0)
247                 fatal("No output file specified", "");
248
249         if (ellcc) {
250                 write_doc_header();
251                 fprintf(outfile, "{\n");
252         }
253
254         for (i = 1; i < argc; i++) {
255                 int j;
256
257                 if (argc > i + 1 && !strcmp (argv[i], "-d")) {
258                         /* XEmacs change; allow more than one chdir. The
259                            idea is that the second chdir is to source-lisp,
260                            and that any Lisp files not under there have the
261                            full path specified.  */
262                         i += 1;
263                         if (chdir (argv[i]) < 0)
264                                 fatal("Could not change to directory ", argv[i]);
265                         continue;
266                 } else if (argv[i][0] == '-') {
267                         i++;
268                         continue;
269                 }
270                 /* Don't process one file twice.  */
271                 for (j = 1; j < i; j++)
272                         if (!strcmp(argv[i], argv[j]))
273                                 break;
274                 if (j == i) {
275                         /* err_count seems to be {mis,un}used */
276                         err_count += scan_file(argv[i]);
277                 }
278         }
279
280         if (extra_elcs) {
281                 char *p;
282
283                 while ((p = next_extra_elc(extra_elcs)) != NULL) {
284                         err_count += scan_file(p);
285                 }
286         }
287
288         putc('\n', outfile);
289         if (ellcc)
290                 fprintf(outfile, "}\n\n");
291 #ifndef VMS
292         exit(err_count > 0);
293 #endif                          /* VMS */
294         return err_count > 0;
295 }
296
297 /* Add a source file name boundary in the output file.  */
298 static void put_filename (const char *filename)
299 {
300         /* XEmacs change; don't strip directory information. */
301 #if 0
302         const char *tmp;
303
304         /* Why are we cutting this off? */
305         for (tmp = filename; *tmp; tmp++) {
306                 if (IS_DIRECTORY_SEP(*tmp))
307                         filename = tmp + 1;
308         }
309 #endif
310         /* <= because sizeof includes the nul byte at the end. Not quite
311            right, because it should include the length of the symbol +
312            "\037[VF]" instead of simply 10. */
313         assert(sizeof("\037S\n") + strlen(filename) + 10 
314                <= DOC_MAX_FILENAME_LENGTH);
315
316         putc (037, outfile);
317         putc ('S', outfile);
318         fprintf (outfile, "%s\n", filename);
319 }
320
321 /* Read file FILENAME and output its doc strings to outfile.  */
322 /* Return 1 if file is not found, 0 if it is found.  */
323
324 static int scan_file(const char *filename)
325 {
326         int len = strlen(filename);
327
328         if (ellcc == 0 && len > 4 && !strcmp(filename + len - 4, ".elc")) {
329                 Current_file_type = elc_file;
330                 return scan_lisp_file(filename, READ_BINARY);
331         } else if (ellcc == 0 && len > 3 && !strcmp(filename + len - 3, ".el")) {
332                 Current_file_type = el_file;
333                 return scan_lisp_file(filename, READ_TEXT);
334         } else {
335                 Current_file_type = c_file;
336                 return scan_c_file(filename, READ_TEXT);
337         }
338 }
339 \f
340 char buf[128];
341
342 /* Skip a C string from INFILE,
343  and return the character that follows the closing ".
344  If printflag is positive, output string contents to outfile.
345  If it is negative, store contents in buf.
346  Convert escape sequences \n and \t to newline and tab;
347  discard \ followed by newline.  */
348
349 #define MDGET do { prevc = c; c = getc (infile); } while (0)
350 static int read_c_string(FILE * infile, int printflag, int c_docstring)
351 {
352         register int prevc = 0, c = 0;
353         char *p = buf;
354         int start = -1;
355
356         MDGET;
357         while (c != EOF) {
358                 while ((c_docstring || c != '"') && c != EOF) {
359                         if (c == '*') {
360                                 int cc = getc(infile);
361                                 if (cc == '/') {
362                                         if (prevc != '\n') {
363                                                 if (printflag > 0) {
364                                                         if (ellcc)
365                                                                 fprintf(outfile,
366                                                                         "\\n\\");
367                                                         putc('\n', outfile);
368                                                 } else if (printflag < 0)
369                                                         *p++ = '\n';
370                                         }
371                                         break;
372                                 } else
373                                         ungetc(cc, infile);
374                         }
375
376                         if (start == 1) {
377                                 if (printflag > 0) {
378                                         if (ellcc)
379                                                 fprintf(outfile, "\\n\\");
380                                         putc('\n', outfile);
381                                 } else if (printflag < 0)
382                                         *p++ = '\n';
383                         }
384
385                         if (c == '\\') {
386                                 MDGET;
387                                 if (c == '\n') {
388                                         MDGET;
389                                         start = 1;
390                                         continue;
391                                 }
392                                 if (!c_docstring && c == 'n')
393                                         c = '\n';
394                                 if (c == 't')
395                                         c = '\t';
396                         }
397                         if (c == '\n')
398                                 start = 1;
399                         else {
400                                 start = 0;
401                                 if (printflag > 0) {
402                                         if (ellcc && c == '"')
403                                                 putc('\\', outfile);
404                                         putc(c, outfile);
405                                 } else if (printflag < 0)
406                                         *p++ = c;
407                         }
408                         MDGET;
409                 }
410                 /* look for continuation of string */
411                 if (Current_file_type == c_file) {
412                         do {
413                                 MDGET;
414                         }
415                         while (isspace(c));
416                         if (c != '"')
417                                 break;
418                 } else {
419                         MDGET;
420                         if (c != '"')
421                                 break;
422                         /* If we had a "", concatenate the two strings.  */
423                 }
424                 MDGET;
425         }
426
427         if (printflag < 0)
428                 *p = 0;
429
430         return c;
431 }
432 \f
433 /* Write to file OUT the argument names of function FUNC, whose text is in BUF.
434    MINARGS and MAXARGS are the minimum and maximum number of arguments.  */
435
436 static void
437 write_c_args(FILE * out, const char *func, char *buff, int minargs, int maxargs)
438 {
439         register char *p;
440         int in_ident = 0;
441         int just_spaced = 0;
442 #if 0
443         int need_space = 1;
444
445         fprintf(out, "(%s", func);
446 #else
447         /* XEmacs - "arguments:" is for parsing the docstring.  FSF's help system
448            doesn't parse the docstring for arguments like we do, so we're also
449            going to omit the function name to preserve compatibility with elisp
450            that parses the docstring.  Finally, not prefixing the arglist with
451            anything is asking for trouble because it's not uncommon to have an
452            unescaped parenthesis at the beginning of a line. --Stig */
453         fprintf(out, "arguments: (");
454 #endif
455
456         if (*buff == '(')
457                 ++buff;
458
459         for (p = buff; *p; p++) {
460                 char c = *p;
461                 int ident_start = 0;
462
463                 /* Add support for ANSI prototypes. Hop over
464                    "Lisp_Object" string (the only C type allowed in DEFUNs) */
465                 static char lo[] = "Lisp_Object";
466                 if ((C_IDENTIFIER_CHAR_P(c) != in_ident) && !in_ident &&
467                     (strncmp(p, lo, sizeof(lo) - 1) == 0) &&
468                     isspace((unsigned char)(*(p + sizeof(lo) - 1)))) {
469                         p += (sizeof(lo) - 1);
470                         while (isspace((unsigned char)(*p)))
471                                 p++;
472                         c = *p;
473                 }
474
475                 /* Notice when we start printing a new identifier.  */
476                 if (C_IDENTIFIER_CHAR_P(c) != in_ident) {
477                         if (!in_ident) {
478                                 in_ident = 1;
479                                 ident_start = 1;
480 #if 0
481                                 /* XEmacs - This goes along with the change above. */
482                                 if (need_space)
483                                         putc(' ', out);
484 #endif
485                                 if (minargs == 0 && maxargs > 0)
486                                         fprintf(out, "&optional ");
487                                 just_spaced = 1;
488
489                                 minargs--;
490                                 maxargs--;
491                         } else
492                                 in_ident = 0;
493                 }
494
495                 /* Print the C argument list as it would appear in lisp:
496                    print underscores as hyphens, and print commas as spaces.
497                    Collapse adjacent spaces into one. */
498                 if (c == '_')
499                         c = '-';
500                 if (c == ',')
501                         c = ' ';
502
503                 /* If the C argument name ends with `_', change it to ' ',
504                    to allow use of C reserved words or global symbols as Lisp args. */
505                 if (c == '-' && !C_IDENTIFIER_CHAR_P(p[1])) {
506                         in_ident = 0;
507                         just_spaced = 0;
508                 }
509                 /* If the character is carriage return, escape it for the C compiler. */
510                 else if (c == '\n') {
511                         putc('\\', out);
512                         putc('\n', out);
513                 } else if (c != ' ' || !just_spaced) {
514                         if (c >= 'a' && c <= 'z')
515                                 /* Upcase the letter.  */
516                                 c += 'A' - 'a';
517                         putc(c, out);
518                 }
519
520                 just_spaced = (c == ' ');
521 #if 0
522                 need_space = 0;
523 #endif
524         }
525         if (!ellcc)
526                 putc('\n', out);        /* XEmacs addition */
527 }
528 \f
529 /* Read through a c file.  If a .o file is named,
530    the corresponding .c file is read instead.
531    Looks for DEFUN constructs such as are defined in ../src/lisp.h.
532    Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED.  */
533
534 static int scan_c_file(const char *filename, const char *mode)
535 {
536         FILE *infile;
537         register int c;
538         register int commas;
539         register int defunflag;
540         register int defvarperbufferflag = 0;
541         register int defvarflag;
542         int minargs, maxargs;
543         size_t l = strlen(filename);
544         char f[MAXPATHLEN];
545
546         if (l > sizeof(f)) {
547 #ifdef ENAMETOOLONG
548                 errno = ENAMETOOLONG;
549 #else
550                 errno = EINVAL;
551 #endif
552                 return (0);
553         }
554
555         strcpy(f, filename);
556         if (f[l - 1] == 'o')
557                 f[l - 1] = 'c';
558         infile = fopen(f, mode);
559
560         /* No error if non-ex input file */
561         if (infile == NULL) {
562                 perror(f);
563                 return 0;
564         }
565
566         c = '\n';
567         while (!feof(infile)) {
568                 if (c != '\n') {
569                         c = getc(infile);
570                         continue;
571                 }
572                 c = getc(infile);
573                 /*
574                  * SXEmacs uses proper indentation so we need to
575                  * search for `\t' instead of ' ' here. 
576                  */
577                 if (c == '\t') {
578                         while (c == '\t')
579                                 c = getc(infile);
580                         if (c != 'D')
581                                 continue;
582                         c = getc(infile);
583                         if (c != 'E')
584                                 continue;
585                         c = getc(infile);
586                         if (c != 'F')
587                                 continue;
588                         c = getc(infile);
589                         if (c != 'V')
590                                 continue;
591                         c = getc(infile);
592                         if (c != 'A')
593                                 continue;
594                         c = getc(infile);
595                         if (c != 'R')
596                                 continue;
597                         c = getc(infile);
598                         if (c != '_')
599                                 continue;
600
601                         defvarflag = 1;
602                         defunflag = 0;
603
604                         c = getc(infile);
605                         /* Note that this business doesn't apply under XEmacs.
606                            DEFVAR_BUFFER_LOCAL in XEmacs behaves normally. */
607                         defvarperbufferflag = (c == 'P');
608
609                         c = getc(infile);
610                 } else if (c == 'D') {
611                         c = getc(infile);
612                         if (c != 'E')
613                                 continue;
614                         c = getc(infile);
615                         if (c != 'F')
616                                 continue;
617                         c = getc(infile);
618                         defunflag = (c == 'U');
619                         defvarflag = 0;
620                         c = getc(infile);
621                 } else
622                         continue;
623
624                 while (c != '(') {
625                         if (c < 0)
626                                 goto eof;
627                         c = getc(infile);
628                 }
629
630                 c = getc(infile);
631                 if (c != '"')
632                         continue;
633                 c = read_c_string(infile, -1, 0);
634
635                 if (defunflag)
636                         commas = 4;
637                 else if (defvarperbufferflag)
638                         commas = 2;
639                 else if (defvarflag)
640                         commas = 1;
641                 else            /* For DEFSIMPLE and DEFPRED */
642                         commas = 2;
643
644                 while (commas) {
645                         if (c == ',') {
646                                 commas--;
647                                 if (defunflag && (commas == 1 || commas == 2)) {
648                                         do
649                                                 c = getc(infile);
650                                         while (c == ' ' || c == '\n'
651                                                || c == '\t');
652                                         if (c < 0)
653                                                 goto eof;
654                                         ungetc(c, infile);
655                                         if (commas == 2) {      /* pick up minargs */
656                                                 if ( fscanf(infile, "%d", &minargs) != 1)
657                                                         fprintf(stderr, "Failed to read minargs\n");
658                                         } else /* pick up maxargs */ if (c == 'M' || c == 'U')  /* MANY || UNEVALLED */
659                                                 maxargs = -1;
660                                         else
661                                                 if ( fscanf(infile, "%d", &maxargs) != 1)
662                                                         fprintf(stderr, "Failed to read maxargs\n");;
663                                 }
664                         }
665                         if (c < 0)
666                                 goto eof;
667                         c = getc(infile);
668                 }
669                 while (c == ' ' || c == '\n' || c == '\t')
670                         c = getc(infile);
671                 if (c == '"')
672                         c = read_c_string(infile, 0, 0);
673                 if (defunflag | defvarflag) {
674                         while (c != '/')
675                                 c = getc(infile);
676                         c = getc(infile);
677                         while (c == '*')
678                                 c = getc(infile);
679                 } else {
680                         while (c != ',')
681                                 c = getc(infile);
682                         c = getc(infile);
683                 }
684                 while (c == ' ' || c == '\n' || c == '\t')
685                         c = getc(infile);
686                 if (defunflag | defvarflag)
687                         ungetc(c, infile);
688
689                 if (defunflag || defvarflag || c == '"') {
690                         if (ellcc) {
691                                 fprintf(outfile, "\tCDOC%s(\"%s\", \"\\\n",
692                                         defvarflag ? "SYM" : "SUBR", buf);
693                         } else {
694                                 put_filename (filename); /* XEmacs addition */
695                                 putc(037, outfile);
696                                 putc(defvarflag ? 'V' : 'F', outfile);
697                                 fprintf(outfile, "%s\n", buf);
698                         }
699                         c = read_c_string(infile, 1, (defunflag || defvarflag));
700
701                         /* If this is a defun, find the arguments and print
702                            them.  If this function takes MANY or UNEVALLED args,
703                            then the C source won't give the names of the
704                            arguments, so we shouldn't bother trying to find
705                            them.  */
706                         if (defunflag && maxargs != -1) {
707                                 char argbuf[1024], *p = argbuf;
708 #if 0                           /* For old DEFUN's only */
709                                 while (c != ')') {
710                                         if (c < 0)
711                                                 goto eof;
712                                         c = getc(infile);
713                                 }
714 #endif
715                                 /* Skip into arguments.  */
716                                 while (c != '(') {
717                                         if (c < 0)
718                                                 goto eof;
719                                         c = getc(infile);
720                                 }
721                                 /* Copy arguments into ARGBUF.  */
722                                 *p++ = c;
723                                 do
724                                         *p++ = c = getc(infile);
725                                 while (c != ')');
726                                 *p = '\0';
727                                 /* Output them.  */
728                                 if (ellcc)
729                                         fprintf(outfile, "\\n\\\n\\n\\\n");
730                                 else
731                                         fprintf(outfile, "\n\n");
732                                 write_c_args(outfile, buf, argbuf, minargs,
733                                              maxargs);
734                         }
735                         if (ellcc)
736                                 fprintf(outfile, "\\n\");\n\n");
737                 }
738         }
739       eof:
740         fclose(infile);
741         return 0;
742 }
743 \f
744 /* Read a file of Lisp code, compiled or interpreted.
745  Looks for
746   (defun NAME ARGS DOCSTRING ...)
747   (defmacro NAME ARGS DOCSTRING ...)
748   (autoload (quote NAME) FILE DOCSTRING ...)
749   (defvar NAME VALUE DOCSTRING)
750   (defconst NAME VALUE DOCSTRING)
751   (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
752   (fset (quote NAME) #[... DOCSTRING ...])
753   (defalias (quote NAME) #[... DOCSTRING ...])
754  starting in column zero.
755  (quote NAME) may appear as 'NAME as well.
756
757  We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
758  When we find that, we save it for the following defining-form,
759  and we use that instead of reading a doc string within that defining-form.
760
761  For defun, defmacro, and autoload, we know how to skip over the arglist.
762  For defvar, defconst, and fset we skip to the docstring with a kludgy
763  formatting convention: all docstrings must appear on the same line as the
764  initial open-paren (the one in column zero) and must contain a backslash
765  and a double-quote immediately after the initial double-quote.  No newlines
766  must appear between the beginning of the form and the first double-quote.
767  The only source file that must follow this convention is loaddefs.el; aside
768  from that, it is always the .elc file that we look at, and they are no
769  problem because byte-compiler output follows this convention.
770  The NAME and DOCSTRING are output.
771  NAME is preceded by `F' for a function or `V' for a variable.
772  An entry is output only if DOCSTRING has \ newline just after the opening "
773
774  Adds the filename a symbol or function was found in before its docstring;
775  there's no need for this with the load-history available, but we do it for
776  consistency with the C parsing code. 
777  */
778
779 static void skip_white(FILE * infile)
780 {
781         char c = ' ';
782         while (c == ' ' || c == '\t' || c == '\n')
783                 c = getc(infile);
784         ungetc(c, infile);
785 }
786
787 static void read_lisp_symbol(FILE * infile, char *buffer)
788 {
789         char c;
790         char *fillp = buffer;
791
792         skip_white(infile);
793         while (1) {
794                 c = getc(infile);
795                 if (c == '\\')
796                         /* FSF has *(++fillp), which is wrong. */
797                         *fillp++ = getc(infile);
798                 else if (c == ' ' || c == '\t' || c == '\n' || c == '('
799                          || c == ')') {
800                         ungetc(c, infile);
801                         *fillp = 0;
802                         break;
803                 } else
804                         *fillp++ = c;
805         }
806
807         if (!buffer[0])
808                 fprintf(stderr, "## expected a symbol, got '%c'\n", c);
809
810         skip_white(infile);
811 }
812
813 static int scan_lisp_file(const char *filename, const char *mode)
814 {
815         FILE *infile;
816         register int c;
817         char *saved_string = 0;
818
819         infile = fopen(filename, mode);
820         if (infile == NULL) {
821                 perror(filename);
822                 return 0;       /* No error */
823         }
824
825         c = '\n';
826         while (!feof(infile)) {
827                 char buffer[BUFSIZ];
828                 char type;
829
830                 if (c != '\n') {
831                         c = getc(infile);
832                         continue;
833                 }
834                 c = getc(infile);
835                 /* Detect a dynamic doc string and save it for the next expression.  */
836                 if (c == '#') {
837                         c = getc(infile);
838                         if (c == '@') {
839                                 int length = 0;
840                                 int i;
841
842                                 /* Read the length.  */
843                                 while ((c = getc(infile), c >= '0' && c <= '9')) {
844                                         length *= 10;
845                                         length += c - '0';
846                                 }
847
848                                 /* The next character is a space that is counted in the length
849                                    but not part of the doc string.
850                                    We already read it, so just ignore it.  */
851                                 length--;
852
853                                 /* Read in the contents.  */
854                                 if (saved_string != 0) 
855                                         free(saved_string);
856                                 saved_string = (char *)xmalloc(length);
857                                 for (i = 0; i < length; i++)
858                                         saved_string[i] = getc(infile);
859                                 /* The last character is a ^_.
860                                    That is needed in the .elc file
861                                    but it is redundant in DOC.  So get rid of it here.  */
862                                 saved_string[length - 1] = 0;
863                                 /* Skip the newline.  */
864                                 c = getc(infile);
865                                 while (c != '\n')
866                                         c = getc(infile);
867                         }
868                         continue;
869                 }
870
871                 if (c != '(')
872                         continue;
873
874                 read_lisp_symbol(infile, buffer);
875
876                 if (!strcmp(buffer, "defun") || !strcmp(buffer, "defmacro")) {
877                         type = 'F';
878                         read_lisp_symbol(infile, buffer);
879
880                         /* Skip the arguments: either "nil" or a list in parens */
881
882                         c = getc(infile);
883                         if (c == 'n') { /* nil */
884                                 if ((c = getc(infile)) != 'i' ||
885                                     (c = getc(infile)) != 'l') {
886                                         fprintf(stderr,
887                                                 "## unparsable arglist in %s (%s)\n",
888                                                 buffer, filename);
889                                         continue;
890                                 }
891                         } else if (c != '(') {
892                                 fprintf(stderr,
893                                         "## unparsable arglist in %s (%s)\n",
894                                         buffer, filename);
895                                 continue;
896                         } else
897                                 while (c != ')')
898                                         c = getc(infile);
899                         skip_white(infile);
900
901                         /* If the next three characters aren't `dquote bslash newline'
902                            then we're not reading a docstring.
903                          */
904                         if ((c = getc(infile)) != '"' ||
905                             (c = getc(infile)) != '\\' ||
906                             (c = getc(infile)) != '\n') {
907 #ifdef DEBUG
908                                 fprintf(stderr, "## non-docstring in %s (%s)\n",
909                                         buffer, filename);
910 #endif
911                                 continue;
912                         }
913                 }
914
915                 else if (!strcmp(buffer, "defvar") ||
916                          !strcmp(buffer, "defconst")) {
917                         char c1 = 0, c2 = 0;
918                         type = 'V';
919                         read_lisp_symbol(infile, buffer);
920
921                         if (saved_string == 0) {
922
923                                 /* Skip until the first newline; remember the two previous chars. */
924                                 while (c != '\n' && c >= 0) {
925                                         /* #### Kludge -- Ignore any ESC x x ISO2022 sequences */
926                                         if (c == 27) {
927                                                 (void)getc(infile);
928                                                 (void)getc(infile);
929                                                 goto nextchar;
930                                         }
931
932                                         c2 = c1;
933                                         c1 = c;
934                                       nextchar:
935                                         c = getc(infile);
936                                 }
937
938                                 /* If two previous characters were " and \,
939                                    this is a doc string.  Otherwise, there is none.  */
940                                 if (c2 != '"' || c1 != '\\') {
941 #ifdef DEBUG
942                                         fprintf(stderr,
943                                                 "## non-docstring in %s (%s)\n",
944                                                 buffer, filename);
945 #endif
946                                         continue;
947                                 }
948                         }
949                 }
950
951                 else if (! strcmp (buffer, "custom-declare-variable"))
952                 {
953                         char c1 = 0, c2 = 0;
954                         type = 'V';
955
956                         c = getc (infile);
957                         if (c == '\'')
958                                 read_lisp_symbol (infile, buffer);
959                         else {
960                                 if (c != '(') {
961                                         fprintf (stderr,
962                                                  "## unparsable name in "
963                                                  "custom-declare-variable "
964                                                  "in %s\n",
965                                                  filename);
966                                         continue;
967                                 }
968                                 read_lisp_symbol (infile, buffer);
969                                 if (strcmp (buffer, "quote")) {
970                                         fprintf (stderr,
971                                                  "## unparsable name in "
972                                                  "custom-declare-variable "
973                                                  "in %s\n",
974                                                  filename);
975                                         continue;
976                                 }
977                                 read_lisp_symbol (infile, buffer);
978                                 c = getc (infile);
979                                 if (c != ')') {
980                                         fprintf (stderr,
981                                                  "## unparsable quoted name "
982                                                  "in custom-declare-variable"
983                                                  " in %s\n", filename);
984                                         continue;
985                                 }
986                         }
987
988                         if (saved_string == 0)
989                         {
990                                 /* Skip to end of line; remember the two
991                                    previous chars.  */
992                                 while (c != '\n' && c >= 0) {
993                                         c2 = c1;
994                                         c1 = c;
995                                         /* SXEmacs: shame we can't do this. */
996                                         /* c = getc_skipping_iso2022 (infile); */
997                                         getc (infile);
998                                 }
999           
1000                                 /* If two previous characters were " and \,
1001                                    this is a doc string.  Otherwise, there is
1002                                    none.  */
1003                                 if (c2 != '"' || c1 != '\\') {
1004 #ifdef DEBUG
1005                                         fprintf (stderr, "## non-docstring in"
1006                                                  " %s (%s)\n",
1007                                                  buffer, filename);
1008 #endif
1009                                         continue;
1010                                 }
1011                         }
1012                 }
1013
1014                 else if (!strcmp(buffer, "fset") || !strcmp(buffer, "defalias")) {
1015                         char c1 = 0, c2 = 0;
1016                         type = 'F';
1017
1018                         c = getc(infile);
1019                         if (c == '\'')
1020                                 read_lisp_symbol(infile, buffer);
1021                         else {
1022                                 if (c != '(') {
1023                                         fprintf(stderr,
1024                                                 "## unparsable name in fset in %s\n",
1025                                                 filename);
1026                                         continue;
1027                                 }
1028                                 read_lisp_symbol(infile, buffer);
1029                                 if (strcmp(buffer, "quote")) {
1030                                         fprintf(stderr,
1031                                                 "## unparsable name in fset in %s\n",
1032                                                 filename);
1033                                         continue;
1034                                 }
1035                                 read_lisp_symbol(infile, buffer);
1036                                 c = getc(infile);
1037                                 if (c != ')') {
1038                                         fprintf(stderr,
1039                                                 "## unparsable quoted name in fset in %s\n",
1040                                                 filename);
1041                                         continue;
1042                                 }
1043                         }
1044
1045                         if (saved_string == 0) {
1046                                 /* Skip until the first newline; remember the two previous chars. */
1047                                 while (c != '\n' && c >= 0) {
1048                                         c2 = c1;
1049                                         c1 = c;
1050                                         c = getc(infile);
1051                                 }
1052
1053                                 /* If two previous characters were " and \,
1054                                    this is a doc string.  Otherwise, there is none.  */
1055                                 if (c2 != '"' || c1 != '\\') {
1056 #ifdef DEBUG
1057                                         fprintf(stderr,
1058                                                 "## non-docstring in %s (%s)\n",
1059                                                 buffer, filename);
1060 #endif
1061                                         continue;
1062                                 }
1063                         }
1064                 }
1065
1066                 else if (!strcmp(buffer, "autoload")) {
1067                         type = 'F';
1068                         c = getc(infile);
1069                         if (c == '\'')
1070                                 read_lisp_symbol(infile, buffer);
1071                         else {
1072                                 if (c != '(') {
1073                                         fprintf(stderr,
1074                                                 "## unparsable name in autoload in %s\n",
1075                                                 filename);
1076                                         continue;
1077                                 }
1078                                 read_lisp_symbol(infile, buffer);
1079                                 if (strcmp(buffer, "quote")) {
1080                                         fprintf(stderr,
1081                                                 "## unparsable name in autoload in %s\n",
1082                                                 filename);
1083                                         continue;
1084                                 }
1085                                 read_lisp_symbol(infile, buffer);
1086                                 c = getc(infile);
1087                                 if (c != ')') {
1088                                         fprintf(stderr,
1089                                                 "## unparsable quoted name in autoload in %s\n",
1090                                                 filename);
1091                                         continue;
1092                                 }
1093                         }
1094                         skip_white(infile);
1095                         if ((c = getc(infile)) != '\"') {
1096                                 fprintf(stderr,
1097                                         "## autoload of %s unparsable (%s)\n",
1098                                         buffer, filename);
1099                                 continue;
1100                         }
1101                         read_c_string(infile, 0, 0);
1102                         skip_white(infile);
1103
1104                         if (saved_string == 0) {
1105                                 /* If the next three characters aren't `dquote bslash newline'
1106                                    then we're not reading a docstring.  */
1107                                 if ((c = getc(infile)) != '"' ||
1108                                     (c = getc(infile)) != '\\' ||
1109                                     (c = getc(infile)) != '\n') {
1110 #ifdef DEBUG
1111                                         fprintf(stderr,
1112                                                 "## non-docstring in %s (%s)\n",
1113                                                 buffer, filename);
1114 #endif
1115                                         continue;
1116                                 }
1117                         }
1118                 }
1119 #if 0                           /* causes crash */
1120                 else if (!strcmp(buffer, "if") ||
1121                          !strcmp(buffer, "byte-code")) ;
1122 #endif
1123
1124                 else {
1125 #ifdef DEBUG
1126                         fprintf(stderr,
1127                                 "## unrecognized top-level form, %s (%s)\n",
1128                                 buffer, filename);
1129 #endif
1130                         continue;
1131                 }
1132
1133                 /* At this point, we should either use the previous
1134                    dynamic doc string in saved_string
1135                    or gobble a doc string from the input file.
1136
1137                    In the latter case, the opening quote (and leading
1138                    backslash-newline) have already been read.  */
1139                 put_filename (filename); /* XEmacs addition */
1140                 putc(037, outfile);
1141                 putc(type, outfile);
1142                 fprintf(outfile, "%s\n", buffer);
1143                 if (saved_string) {
1144                         fputs(saved_string, outfile);
1145                         /* Don't use one dynamic doc string twice.  */
1146                         free(saved_string);
1147                         saved_string = 0;
1148                 } else
1149                         read_c_string(infile, 1, 0);
1150         }
1151         if (saved_string) {
1152                 /* If this is true then a dynamic doc string was
1153                    detected without a next expression. We should not
1154                    emit anything since the input was badly formed,
1155                    but lets free the string...
1156                 */
1157                 free(saved_string);
1158                 saved_string = 0;
1159         }
1160         fclose(infile);
1161         return 0;
1162 }