Get rid of harmless warning about ignoring result of getc, while ignoring ESC ISO2022...
[sxemacs] / lib-src / make-docfile.c
1 /* Generate doc-string file for SXEmacs from source files.
2    Copyright (C) 1985, 1986, 1992, 1993, 1994 Free Software Foundation, Inc.
3    Copyright (C) 1995 Board of Trustees, University of Illinois.
4    Copyright (C) 1998, 1999 J. Kean Johnston.
5    Copyright (C) 2004 Steve Youngs.
6
7 This file is part of SXEmacs.
8
9 SXEmacs is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13
14 SXEmacs is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
21
22 /* Synched up with: FSF 19.30. */
23
24 /* The arguments given to this program are all the C and Lisp source files
25  of SXEmacs.  .elc and .el and .c files are allowed.
26  A .o file can also be specified; the .c file it was made from is used.
27  This helps the makefile pass the correct list of files.
28
29  The results, which go to standard output or to a file
30  specified with -a or -o (-a to append, -o to start from nothing),
31  are entries containing function or variable names and their documentation.
32  Each entry starts with a ^_ character.
33  Then comes F for a function or V for a variable.
34  Then comes the function or variable name, terminated with a newline.
35  Then comes the documentation for that function or variable.
36
37  Added 19.15/20.1:  `-i site-packages' allow installer to dump extra packages
38  without modifying Makefiles, etc.
39  */
40
41 #define NO_SHORTNAMES           /* Tell config not to load remap.h */
42 #include <config.h>
43
44 #include <stdio.h>
45 #include <errno.h>
46 #if __STDC__ || defined(STDC_HEADERS)
47 #include <stdlib.h>
48 #ifdef HAVE_UNISTD_H
49 #include <unistd.h>
50 #endif
51 #include <string.h>
52 #include <ctype.h>
53 #endif
54
55 #include <sys/param.h>
56
57 /* How long can a source filename be in DOC (including "\037S" at the start
58     and "\n" at the end) ? */
59 #define DOC_MAX_FILENAME_LENGTH 2048
60 #define IS_DIRECTORY_SEP(arg) ('/' == arg)
61
62 /* Can't use the system assert on OS X, it can't find a definition for
63    __eprintf on linking */
64 #define assert(x) ((x) ? (void) 0 : (void) abort ())
65
66 #define READ_TEXT "r"
67 #define READ_BINARY "r"
68 #define WRITE_BINARY "w"
69 #define APPEND_BINARY "a"
70
71 /* Stdio stream for output to the DOC file.  */
72 static FILE *outfile;
73 static char *modname = NULL;
74
75 enum {
76         el_file,
77         elc_file,
78         c_file
79 } Current_file_type;
80
81 static void put_filename (const char *filename);
82 static int scan_file(const char *filename);
83 static int read_c_string(FILE *, int, int);
84 static void write_c_args(FILE * out, const char *func, char *buf, int minargs,
85                          int maxargs);
86 static int scan_c_file(const char *filename, const char *mode);
87 static void skip_white(FILE *);
88 static void read_lisp_symbol(FILE *, char *);
89 static int scan_lisp_file(const char *filename, const char *mode);
90
91 #define C_IDENTIFIER_CHAR_P(c) \
92  (('A' <= c && c <= 'Z') || \
93   ('a' <= c && c <= 'z') || \
94   ('0' <= c && c <= '9') || \
95   (c == '_'))
96
97 /* Name this program was invoked with.  */
98 char *progname;
99
100 /* Set to 1 if this was invoked by ellcc */
101 int ellcc = 0;
102
103 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
104
105 static void error(const char *s1, const char *s2)
106 {
107         fprintf(stderr, "%s: ", progname);
108         fprintf(stderr, s1, s2);
109         fprintf(stderr, "\n");
110 }
111
112 /* Print error message and exit.  */
113
114 static void fatal(const char *s1, const char *s2)
115 {
116         error(s1, s2);
117         exit(1);
118 }
119
120 /* Like malloc but get fatal error if memory is exhausted.  */
121
122 static long *xmalloc(unsigned int size)
123 {
124         long *result = (long *)malloc(size);
125         if (result == NULL)
126                 fatal("virtual memory exhausted", 0);
127         return result;
128 }
129
130 static char *next_extra_elc(char *extra_elcs)
131 {
132         static FILE *fp = NULL;
133         static char line_buf[BUFSIZ];
134         char *p = line_buf + 1;
135
136         if (!fp) {
137                 if (!extra_elcs) {
138                         return NULL;
139                 } else if (!(fp = fopen(extra_elcs, READ_BINARY))) {
140                         /* It is not an error if this file doesn't exist. */
141                         /*fatal("error opening site package file list", 0); */
142                         return NULL;
143                 }
144                 fgets(line_buf, BUFSIZ, fp);
145         }
146
147       again:
148         if (!fgets(line_buf, BUFSIZ, fp)) {
149                 fclose(fp);
150                 fp = NULL;
151                 return NULL;
152         }
153         line_buf[0] = '\0';
154         if (strlen(p) <= 2 || strlen(p) >= (BUFSIZ - 5)) {
155                 /* reject too short or too long lines */
156                 goto again;
157         }
158         p[strlen(p) - 2] = '\0';
159         strcat(p, ".elc");
160
161         return p;
162 }
163
164 static void
165 write_doc_header(void)
166 {
167         char *tmp, *modout = strdup(modname), *modoutC;
168         size_t modsz;
169
170         if ((tmp = strrchr(modout, '.')) != NULL) {
171                 *tmp = '\0';
172                 tmp = strrchr(modout, '.');
173                 if (tmp != NULL) {
174                         *tmp = '\0';
175                 }
176         }
177         /* the same for modoutC */
178         modoutC = strdup(modout);
179         modsz = strlen(modoutC);
180
181         for (size_t i = 0; i < modsz; i++) {
182                 /* for the C version we have to convert any non-char to _ */
183                 if (!isdigit(modoutC[i]) && !isalpha(modoutC[i])) {
184                         modoutC[i] = '_';
185                 }
186         }
187
188         fprintf(outfile, "/* DO NOT EDIT - AUTOMATICALLY GENERATED */\n\n");
189         fprintf(outfile, "#include <emodules-ng.h>\n\n");
190
191 #if 0
192         fprintf(outfile, "const char *emodule_name = \"%s\";\n", modoutC);
193         fprintf(outfile, "const char *emodule_title = "
194                 "\"SXEmacs module: %s\";\n", modout);
195 #endif
196         /* declare and start the LTX_docs() block */
197         fprintf(outfile, "\n\nextern void %s_LTX_docs(void);\n", modoutC);
198         fprintf(outfile, "\nvoid\n%s_LTX_docs(void)\n", modoutC);
199 }
200
201 \f
202 int main(int argc, char **argv)
203 {
204         int i;
205         int err_count = 0;
206         char *extra_elcs = NULL;
207
208         progname = argv[0];
209
210         outfile = stdout;
211
212         /* If first two args are -o FILE, output to FILE.  */
213         for (i = 1; i < argc - 1;) {
214                 if (!strcmp(argv[i], "-o")) {
215                         outfile = fopen(argv[++i], WRITE_BINARY);
216                 }
217                 if (!strcmp(argv[i], "-a")) {
218                         outfile = fopen(argv[++i], APPEND_BINARY);
219                 }
220                 if (!strcmp(argv[i], "-E")) {
221                         if (modname == NULL) {
222                                 modname = strdup(argv[i+1]);
223                         }
224                         ellcc = 1;
225                         /*outfile = fopen(argv[++i], APPEND_BINARY);*/
226                         outfile = fopen(argv[++i], WRITE_BINARY);
227                 }
228                 if (!strcmp(argv[i], "-d")) {
229                         chdir(argv[++i]);
230                 }
231
232                 if (!strcmp(argv[i], "-i")) {
233                         extra_elcs = argv[++i];
234                 }
235
236                 if (!strcmp(argv[i], "--modname") || !strcmp(argv[i], "-m")) {
237                         modname = strdup(argv[++i]);
238                 }
239                 i++;
240         }
241         if (outfile == 0)
242                 fatal("No output file specified", "");
243
244         if (ellcc) {
245                 write_doc_header();
246                 fprintf(outfile, "{\n");
247         }
248
249         for (i = 1; i < argc; i++) {
250                 int j;
251
252                 if (argc > i + 1 && !strcmp (argv[i], "-d")) {
253                         /* XEmacs change; allow more than one chdir. The
254                            idea is that the second chdir is to source-lisp,
255                            and that any Lisp files not under there have the
256                            full path specified.  */
257                         i += 1;
258                         chdir (argv[i]);
259                         continue;
260                 } else if (argv[i][0] == '-') {
261                         i++;
262                         continue;
263                 }
264                 /* Don't process one file twice.  */
265                 for (j = 1; j < i; j++)
266                         if (!strcmp(argv[i], argv[j]))
267                                 break;
268                 if (j == i) {
269                         /* err_count seems to be {mis,un}used */
270                         err_count += scan_file(argv[i]);
271                 }
272         }
273
274         if (extra_elcs) {
275                 char *p;
276
277                 while ((p = next_extra_elc(extra_elcs)) != NULL) {
278                         err_count += scan_file(p);
279                 }
280         }
281
282         putc('\n', outfile);
283         if (ellcc)
284                 fprintf(outfile, "}\n\n");
285 #ifndef VMS
286         exit(err_count > 0);
287 #endif                          /* VMS */
288         return err_count > 0;
289 }
290
291 /* Add a source file name boundary in the output file.  */
292 static void put_filename (const char *filename)
293 {
294         /* XEmacs change; don't strip directory information. */
295 #if 0
296         const char *tmp;
297
298         /* Why are we cutting this off? */
299         for (tmp = filename; *tmp; tmp++) {
300                 if (IS_DIRECTORY_SEP(*tmp))
301                         filename = tmp + 1;
302         }
303 #endif
304         /* <= because sizeof includes the nul byte at the end. Not quite
305            right, because it should include the length of the symbol +
306            "\037[VF]" instead of simply 10. */
307         assert(sizeof("\037S\n") + strlen(filename) + 10 
308                <= DOC_MAX_FILENAME_LENGTH);
309
310         putc (037, outfile);
311         putc ('S', outfile);
312         fprintf (outfile, "%s\n", filename);
313 }
314
315 /* Read file FILENAME and output its doc strings to outfile.  */
316 /* Return 1 if file is not found, 0 if it is found.  */
317
318 static int scan_file(const char *filename)
319 {
320         int len = strlen(filename);
321
322         if (ellcc == 0 && len > 4 && !strcmp(filename + len - 4, ".elc")) {
323                 Current_file_type = elc_file;
324                 return scan_lisp_file(filename, READ_BINARY);
325         } else if (ellcc == 0 && len > 3 && !strcmp(filename + len - 3, ".el")) {
326                 Current_file_type = el_file;
327                 return scan_lisp_file(filename, READ_TEXT);
328         } else {
329                 Current_file_type = c_file;
330                 return scan_c_file(filename, READ_TEXT);
331         }
332 }
333 \f
334 char buf[128];
335
336 /* Skip a C string from INFILE,
337  and return the character that follows the closing ".
338  If printflag is positive, output string contents to outfile.
339  If it is negative, store contents in buf.
340  Convert escape sequences \n and \t to newline and tab;
341  discard \ followed by newline.  */
342
343 #define MDGET do { prevc = c; c = getc (infile); } while (0)
344 static int read_c_string(FILE * infile, int printflag, int c_docstring)
345 {
346         register int prevc = 0, c = 0;
347         char *p = buf;
348         int start = -1;
349
350         MDGET;
351         while (c != EOF) {
352                 while ((c_docstring || c != '"') && c != EOF) {
353                         if (c == '*') {
354                                 int cc = getc(infile);
355                                 if (cc == '/') {
356                                         if (prevc != '\n') {
357                                                 if (printflag > 0) {
358                                                         if (ellcc)
359                                                                 fprintf(outfile,
360                                                                         "\\n\\");
361                                                         putc('\n', outfile);
362                                                 } else if (printflag < 0)
363                                                         *p++ = '\n';
364                                         }
365                                         break;
366                                 } else
367                                         ungetc(cc, infile);
368                         }
369
370                         if (start == 1) {
371                                 if (printflag > 0) {
372                                         if (ellcc)
373                                                 fprintf(outfile, "\\n\\");
374                                         putc('\n', outfile);
375                                 } else if (printflag < 0)
376                                         *p++ = '\n';
377                         }
378
379                         if (c == '\\') {
380                                 MDGET;
381                                 if (c == '\n') {
382                                         MDGET;
383                                         start = 1;
384                                         continue;
385                                 }
386                                 if (!c_docstring && c == 'n')
387                                         c = '\n';
388                                 if (c == 't')
389                                         c = '\t';
390                         }
391                         if (c == '\n')
392                                 start = 1;
393                         else {
394                                 start = 0;
395                                 if (printflag > 0) {
396                                         if (ellcc && c == '"')
397                                                 putc('\\', outfile);
398                                         putc(c, outfile);
399                                 } else if (printflag < 0)
400                                         *p++ = c;
401                         }
402                         MDGET;
403                 }
404                 /* look for continuation of string */
405                 if (Current_file_type == c_file) {
406                         do {
407                                 MDGET;
408                         }
409                         while (isspace(c));
410                         if (c != '"')
411                                 break;
412                 } else {
413                         MDGET;
414                         if (c != '"')
415                                 break;
416                         /* If we had a "", concatenate the two strings.  */
417                 }
418                 MDGET;
419         }
420
421         if (printflag < 0)
422                 *p = 0;
423
424         return c;
425 }
426 \f
427 /* Write to file OUT the argument names of function FUNC, whose text is in BUF.
428    MINARGS and MAXARGS are the minimum and maximum number of arguments.  */
429
430 static void
431 write_c_args(FILE * out, const char *func, char *buff, int minargs, int maxargs)
432 {
433         register char *p;
434         int in_ident = 0;
435         int just_spaced = 0;
436 #if 0
437         int need_space = 1;
438
439         fprintf(out, "(%s", func);
440 #else
441         /* XEmacs - "arguments:" is for parsing the docstring.  FSF's help system
442            doesn't parse the docstring for arguments like we do, so we're also
443            going to omit the function name to preserve compatibility with elisp
444            that parses the docstring.  Finally, not prefixing the arglist with
445            anything is asking for trouble because it's not uncommon to have an
446            unescaped parenthesis at the beginning of a line. --Stig */
447         fprintf(out, "arguments: (");
448 #endif
449
450         if (*buff == '(')
451                 ++buff;
452
453         for (p = buff; *p; p++) {
454                 char c = *p;
455                 int ident_start = 0;
456
457                 /* Add support for ANSI prototypes. Hop over
458                    "Lisp_Object" string (the only C type allowed in DEFUNs) */
459                 static char lo[] = "Lisp_Object";
460                 if ((C_IDENTIFIER_CHAR_P(c) != in_ident) && !in_ident &&
461                     (strncmp(p, lo, sizeof(lo) - 1) == 0) &&
462                     isspace((unsigned char)(*(p + sizeof(lo) - 1)))) {
463                         p += (sizeof(lo) - 1);
464                         while (isspace((unsigned char)(*p)))
465                                 p++;
466                         c = *p;
467                 }
468
469                 /* Notice when we start printing a new identifier.  */
470                 if (C_IDENTIFIER_CHAR_P(c) != in_ident) {
471                         if (!in_ident) {
472                                 in_ident = 1;
473                                 ident_start = 1;
474 #if 0
475                                 /* XEmacs - This goes along with the change above. */
476                                 if (need_space)
477                                         putc(' ', out);
478 #endif
479                                 if (minargs == 0 && maxargs > 0)
480                                         fprintf(out, "&optional ");
481                                 just_spaced = 1;
482
483                                 minargs--;
484                                 maxargs--;
485                         } else
486                                 in_ident = 0;
487                 }
488
489                 /* Print the C argument list as it would appear in lisp:
490                    print underscores as hyphens, and print commas as spaces.
491                    Collapse adjacent spaces into one. */
492                 if (c == '_')
493                         c = '-';
494                 if (c == ',')
495                         c = ' ';
496
497                 /* If the C argument name ends with `_', change it to ' ',
498                    to allow use of C reserved words or global symbols as Lisp args. */
499                 if (c == '-' && !C_IDENTIFIER_CHAR_P(p[1])) {
500                         in_ident = 0;
501                         just_spaced = 0;
502                 }
503                 /* If the character is carriage return, escape it for the C compiler. */
504                 else if (c == '\n') {
505                         putc('\\', out);
506                         putc('\n', out);
507                 } else if (c != ' ' || !just_spaced) {
508                         if (c >= 'a' && c <= 'z')
509                                 /* Upcase the letter.  */
510                                 c += 'A' - 'a';
511                         putc(c, out);
512                 }
513
514                 just_spaced = (c == ' ');
515 #if 0
516                 need_space = 0;
517 #endif
518         }
519         if (!ellcc)
520                 putc('\n', out);        /* XEmacs addition */
521 }
522 \f
523 /* Read through a c file.  If a .o file is named,
524    the corresponding .c file is read instead.
525    Looks for DEFUN constructs such as are defined in ../src/lisp.h.
526    Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED.  */
527
528 static int scan_c_file(const char *filename, const char *mode)
529 {
530         FILE *infile;
531         register int c;
532         register int commas;
533         register int defunflag;
534         register int defvarperbufferflag = 0;
535         register int defvarflag;
536         int minargs, maxargs;
537         size_t l = strlen(filename);
538         char f[MAXPATHLEN];
539
540         if (l > sizeof(f)) {
541 #ifdef ENAMETOOLONG
542                 errno = ENAMETOOLONG;
543 #else
544                 errno = EINVAL;
545 #endif
546                 return (0);
547         }
548
549         strcpy(f, filename);
550         if (f[l - 1] == 'o')
551                 f[l - 1] = 'c';
552         infile = fopen(f, mode);
553
554         /* No error if non-ex input file */
555         if (infile == NULL) {
556                 perror(f);
557                 return 0;
558         }
559
560         c = '\n';
561         while (!feof(infile)) {
562                 if (c != '\n') {
563                         c = getc(infile);
564                         continue;
565                 }
566                 c = getc(infile);
567                 /*
568                  * SXEmacs uses proper indentation so we need to
569                  * search for `\t' instead of ' ' here. 
570                  */
571                 if (c == '\t') {
572                         while (c == '\t')
573                                 c = getc(infile);
574                         if (c != 'D')
575                                 continue;
576                         c = getc(infile);
577                         if (c != 'E')
578                                 continue;
579                         c = getc(infile);
580                         if (c != 'F')
581                                 continue;
582                         c = getc(infile);
583                         if (c != 'V')
584                                 continue;
585                         c = getc(infile);
586                         if (c != 'A')
587                                 continue;
588                         c = getc(infile);
589                         if (c != 'R')
590                                 continue;
591                         c = getc(infile);
592                         if (c != '_')
593                                 continue;
594
595                         defvarflag = 1;
596                         defunflag = 0;
597
598                         c = getc(infile);
599                         /* Note that this business doesn't apply under XEmacs.
600                            DEFVAR_BUFFER_LOCAL in XEmacs behaves normally. */
601                         defvarperbufferflag = (c == 'P');
602
603                         c = getc(infile);
604                 } else if (c == 'D') {
605                         c = getc(infile);
606                         if (c != 'E')
607                                 continue;
608                         c = getc(infile);
609                         if (c != 'F')
610                                 continue;
611                         c = getc(infile);
612                         defunflag = (c == 'U');
613                         defvarflag = 0;
614                         c = getc(infile);
615                 } else
616                         continue;
617
618                 while (c != '(') {
619                         if (c < 0)
620                                 goto eof;
621                         c = getc(infile);
622                 }
623
624                 c = getc(infile);
625                 if (c != '"')
626                         continue;
627                 c = read_c_string(infile, -1, 0);
628
629                 if (defunflag)
630                         commas = 4;
631                 else if (defvarperbufferflag)
632                         commas = 2;
633                 else if (defvarflag)
634                         commas = 1;
635                 else            /* For DEFSIMPLE and DEFPRED */
636                         commas = 2;
637
638                 while (commas) {
639                         if (c == ',') {
640                                 commas--;
641                                 if (defunflag && (commas == 1 || commas == 2)) {
642                                         do
643                                                 c = getc(infile);
644                                         while (c == ' ' || c == '\n'
645                                                || c == '\t');
646                                         if (c < 0)
647                                                 goto eof;
648                                         ungetc(c, infile);
649                                         if (commas == 2)        /* pick up minargs */
650                                                 fscanf(infile, "%d", &minargs);
651                                         else /* pick up maxargs */ if (c == 'M' || c == 'U')    /* MANY || UNEVALLED */
652                                                 maxargs = -1;
653                                         else
654                                                 fscanf(infile, "%d", &maxargs);
655                                 }
656                         }
657                         if (c < 0)
658                                 goto eof;
659                         c = getc(infile);
660                 }
661                 while (c == ' ' || c == '\n' || c == '\t')
662                         c = getc(infile);
663                 if (c == '"')
664                         c = read_c_string(infile, 0, 0);
665                 if (defunflag | defvarflag) {
666                         while (c != '/')
667                                 c = getc(infile);
668                         c = getc(infile);
669                         while (c == '*')
670                                 c = getc(infile);
671                 } else {
672                         while (c != ',')
673                                 c = getc(infile);
674                         c = getc(infile);
675                 }
676                 while (c == ' ' || c == '\n' || c == '\t')
677                         c = getc(infile);
678                 if (defunflag | defvarflag)
679                         ungetc(c, infile);
680
681                 if (defunflag || defvarflag || c == '"') {
682                         if (ellcc) {
683                                 fprintf(outfile, "\tCDOC%s(\"%s\", \"\\\n",
684                                         defvarflag ? "SYM" : "SUBR", buf);
685                         } else {
686                                 put_filename (filename); /* XEmacs addition */
687                                 putc(037, outfile);
688                                 putc(defvarflag ? 'V' : 'F', outfile);
689                                 fprintf(outfile, "%s\n", buf);
690                         }
691                         c = read_c_string(infile, 1, (defunflag || defvarflag));
692
693                         /* If this is a defun, find the arguments and print
694                            them.  If this function takes MANY or UNEVALLED args,
695                            then the C source won't give the names of the
696                            arguments, so we shouldn't bother trying to find
697                            them.  */
698                         if (defunflag && maxargs != -1) {
699                                 char argbuf[1024], *p = argbuf;
700 #if 0                           /* For old DEFUN's only */
701                                 while (c != ')') {
702                                         if (c < 0)
703                                                 goto eof;
704                                         c = getc(infile);
705                                 }
706 #endif
707                                 /* Skip into arguments.  */
708                                 while (c != '(') {
709                                         if (c < 0)
710                                                 goto eof;
711                                         c = getc(infile);
712                                 }
713                                 /* Copy arguments into ARGBUF.  */
714                                 *p++ = c;
715                                 do
716                                         *p++ = c = getc(infile);
717                                 while (c != ')');
718                                 *p = '\0';
719                                 /* Output them.  */
720                                 if (ellcc)
721                                         fprintf(outfile, "\\n\\\n\\n\\\n");
722                                 else
723                                         fprintf(outfile, "\n\n");
724                                 write_c_args(outfile, buf, argbuf, minargs,
725                                              maxargs);
726                         }
727                         if (ellcc)
728                                 fprintf(outfile, "\\n\");\n\n");
729                 }
730         }
731       eof:
732         fclose(infile);
733         return 0;
734 }
735 \f
736 /* Read a file of Lisp code, compiled or interpreted.
737  Looks for
738   (defun NAME ARGS DOCSTRING ...)
739   (defmacro NAME ARGS DOCSTRING ...)
740   (autoload (quote NAME) FILE DOCSTRING ...)
741   (defvar NAME VALUE DOCSTRING)
742   (defconst NAME VALUE DOCSTRING)
743   (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
744   (fset (quote NAME) #[... DOCSTRING ...])
745   (defalias (quote NAME) #[... DOCSTRING ...])
746  starting in column zero.
747  (quote NAME) may appear as 'NAME as well.
748
749  We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
750  When we find that, we save it for the following defining-form,
751  and we use that instead of reading a doc string within that defining-form.
752
753  For defun, defmacro, and autoload, we know how to skip over the arglist.
754  For defvar, defconst, and fset we skip to the docstring with a kludgy
755  formatting convention: all docstrings must appear on the same line as the
756  initial open-paren (the one in column zero) and must contain a backslash
757  and a double-quote immediately after the initial double-quote.  No newlines
758  must appear between the beginning of the form and the first double-quote.
759  The only source file that must follow this convention is loaddefs.el; aside
760  from that, it is always the .elc file that we look at, and they are no
761  problem because byte-compiler output follows this convention.
762  The NAME and DOCSTRING are output.
763  NAME is preceded by `F' for a function or `V' for a variable.
764  An entry is output only if DOCSTRING has \ newline just after the opening "
765
766  Adds the filename a symbol or function was found in before its docstring;
767  there's no need for this with the load-history available, but we do it for
768  consistency with the C parsing code. 
769  */
770
771 static void skip_white(FILE * infile)
772 {
773         char c = ' ';
774         while (c == ' ' || c == '\t' || c == '\n')
775                 c = getc(infile);
776         ungetc(c, infile);
777 }
778
779 static void read_lisp_symbol(FILE * infile, char *buffer)
780 {
781         char c;
782         char *fillp = buffer;
783
784         skip_white(infile);
785         while (1) {
786                 c = getc(infile);
787                 if (c == '\\')
788                         /* FSF has *(++fillp), which is wrong. */
789                         *fillp++ = getc(infile);
790                 else if (c == ' ' || c == '\t' || c == '\n' || c == '('
791                          || c == ')') {
792                         ungetc(c, infile);
793                         *fillp = 0;
794                         break;
795                 } else
796                         *fillp++ = c;
797         }
798
799         if (!buffer[0])
800                 fprintf(stderr, "## expected a symbol, got '%c'\n", c);
801
802         skip_white(infile);
803 }
804
805 static int scan_lisp_file(const char *filename, const char *mode)
806 {
807         FILE *infile;
808         register int c;
809         char *saved_string = 0;
810
811         infile = fopen(filename, mode);
812         if (infile == NULL) {
813                 perror(filename);
814                 return 0;       /* No error */
815         }
816
817         c = '\n';
818         while (!feof(infile)) {
819                 char buffer[BUFSIZ];
820                 char type;
821
822                 if (c != '\n') {
823                         c = getc(infile);
824                         continue;
825                 }
826                 c = getc(infile);
827                 /* Detect a dynamic doc string and save it for the next expression.  */
828                 if (c == '#') {
829                         c = getc(infile);
830                         if (c == '@') {
831                                 int length = 0;
832                                 int i;
833
834                                 /* Read the length.  */
835                                 while ((c = getc(infile), c >= '0' && c <= '9')) {
836                                         length *= 10;
837                                         length += c - '0';
838                                 }
839
840                                 /* The next character is a space that is counted in the length
841                                    but not part of the doc string.
842                                    We already read it, so just ignore it.  */
843                                 length--;
844
845                                 /* Read in the contents.  */
846                                 if (saved_string != 0)
847                                         free(saved_string);
848                                 saved_string = (char *)xmalloc(length);
849                                 for (i = 0; i < length; i++)
850                                         saved_string[i] = getc(infile);
851                                 /* The last character is a ^_.
852                                    That is needed in the .elc file
853                                    but it is redundant in DOC.  So get rid of it here.  */
854                                 saved_string[length - 1] = 0;
855                                 /* Skip the newline.  */
856                                 c = getc(infile);
857                                 while (c != '\n')
858                                         c = getc(infile);
859                         }
860                         continue;
861                 }
862
863                 if (c != '(')
864                         continue;
865
866                 read_lisp_symbol(infile, buffer);
867
868                 if (!strcmp(buffer, "defun") || !strcmp(buffer, "defmacro")) {
869                         type = 'F';
870                         read_lisp_symbol(infile, buffer);
871
872                         /* Skip the arguments: either "nil" or a list in parens */
873
874                         c = getc(infile);
875                         if (c == 'n') { /* nil */
876                                 if ((c = getc(infile)) != 'i' ||
877                                     (c = getc(infile)) != 'l') {
878                                         fprintf(stderr,
879                                                 "## unparsable arglist in %s (%s)\n",
880                                                 buffer, filename);
881                                         continue;
882                                 }
883                         } else if (c != '(') {
884                                 fprintf(stderr,
885                                         "## unparsable arglist in %s (%s)\n",
886                                         buffer, filename);
887                                 continue;
888                         } else
889                                 while (c != ')')
890                                         c = getc(infile);
891                         skip_white(infile);
892
893                         /* If the next three characters aren't `dquote bslash newline'
894                            then we're not reading a docstring.
895                          */
896                         if ((c = getc(infile)) != '"' ||
897                             (c = getc(infile)) != '\\' ||
898                             (c = getc(infile)) != '\n') {
899 #ifdef DEBUG
900                                 fprintf(stderr, "## non-docstring in %s (%s)\n",
901                                         buffer, filename);
902 #endif
903                                 continue;
904                         }
905                 }
906
907                 else if (!strcmp(buffer, "defvar") ||
908                          !strcmp(buffer, "defconst")) {
909                         char c1 = 0, c2 = 0;
910                         type = 'V';
911                         read_lisp_symbol(infile, buffer);
912
913                         if (saved_string == 0) {
914
915                                 /* Skip until the first newline; remember the two previous chars. */
916                                 while (c != '\n' && c >= 0) {
917                                         /* #### Kludge -- Ignore any ESC x x ISO2022 sequences */
918                                         if (c == 27) {
919                                                 (void)getc(infile);
920                                                 (void)getc(infile);
921                                                 goto nextchar;
922                                         }
923
924                                         c2 = c1;
925                                         c1 = c;
926                                       nextchar:
927                                         c = getc(infile);
928                                 }
929
930                                 /* If two previous characters were " and \,
931                                    this is a doc string.  Otherwise, there is none.  */
932                                 if (c2 != '"' || c1 != '\\') {
933 #ifdef DEBUG
934                                         fprintf(stderr,
935                                                 "## non-docstring in %s (%s)\n",
936                                                 buffer, filename);
937 #endif
938                                         continue;
939                                 }
940                         }
941                 }
942
943                 else if (! strcmp (buffer, "custom-declare-variable"))
944                 {
945                         char c1 = 0, c2 = 0;
946                         type = 'V';
947
948                         c = getc (infile);
949                         if (c == '\'')
950                                 read_lisp_symbol (infile, buffer);
951                         else {
952                                 if (c != '(') {
953                                         fprintf (stderr,
954                                                  "## unparsable name in "
955                                                  "custom-declare-variable "
956                                                  "in %s\n",
957                                                  filename);
958                                         continue;
959                                 }
960                                 read_lisp_symbol (infile, buffer);
961                                 if (strcmp (buffer, "quote")) {
962                                         fprintf (stderr,
963                                                  "## unparsable name in "
964                                                  "custom-declare-variable "
965                                                  "in %s\n",
966                                                  filename);
967                                         continue;
968                                 }
969                                 read_lisp_symbol (infile, buffer);
970                                 c = getc (infile);
971                                 if (c != ')') {
972                                         fprintf (stderr,
973                                                  "## unparsable quoted name "
974                                                  "in custom-declare-variable"
975                                                  " in %s\n", filename);
976                                         continue;
977                                 }
978                         }
979
980                         if (saved_string == 0)
981                         {
982                                 /* Skip to end of line; remember the two
983                                    previous chars.  */
984                                 while (c != '\n' && c >= 0) {
985                                         c2 = c1;
986                                         c1 = c;
987                                         /* SXEmacs: shame we can't do this. */
988                                         /* c = getc_skipping_iso2022 (infile); */
989                                         getc (infile);
990                                 }
991           
992                                 /* If two previous characters were " and \,
993                                    this is a doc string.  Otherwise, there is
994                                    none.  */
995                                 if (c2 != '"' || c1 != '\\') {
996 #ifdef DEBUG
997                                         fprintf (stderr, "## non-docstring in"
998                                                  " %s (%s)\n",
999                                                  buffer, filename);
1000 #endif
1001                                         continue;
1002                                 }
1003                         }
1004                 }
1005
1006                 else if (!strcmp(buffer, "fset") || !strcmp(buffer, "defalias")) {
1007                         char c1 = 0, c2 = 0;
1008                         type = 'F';
1009
1010                         c = getc(infile);
1011                         if (c == '\'')
1012                                 read_lisp_symbol(infile, buffer);
1013                         else {
1014                                 if (c != '(') {
1015                                         fprintf(stderr,
1016                                                 "## unparsable name in fset in %s\n",
1017                                                 filename);
1018                                         continue;
1019                                 }
1020                                 read_lisp_symbol(infile, buffer);
1021                                 if (strcmp(buffer, "quote")) {
1022                                         fprintf(stderr,
1023                                                 "## unparsable name in fset in %s\n",
1024                                                 filename);
1025                                         continue;
1026                                 }
1027                                 read_lisp_symbol(infile, buffer);
1028                                 c = getc(infile);
1029                                 if (c != ')') {
1030                                         fprintf(stderr,
1031                                                 "## unparsable quoted name in fset in %s\n",
1032                                                 filename);
1033                                         continue;
1034                                 }
1035                         }
1036
1037                         if (saved_string == 0) {
1038                                 /* Skip until the first newline; remember the two previous chars. */
1039                                 while (c != '\n' && c >= 0) {
1040                                         c2 = c1;
1041                                         c1 = c;
1042                                         c = getc(infile);
1043                                 }
1044
1045                                 /* If two previous characters were " and \,
1046                                    this is a doc string.  Otherwise, there is none.  */
1047                                 if (c2 != '"' || c1 != '\\') {
1048 #ifdef DEBUG
1049                                         fprintf(stderr,
1050                                                 "## non-docstring in %s (%s)\n",
1051                                                 buffer, filename);
1052 #endif
1053                                         continue;
1054                                 }
1055                         }
1056                 }
1057
1058                 else if (!strcmp(buffer, "autoload")) {
1059                         type = 'F';
1060                         c = getc(infile);
1061                         if (c == '\'')
1062                                 read_lisp_symbol(infile, buffer);
1063                         else {
1064                                 if (c != '(') {
1065                                         fprintf(stderr,
1066                                                 "## unparsable name in autoload in %s\n",
1067                                                 filename);
1068                                         continue;
1069                                 }
1070                                 read_lisp_symbol(infile, buffer);
1071                                 if (strcmp(buffer, "quote")) {
1072                                         fprintf(stderr,
1073                                                 "## unparsable name in autoload in %s\n",
1074                                                 filename);
1075                                         continue;
1076                                 }
1077                                 read_lisp_symbol(infile, buffer);
1078                                 c = getc(infile);
1079                                 if (c != ')') {
1080                                         fprintf(stderr,
1081                                                 "## unparsable quoted name in autoload in %s\n",
1082                                                 filename);
1083                                         continue;
1084                                 }
1085                         }
1086                         skip_white(infile);
1087                         if ((c = getc(infile)) != '\"') {
1088                                 fprintf(stderr,
1089                                         "## autoload of %s unparsable (%s)\n",
1090                                         buffer, filename);
1091                                 continue;
1092                         }
1093                         read_c_string(infile, 0, 0);
1094                         skip_white(infile);
1095
1096                         if (saved_string == 0) {
1097                                 /* If the next three characters aren't `dquote bslash newline'
1098                                    then we're not reading a docstring.  */
1099                                 if ((c = getc(infile)) != '"' ||
1100                                     (c = getc(infile)) != '\\' ||
1101                                     (c = getc(infile)) != '\n') {
1102 #ifdef DEBUG
1103                                         fprintf(stderr,
1104                                                 "## non-docstring in %s (%s)\n",
1105                                                 buffer, filename);
1106 #endif
1107                                         continue;
1108                                 }
1109                         }
1110                 }
1111 #if 0                           /* causes crash */
1112                 else if (!strcmp(buffer, "if") ||
1113                          !strcmp(buffer, "byte-code")) ;
1114 #endif
1115
1116                 else {
1117 #ifdef DEBUG
1118                         fprintf(stderr,
1119                                 "## unrecognized top-level form, %s (%s)\n",
1120                                 buffer, filename);
1121 #endif
1122                         continue;
1123                 }
1124
1125                 /* At this point, we should either use the previous
1126                    dynamic doc string in saved_string
1127                    or gobble a doc string from the input file.
1128
1129                    In the latter case, the opening quote (and leading
1130                    backslash-newline) have already been read.  */
1131                 put_filename (filename); /* XEmacs addition */
1132                 putc(037, outfile);
1133                 putc(type, outfile);
1134                 fprintf(outfile, "%s\n", buffer);
1135                 if (saved_string) {
1136                         fputs(saved_string, outfile);
1137                         /* Don't use one dynamic doc string twice.  */
1138                         free(saved_string);
1139                         saved_string = 0;
1140                 } else
1141                         read_c_string(infile, 1, 0);
1142         }
1143         fclose(infile);
1144         return 0;
1145 }