1 /* Generate doc-string file for SXEmacs from source files.
2 Copyright (C) 1985, 1986, 1992, 1993, 1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Board of Trustees, University of Illinois.
4 Copyright (C) 1998, 1999 J. Kean Johnston.
5 Copyright (C) 2004 Steve Youngs.
7 This file is part of SXEmacs.
9 SXEmacs is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
14 SXEmacs is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 /* Synched up with: FSF 19.30. */
24 /* The arguments given to this program are all the C and Lisp source files
25 of SXEmacs. .elc and .el and .c files are allowed.
26 A .o file can also be specified; the .c file it was made from is used.
27 This helps the makefile pass the correct list of files.
29 The results, which go to standard output or to a file
30 specified with -a or -o (-a to append, -o to start from nothing),
31 are entries containing function or variable names and their documentation.
32 Each entry starts with a ^_ character.
33 Then comes F for a function or V for a variable.
34 Then comes the function or variable name, terminated with a newline.
35 Then comes the documentation for that function or variable.
37 Added 19.15/20.1: `-i site-packages' allows the installer to dump extra
38 packages without modifying Makefiles, etc.
40 Big cleanup 2012-01-08 Sebastian Freundt
43 #define NO_SHORTNAMES /* Tell config not to load remap.h */
48 #if __STDC__ || defined(STDC_HEADERS)
57 #include <sys/param.h>
59 /* How long can a source filename be in DOC (including "\037S" at the start
60 and "\n" at the end) ? */
61 #define DOC_MAX_FILENAME_LENGTH 2048
/* True when ARG is the '/' character.  NOTE(review): ARG is not
   parenthesized in the expansion, so pass only simple expressions. */
62 #define IS_DIRECTORY_SEP(arg) ('/' == arg)
64 /* Can't use the system assert on OS X, it can't find a definition for
65 __eprintf on linking */
/* Local replacement: evaluates X once and calls abort() when it is false. */
66 #define assert(x) ((x) ? (void) 0 : (void) abort ())
/* fopen() mode strings used when opening the input and output files. */
69 #define READ_BINARY "r"
70 #define WRITE_BINARY "w"
71 #define APPEND_BINARY "a"
73 /* Stdio stream for output to the DOC file. */
/* Module name consumed by write_doc_header(); main() sets it from the
   -E, -m and --modname options. */
75 static char *modname = NULL;
/* Forward declarations of the scanners and helpers defined further down. */
83 static void put_filename (const char *filename);
84 static int scan_file(const char *filename);
85 static int read_c_string(FILE *, int, int);
87 write_c_args(FILE * out, const char *func, char *buf, int minargs, int maxargs);
88 static int scan_c_file(const char *filename, const char *mode);
89 static void skip_white(FILE *);
90 static void read_lisp_symbol(FILE *, char *);
91 static int scan_lisp_file(const char *filename, const char *mode);
/* Character-class test for identifier characters: the visible alternatives
   accept ASCII letters and digits.  C is expanded several times, so the
   argument must be free of side effects. */
93 #define C_IDENTIFIER_CHAR_P(c) \
94 (('A' <= c && c <= 'Z') || \
95 ('a' <= c && c <= 'z') || \
96 ('0' <= c && c <= '9') || \
99 /* Name this program was invoked with. */
100 static char *progname;
102 /* Set to 1 if this was invoked by ellcc */
103 static int ellcc = 0;
106 * Print error message. `s1' is printf control string, `s2' is arg for it. */
/* Report a problem on stderr.  The output is "PROGNAME: ", then S1 expanded
   by fprintf with S2 as its only argument, then a newline.  Because S1 is
   used as the format string it should contain at most one %s conversion. */
108 error(const char *s1, const char *s2)
110 fprintf(stderr, "%s: ", progname);
111 fprintf(stderr, s1, s2);
112 fprintf(stderr, "\n");
117 * Print error message and exit. */
/* Fatal-error counterpart of error(): takes the same (S1, S2) pair.  The
   noreturn attribute declares that control never comes back to the caller.
   NOTE(review): the body is not visible here; callers in this file pass a
   message plus either a string, "" or 0 as S2. */
119 __attribute__((noreturn))
120 fatal(const char *s1, const char *s2)
127 * Like malloc but get fatal error if memory is exhausted. */
/* Allocate SIZE bytes with malloc().  A NULL result is reported through
   fatal() with the message "virtual memory exhausted". */
129 xmalloc(unsigned int size)
131 void *result = malloc(size);
132 if (result == NULL) {
133 fatal("virtual memory exhausted", 0);
/* Hand out, one per call, the entries listed in the file named EXTRA_ELCS;
   main() keeps calling until NULL comes back.  The stream and the line
   buffer are function-local statics, so the text of a returned entry lives
   in storage that the next call reuses.
   Each line is read with fgets(); the entry starts at the second character
   of the line and its last two characters are cut off.
   NOTE(review): presumably each line is NAME wrapped in one delimiter
   character on either side plus the newline - confirm against the file
   format. */
139 next_extra_elc(char *extra_elcs)
141 static FILE *fp = NULL;
142 static char line_buf[BUFSIZ];
/* the entry begins after the first character of the line */
143 char *p = line_buf + 1;
148 } else if (!(fp = fopen(extra_elcs, READ_BINARY))) {
149 /* It is not an error if this file doesn't exist. */
152 if(!fgets(line_buf, BUFSIZ, fp)) {
160 if (!fgets(line_buf, BUFSIZ, fp)) {
166 if (strlen(p) <= 2 || strlen(p) >= (BUFSIZ - 5)) {
167 /* reject too short or too long lines */
/* drop the final two characters of the line */
170 p[strlen(p) - 2] = '\0';
/* Write the preamble of the generated C file to outfile: a "DO NOT EDIT"
   banner, the emodules-ng.h include, and the declaration plus the first
   lines of the definition of NAME_LTX_docs().  NAME is derived from a copy
   of the global modname, searched for its last '.', with characters that
   are neither letters nor digits treated specially so that the result can
   serve as a C identifier.
   NOTE(review): modname is passed to strdup() unchecked, and isdigit() /
   isalpha() receive a plain char; the ctype functions expect a value
   representable as unsigned char (or EOF). */
177 write_doc_header(void)
179 char *tmp, *modout = strdup(modname), *modoutC;
182 if ((tmp = strrchr(modout, '.')) != NULL) {
184 tmp = strrchr(modout, '.');
189 /* the same for modoutC */
190 modoutC = strdup(modout);
191 modsz = strlen(modoutC);
193 for (size_t i = 0; i < modsz; i++) {
194 /* for the C version we have to convert any non-alphanumeric character to _ */
195 if (!isdigit(modoutC[i]) && !isalpha(modoutC[i])) {
200 fprintf(outfile, "/* DO NOT EDIT - AUTOMATICALLY GENERATED */\n\n");
201 fprintf(outfile, "#include <emodules-ng.h>\n\n");
203 /* declare and start the LTX_docs() block */
204 fprintf(outfile, "\n\nextern void %s_LTX_docs(void);\n", modoutC);
205 fprintf(outfile, "\nvoid\n%s_LTX_docs(void)\n", modoutC);
/* Program entry point.
   Pass 1 walks the arguments and handles the options that take a value.
   Pass 2 walks the arguments again: "-d" changes directory once more, other
   arguments starting with '-' are recognized as options, and every
   remaining argument that did not already occur earlier on the command line
   is handed to scan_file().  Afterwards each entry returned by
   next_extra_elc() is scanned as well.
   The return value is (err_count > 0), where err_count is the sum of the
   scan_file() results. */
211 main(int argc, char **argv)
215 char *extra_elcs = NULL;
221 /* Handle the options, each of which consumes the following argument:
   -o FILE          write the output to FILE
   -a FILE          append the output to FILE
   -E FILE          write the output to FILE; FILE also becomes the module
                    name when none has been set yet
   -d DIR           change to directory DIR
   -i FILE          FILE lists extra files to scan (see next_extra_elc)
   -m, --modname N  set the module name to N */
222 for (i = 1; i < argc - 1;) {
223 if (!strcmp(argv[i], "-o")) {
224 outfile = fopen(argv[++i], WRITE_BINARY);
226 if (!strcmp(argv[i], "-a")) {
227 outfile = fopen(argv[++i], APPEND_BINARY);
229 if (!strcmp(argv[i], "-E")) {
230 if (modname == NULL) {
231 modname = strdup(argv[i+1]);
234 outfile = fopen(argv[++i], WRITE_BINARY);
236 if (!strcmp(argv[i], "-d")) {
237 if (chdir(argv[++i]) < 0) {
238 fatal("Could not change to directory ",argv[i]);
242 if (!strcmp(argv[i], "-i")) {
243 extra_elcs = argv[++i];
246 if (!strcmp(argv[i], "--modname") || !strcmp(argv[i], "-m")) {
247 modname = strdup(argv[++i]);
252 fatal("No output file specified", "");
256 fprintf(outfile, "{\n");
259 for (i = 1; i < argc; i++) {
262 if (argc > i + 1 && !strcmp (argv[i], "-d")) {
263 /* XEmacs change; allow more than one chdir. The
264 idea is that the second chdir is to source-lisp,
265 and that any Lisp files not under there have the
266 full path specified. */
268 if (chdir (argv[i]) < 0) {
269 fatal("Could not change to directory ", argv[i]);
272 } else if (argv[i][0] == '-') {
276 /* Don't process one file twice. */
277 for (j = 1; j < i; j++) {
278 if (!strcmp(argv[i], argv[j])) {
283 /* err_count seems to be {mis,un}used; it only feeds the exit status below */
284 err_count += scan_file(argv[i]);
291 while ((p = next_extra_elc(extra_elcs)) != NULL) {
292 err_count += scan_file(p);
298 fprintf(outfile, "}\n\n");
303 return err_count > 0;
306 /* Add a source file name boundary in the output file. */
/* Write FILENAME, followed by a newline, to outfile as a source-file
   boundary record.  Before writing, the local assert macro aborts the
   program if sizeof("\037S\n") + strlen(FILENAME) + 10 exceeds
   DOC_MAX_FILENAME_LENGTH.
   NOTE(review): the assert implies a "\037S" prefix in front of the name;
   the statement that emits it is not visible here. */
308 put_filename (const char *filename)
310 /* XEmacs change; don't strip directory information. */
311 /* <= because sizeof includes the nul byte at the end. Not quite
312 right, because it should include the length of the symbol +
313 "\037[VF]" instead of simply 10. */
314 assert(sizeof("\037S\n") + strlen(filename) + 10
315 <= DOC_MAX_FILENAME_LENGTH);
319 fprintf (outfile, "%s\n", filename);
324 * Read file FILENAME and output its doc strings to outfile.
325 * Return 1 if file is not found, 0 if it is found. **/
/* Choose a scanner for FILENAME from its suffix, record the choice in
   Current_file_type and return the scanner's result:
     ".elc" -> scan_lisp_file() with READ_BINARY
     ".el"  -> scan_lisp_file() with READ_TEXT
     other  -> scan_c_file() with READ_TEXT
   The two Lisp suffixes are honoured only while ellcc is 0, and only when
   the name is longer than the suffix itself. */
327 scan_file(const char *filename)
329 int len = strlen(filename);
331 if (ellcc == 0 && len > 4 && !strcmp(filename + len - 4, ".elc")) {
332 Current_file_type = elc_file;
333 return scan_lisp_file(filename, READ_BINARY);
334 } else if (ellcc == 0 && len > 3 &&
335 strcmp(filename + len - 3, ".el") == 0) {
336 Current_file_type = el_file;
337 return scan_lisp_file(filename, READ_TEXT);
/* everything else, and every file when ellcc is set, is scanned as C */
339 Current_file_type = c_file;
340 return scan_c_file(filename, READ_TEXT);
348 * Print a simple return in accordance with printflag and ellcc state*/
/* Emit the single character C as directed by PRINTFLAG; read_c_string()
   routes all of its output through this helper, passing the address of its
   cursor as P.  A negative PRINTFLAG selects its own branch below.
   NOTE(review): presumably positive means "write to outfile" and negative
   means "store through *P", matching the read_c_string() description -
   confirm against the full body. */
350 pr_char(int printflag, char **p, register int c)
365 } else if (printflag < 0) {
380 * Skip a C string from INFILE,
381 * and return the character that follows the closing ".
382 * If printflag is positive, output string contents to outfile.
383 * If it is negative, store contents in buf.
384 * Convert escape sequences \n and \t to newline and tab;
385 * discard \ followed by newline. **/
/* Consume string text from INFILE, forwarding characters to pr_char() with
   PRINTFLAG.
   C_DOCSTRING == 0: the loop ends at the closing double quote (or EOF), and
   a character 'n' gets special treatment.
   C_DOCSTRING != 0: a double quote does not end the loop; only EOF or logic
   that is not visible here does.  In this mode the visible code looks at
   '/' characters and emits a newline through pr_char().
   When Current_file_type is c_file the function then looks for a directly
   following string so that adjacent literals are concatenated.
   NOTE(review): the '/' handling presumably detects the end of a
   comment-style docstring - confirm against the full body. */
387 read_c_string(FILE *infile, int printflag, int c_docstring)
389 register int prevc = 0;
396 while ((c_docstring || c != '"') && c != EOF) {
398 int cc = getc(infile);
400 if (cc == '/' && prevc != '\n') {
401 pr_char(printflag, &p, '\n');
403 } else if (cc == '/') {
411 pr_char(printflag, &p, '\n');
421 if (!c_docstring && c == 'n') {
432 pr_char(printflag, &p, c);
436 /* look for continuation of string */
437 if (Current_file_type == c_file) {
450 /* If we had a "", concatenate the two strings. */
462 * Write to file OUT the argument names of function FUNC, whose text is in BUF.
463 * MINARGS and MAXARGS are the minimum and maximum number of arguments. **/
/* Print to OUT the argument list of a DEFUN as it should appear in the doc
   string.  BUFF holds the text of the C parameter list and is walked one
   character at a time.  The output starts with "arguments: (".  A
   "Lisp_Object" type name followed by whitespace is skipped, "&optional "
   is printed at the start of an identifier when MINARGS is 0 and MAXARGS is
   positive, lower-case letters are upcased and runs of spaces are
   collapsed.  FUNC is the function's name. */
466 write_c_args(FILE *out, const char *func, char *buff, int minargs, int maxargs)
471 /* XEmacs - "arguments:" is for parsing the docstring. FSF's help system
472 doesn't parse the docstring for arguments like we do, so we're also
473 going to omit the function name to preserve compatibility with elisp
474 that parses the docstring. Finally, not prefixing the arglist with
475 anything is asking for trouble because it's not uncommon to have an
476 unescaped parenthesis at the beginning of a line. --Stig */
477 fprintf(out, "arguments: (");
483 for (p = buff; *p; p++) {
486 /* Add support for ANSI prototypes. Hop over
487 "Lisp_Object" string (the only C type allowed in DEFUNs) */
488 static char lo[] = "Lisp_Object";
489 if ((C_IDENTIFIER_CHAR_P(c) != in_ident) && !in_ident &&
490 (strncmp(p, lo, sizeof(lo) - 1) == 0) &&
491 isspace((unsigned char)(*(p + sizeof(lo) - 1)))) {
492 p += (sizeof(lo) - 1);
493 while (isspace((unsigned char)(*p))) {
499 /* Notice when we start printing a new identifier. */
500 if (C_IDENTIFIER_CHAR_P(c) != in_ident) {
503 if (minargs == 0 && maxargs > 0) {
504 fprintf(out, "&optional ");
515 /* Print the C argument list as it would appear in lisp:
516 print underscores as hyphens, and print commas as spaces.
517 Collapse adjacent spaces into one. */
525 /* If the C argument name ends with `_', change it to ' ', to
526 allow use of C reserved words or global symbols as Lisp
528 if (c == '-' && !C_IDENTIFIER_CHAR_P(p[1])) {
532 /* If the character is a newline, escape it for the C
534 else if (c == '\n') {
537 } else if (c != ' ' || !just_spaced) {
538 if (c >= 'a' && c <= 'z') {
539 /* Upcase the letter. */
545 just_spaced = (c == ' ');
548 /* XEmacs addition */
/* Helper for scan_c_file() used while counting the commas of a DEFUN.
   It skips spaces, newlines and tabs on INFILE and reads the argument
   counts with fscanf("%d"): one branch fills *MINARGS, another fills
   *MAXARGS.  When the maxargs position starts with 'M' or 'U' it is the
   MANY / UNEVALLED case and no number is read.  A failed conversion is
   reported on stderr.
   NOTE(review): which value of COMMAS selects which branch is not visible
   here. */
555 check_comma(FILE *infile, register int commas, int *minargs, int *maxargs)
561 } while (c == ' ' || c == '\n' || c == '\t');
567 /* pick up minargs */
568 if (fscanf(infile, "%d", minargs) != 1) {
569 fprintf(stderr, "Failed to read minargs\n");
571 } else if (c == 'M' || c == 'U') {
572 /* MANY || UNEVALLED */
575 /* pick up maxargs */
576 if (fscanf(infile, "%d", maxargs) != 1) {
577 fprintf(stderr, "Failed to read maxargs\n");
584 * Read through a c file. If a .o file is named, the corresponding .c file is
586 * Looks for DEFUN constructs such as are defined in ../src/lisp.h.
587 * Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
/* Scan the C source FILENAME, opened with fopen mode MODE, and write a doc
   entry to outfile for each documented definition found.
   - The name is examined through a local copy F; a name whose last
     character is 'o' is treated specially (see the comment on .o files
     above) before the file is opened.  errno is set to ENAMETOOLONG on one
     path.
   - The main loop runs until feof() and recognizes the character sequences
     "DEFVAR_" and "DEF"; defunflag is set when the character after "DEF" is
     'U'.
   - The name is read into buf with read_c_string(); commas are then counted
     and, for a defun, the first and second comma are singled out.
   - Two output shapes are visible: a CDOCSUBR(...) / CDOCSYM(...) call, or
     put_filename() followed by 'F' or 'V' and the name.  The doc string
     itself is copied by read_c_string() in printing mode.
   - For a defun whose maxargs is not -1 the argument list is collected and
     printed through write_c_args().
   NOTE(review): the length check guarding ENAMETOOLONG and the condition
   selecting between the two output shapes are not visible here. */
589 scan_c_file(const char *filename, const char *mode)
594 register int defunflag;
595 register int defvarperbufferflag = 0;
596 register int defvarflag;
599 size_t l = strlen(filename);
604 errno = ENAMETOOLONG;
612 if (f[l - 1] == 'o') {
615 infile = fopen(f, mode);
617 /* A non-existent input file is not an error */
618 if (infile == NULL) {
624 while (!feof(infile)) {
631 * SXEmacs uses proper indentation so we need to
632 * search for `\t' instead of ' ' here.
639 (c = getc(infile)) == 'E' &&
640 (c = getc(infile)) == 'F' &&
641 (c = getc(infile)) == 'V' &&
642 (c = getc(infile)) == 'A' &&
643 (c = getc(infile)) == 'R' &&
644 (c = getc(infile)) == '_') {
649 /* Note that this business doesn't apply under
650 XEmacs. DEFVAR_BUFFER_LOCAL in XEmacs
652 defvarperbufferflag = (c == 'P');
658 } else if (c == 'D' &&
659 (c = getc(infile)) == 'E' &&
660 (c = getc(infile)) == 'F') {
662 defunflag = (c == 'U');
679 c = read_c_string(infile, -1, 0);
683 } else if (defvarperbufferflag) {
685 } else if (defvarflag) {
688 /* For DEFSIMPLE and DEFPRED */
691 for (; commas; c = getc(infile)) {
694 if (defunflag && (commas == 1 || commas == 2)) {
704 while (c == ' ' || c == '\n' || c == '\t') {
708 c = read_c_string(infile, 0, 0);
710 if (defunflag | defvarflag) {
724 while (c == ' ' || c == '\n' || c == '\t') {
727 if (defunflag | defvarflag) {
730 if (defunflag || defvarflag || c == '"') {
732 fprintf(outfile, "\tCDOC%s(\"%s\", \"\\\n",
733 defvarflag ? "SYM" : "SUBR", buf);
735 put_filename (filename); /* XEmacs addition */
737 putc(defvarflag ? 'V' : 'F', outfile);
738 fprintf(outfile, "%s\n", buf);
740 c = read_c_string(infile, 1, (defunflag || defvarflag));
742 /* If this is a defun, find the arguments and print
743 them. If this function takes MANY or UNEVALLED args,
744 then the C source won't give the names of the
745 arguments, so we shouldn't bother trying to find
747 if (defunflag && maxargs != -1) {
751 /* Skip into arguments. */
758 /* Copy arguments into ARGBUF. */
767 fprintf(outfile, "\\n\\\n\\n\\\n");
769 fprintf(outfile, "\n\n");
772 outfile, buf, argbuf, minargs, maxargs);
775 fprintf(outfile, "\\n\");\n\n");
784 /* Read a file of Lisp code, compiled or interpreted.
786 (defun NAME ARGS DOCSTRING ...)
787 (defmacro NAME ARGS DOCSTRING ...)
788 (autoload (quote NAME) FILE DOCSTRING ...)
789 (defvar NAME VALUE DOCSTRING)
790 (defconst NAME VALUE DOCSTRING)
791 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
792 (fset (quote NAME) #[... DOCSTRING ...])
793 (defalias (quote NAME) #[... DOCSTRING ...])
794 starting in column zero.
795 (quote NAME) may appear as 'NAME as well.
797 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
798 When we find that, we save it for the following defining-form,
799 and we use that instead of reading a doc string within that defining-form.
801 For defun, defmacro, and autoload, we know how to skip over the arglist.
802 For defvar, defconst, and fset we skip to the docstring with a kludgy
803 formatting convention: all docstrings must appear on the same line as the
804 initial open-paren (the one in column zero) and must contain a backslash
805 and a double-quote immediately after the initial double-quote. No newlines
806 must appear between the beginning of the form and the first double-quote.
807 The only source file that must follow this convention is loaddefs.el; aside
808 from that, it is always the .elc file that we look at, and they are no
809 problem because byte-compiler output follows this convention.
810 The NAME and DOCSTRING are output.
811 NAME is preceded by `F' for a function or `V' for a variable.
812 An entry is output only if DOCSTRING has \ newline just after the opening "
814 Adds the filename a symbol or function was found in before its docstring;
815 there's no need for this with the load-history available, but we do it for
816 consistency with the C parsing code.
/* Consume a run of spaces, tabs and newlines from INFILE.
   NOTE(review): how the first non-blank character is handed back to the
   stream is not visible here. */
820 skip_white(FILE *infile)
823 while (c == ' ' || c == '\t' || c == '\n') {
/* Read one Lisp symbol from INFILE into BUFFER, appending characters
   through the cursor fillp.  A space or a parenthesis is among the
   characters that get their own branch, and a diagnostic starting with
   "## expected a symbol" is printed on stderr on one path.
   NOTE(review): no bound on the size of BUFFER is visible here; callers
   must supply a buffer large enough for the longest symbol. */
831 read_lisp_symbol(FILE *infile, char *buffer)
834 char *fillp = buffer;
840 /* FSF has *(++fillp), which is wrong. */
845 *fillp++ = (char)(c);
846 } else if (c == ' ' ||
849 c == '(' || c == ')') {
854 *fillp++ = (char)(c);
859 fprintf(stderr, "## expected a symbol, got '%c'\n", c);
/* Read a dynamic doc string ("LENGTH CONTENTS") from INFILE.  The decimal
   LENGTH is parsed first, then a buffer of LENGTH bytes is obtained from
   xmalloc() and stored in *SAVED_STRING, and the contents are copied into
   it character by character.  The last byte of the buffer is overwritten
   with NUL, and the rest of the input line is consumed.  scan_lisp_file()
   assigns the return value to its current-character variable.
   NOTE(review): the write to index length - 1 assumes LENGTH is at least
   1. */
866 get_dyna_doc(FILE *infile, char **saved_string)
872 /* Read the length. */
873 while ((c = getc(infile), c >= '0' && c <= '9')) {
878 /* The next character is a space that is counted in the length
879 but not part of the doc string.
880 We already read it, so just ignore it. */
883 /* Read in the contents. */
/* a string left over from an earlier call gets its own handling first */
884 if (*saved_string != NULL) {
887 *saved_string = xmalloc(length);
888 for (i = 0; i < length; i++) {
891 (*saved_string)[i] = (char)(c);
893 (*saved_string)[i] = '\0';
897 /* The last character is a ^_.
898 * That is needed in the .elc file
899 * but it is redundant in DOC. So get rid of it here. */
900 (*saved_string)[length - 1] = 0;
902 /* Skip the newline. */
904 while (c > 0 && c != '\n') {
/* Scan the Lisp file FILENAME, opened with fopen mode MODE, and write a doc
   entry to outfile for each documented top-level form.
   The head of each form is read with read_lisp_symbol() and compared
   against: defun, defmacro, defvar, defconst, custom-declare-variable,
   fset, defalias and autoload.  Any other head produces the diagnostic
   "## unrecognized top-level form".  The quoted-name forms
   (custom-declare-variable, fset, defalias, autoload) expect the name as
   (quote NAME) and report "## unparsable ..." otherwise.
   A dynamic doc string picked up by get_dyna_doc() is kept in saved_string.
   When one is pending it is written for the next entry; otherwise the form
   must show the doc-string opening described in the comments below, or a
   "## non-docstring" diagnostic is produced.
   An entry consists of put_filename(), the type character, the symbol name
   and a newline, followed by either saved_string or the text copied by
   read_c_string(). */
911 scan_lisp_file(const char *filename, const char *mode)
915 char *saved_string = 0;
917 infile = fopen(filename, mode);
918 if (infile == NULL) {
925 while (!feof(infile)) {
934 /* Detect a dynamic doc string and save it for the next
939 c = get_dyna_doc(infile, &saved_string);
948 read_lisp_symbol(infile, buffer);
950 if (!strcmp(buffer, "defun") || !strcmp(buffer, "defmacro")) {
952 read_lisp_symbol(infile, buffer);
954 /* Skip the arguments: either "nil" or
955 * a list in parens */
957 if (c == 'n') { /* nil */
958 if ((c = getc(infile)) != 'i' ||
959 (c = getc(infile)) != 'l') {
961 ## unparsable arglist in %s (%s)\n",
965 } else if (c != '(') {
967 ## unparsable arglist in %s (%s)\n",
977 /* If the next three characters aren't
978 * `dquote bslash newline' then we're not
979 * reading a docstring. */
980 if ((c = getc(infile)) != '"' ||
981 (c = getc(infile)) != '\\' ||
982 (c = getc(infile)) != '\n') {
985 ## non-docstring in %s (%s)\n",
991 } else if (!strcmp(buffer, "defvar") ||
992 !strcmp(buffer, "defconst")) {
997 read_lisp_symbol(infile, buffer);
999 if (saved_string == 0) {
1000 /* Skip until the first newline;
1001 * remember the two previous chars. */
1002 while (c != '\n' && c >= 0) {
1004 * Ignore any ESC x x ISO2022 seqs */
1017 /* If two previous characters were " and \,
1018 this is a doc string.
1019 Otherwise, there is none. */
1020 if (c2 != '"' || c1 != '\\') {
1023 ## non-docstring in %s (%s)\n",
1030 } else if (!strcmp(buffer, "custom-declare-variable")) {
1031 char c1 = 0, c2 = 0;
1036 read_lisp_symbol (infile, buffer);
1040 ## unparsable name in custom-declare-variable in %s\n",
1044 read_lisp_symbol (infile, buffer);
1045 if (strcmp (buffer, "quote")) {
1047 ## unparsable name in custom-declare-variable in %s\n",
1051 read_lisp_symbol (infile, buffer);
1055 ## unparsable quoted name in custom-declare-variable in %s\n",
1061 if (saved_string == 0) {
1062 /* Skip to end of line; remember the two
1064 while (c != '\n' && c >= 0) {
1067 /* SXEmacs: shame we can't do this. */
1068 /* c = getc_skipping_iso2022(infile); */
1069 (void)getc (infile);
1072 /* If two previous characters were " and \,
1073 this is a doc string. Otherwise, there is
1075 if (c2 != '"' || c1 != '\\') {
1078 ## non-docstring in %s (%s)\n",
1085 } else if (!strcmp(buffer, "fset") ||
1086 !strcmp(buffer, "defalias")) {
1087 char c1 = 0, c2 = 0;
1092 read_lisp_symbol(infile, buffer);
1096 ## unparsable name in fset in %s\n",
1100 read_lisp_symbol(infile, buffer);
1101 if (strcmp(buffer, "quote")) {
1103 ## unparsable name in fset in %s\n",
1107 read_lisp_symbol(infile, buffer);
1111 ## unparsable quoted name in fset in %s\n",
1117 if (saved_string == 0) {
1118 /* Skip until the first newline;
1119 * remember the two previous chars. */
1120 while (c != '\n' && c >= 0) {
1126 /* If two previous characters were " and \,
1127 this is a doc string.
1128 Otherwise, there is none. */
1129 if (c2 != '"' || c1 != '\\') {
1132 ## non-docstring in %s (%s)\n",
1139 } else if (!strcmp(buffer, "autoload")) {
1143 read_lisp_symbol(infile, buffer);
1147 ## unparsable name in autoload in %s\n",
1151 read_lisp_symbol(infile, buffer);
1152 if (strcmp(buffer, "quote")) {
1154 ## unparsable name in autoload in %s\n",
1158 read_lisp_symbol(infile, buffer);
1162 ## unparsable quoted name in autoload in %s\n",
1168 if ((c = getc(infile)) != '\"') {
1170 ## autoload of %s unparsable (%s)\n",
1174 read_c_string(infile, 0, 0);
1177 if (saved_string == 0) {
1178 /* If the next three characters aren't
1179 `dquote bslash newline'
1180 then we're not reading a docstring. */
1181 if ((c = getc(infile)) != '"' ||
1182 (c = getc(infile)) != '\\' ||
1183 (c = getc(infile)) != '\n') {
1186 ## non-docstring in %s (%s)\n",
1196 ## unrecognized top-level form, %s (%s)\n",
1202 /* At this point, we should either use the previous
1203 dynamic doc string in saved_string
1204 or gobble a doc string from the input file.
1206 In the latter case, the opening quote (and leading
1207 backslash-newline) have already been read. */
1208 put_filename (filename); /* XEmacs addition */
1210 putc(type, outfile);
1211 fprintf(outfile, "%s\n", buffer);
1213 fputs(saved_string, outfile);
1214 /* Don't use one dynamic doc string twice. */
1218 read_c_string(infile, 1, 0);
1222 /* If this is true then a dynamic doc string was
1223 detected without a next expression. We should not
1224 emit anything since the input was badly formed,
1225 but lets free the string...
1234 /* make-docfile.c ends here */