/* $Id: main.c,v 1.2 2016/12/18 16:56:32 christos Exp $ */ /* * Copyright (c) 2016 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifdef __linux__ #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #ifdef __linux__ #include #include #include #endif /* * Phase of parsing input file. */ enum phase { PHASE_INIT = 0, /* waiting to encounter definition */ PHASE_KEYS, /* have definition, now keywords */ PHASE_DESC, /* have keywords, now description */ PHASE_SEEALSO, PHASE_DECL /* have description, now declarations */ }; /* * What kind of declaration (preliminary analysis). */ enum decltype { DECLTYPE_CPP, /* pre-processor */ DECLTYPE_C, /* semicolon-closed non-preprocessor */ DECLTYPE_NEITHER /* non-preprocessor, no semicolon */ }; /* * In variables and function declarations, we toss these. */ enum preproc { PREPROC_SQLITE_API, PREPROC_SQLITE_DEPRECATED, PREPROC_SQLITE_EXPERIMENTAL, PREPROC_SQLITE_EXTERN, PREPROC__MAX }; /* * HTML tags that we recognise. */ enum tag { TAG_B_CLOSE, TAG_B_OPEN, TAG_BLOCK_CLOSE, TAG_BLOCK_OPEN, TAG_DD_CLOSE, TAG_DD_OPEN, TAG_DL_CLOSE, TAG_DL_OPEN, TAG_DT_CLOSE, TAG_DT_OPEN, TAG_H3_CLOSE, TAG_H3_OPEN, TAG_LI_CLOSE, TAG_LI_OPEN, TAG_OL_CLOSE, TAG_OL_OPEN, TAG_PRE_CLOSE, TAG_PRE_OPEN, TAG_UL_CLOSE, TAG_UL_OPEN, TAG__MAX }; TAILQ_HEAD(defnq, defn); TAILQ_HEAD(declq, decl); /* * A declaration of type DECLTYPE_CPP or DECLTYPE_C. * These need not be unique (if ifdef'd). */ struct decl { enum decltype type; /* type of declaration */ char *text; /* text */ size_t textsz; /* strlen(text) */ TAILQ_ENTRY(decl) entries; }; /* * A definition is basically the manpage contents. */ struct defn { char *name; /* really Nd */ TAILQ_ENTRY(defn) entries; char *desc; /* long description */ size_t descsz; /* strlen(desc) */ struct declq dcqhead; /* declarations */ int multiline; /* used when parsing */ int instruct; /* used when parsing */ const char *fn; /* parsed from file */ size_t ln; /* parsed at line */ int postprocessed; /* good for emission? */ char *dt; /* manpage title */ char **nms; /* manpage names */ size_t nmsz; /* number of names */ char *fname; /* manpage filename */ char *keybuf; /* raw keywords */ size_t keybufsz; /* length of "keysbuf" */ char *seealso; /* see also tags */ size_t seealsosz; /* length of seealso */ char **xrs; /* parsed "see also" references */ size_t xrsz; /* number of references */ char **keys; /* parsed keywords */ size_t keysz; /* number of keywords */ }; /* * Entire parse routine. */ struct parse { enum phase phase; /* phase of parse */ size_t ln; /* line number */ const char *fn; /* open file */ struct defnq dqhead; /* definitions */ }; /* * How to handle HTML tags we find in the text. */ struct taginfo { const char *html; /* HTML to key on */ const char *mdoc; /* generate mdoc(7) */ unsigned int flags; #define TAGINFO_NOBR 0x01 /* follow w/space, not newline */ #define TAGINFO_NOOP 0x02 /* just strip out */ #define TAGINFO_NOSP 0x04 /* follow w/o space or newline */ #define TAGINFO_INLINE 0x08 /* inline block (notused) */ }; static const struct taginfo tags[TAG__MAX] = { { "", "\\fP", TAGINFO_INLINE }, /* TAG_B_CLOSE */ { "", "\\fB", TAGINFO_INLINE }, /* TAG_B_OPEN */ { "", ".Ed\n.Pp", 0 }, /* TAG_BLOCK_CLOSE */ { "
", ".Bd -ragged", 0 }, /* TAG_BLOCK_OPEN */ { "", "", TAGINFO_NOOP }, /* TAG_DD_CLOSE */ { "
", "", TAGINFO_NOOP }, /* TAG_DD_OPEN */ { "", ".El\n.Pp", 0 }, /* TAG_DL_CLOSE */ { "
", ".Bl -tag -width Ds", 0 }, /* TAG_DL_OPEN */ { "", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_DT_CLOSE */ { "
", ".It", TAGINFO_NOBR }, /* TAG_DT_OPEN */ { "", "", TAGINFO_NOBR | TAGINFO_NOSP}, /* TAG_H3_CLOSE */ { "

", ".Ss", TAGINFO_NOBR }, /* TAG_H3_OPEN */ { "", "", TAGINFO_NOOP }, /* TAG_LI_CLOSE */ { "
  • ", ".It", 0 }, /* TAG_LI_OPEN */ { "", ".El\n.Pp", 0 }, /* TAG_OL_CLOSE */ { "
      ", ".Bl -enum", 0 }, /* TAG_OL_OPEN */ { "", ".Ed\n.Pp", 0 }, /* TAG_PRE_CLOSE */ { "
      ", ".Bd -literal", 0 }, /* TAG_PRE_OPEN */
      	{ "", ".El\n.Pp", 0 }, /* TAG_UL_CLOSE */
      	{ "
        ", ".Bl -bullet", 0 }, /* TAG_UL_OPEN */ }; static const char *const preprocs[TAG__MAX] = { "SQLITE_API", /* PREPROC_SQLITE_API */ "SQLITE_DEPRECATED", /* PREPROC_SQLITE_DEPRECATED */ "SQLITE_EXPERIMENTAL", /* PREPROC_SQLITE_EXPERIMENTAL */ "SQLITE_EXTERN", /* PREPROC_SQLITE_EXTERN */ }; /* Verbose reporting. */ static int verbose; /* Don't output any files: use stdout. */ static int nofile; static void decl_function_add(struct parse *p, char **etext, size_t *etextsz, const char *cp, size_t len) { if (' ' != (*etext)[*etextsz - 1]) { *etext = realloc(*etext, *etextsz + 2); if (NULL == *etext) err(EXIT_FAILURE, "%s:%zu: " "realloc", p->fn, p->ln); (*etextsz)++; strlcat(*etext, " ", *etextsz + 1); } *etext = realloc(*etext, *etextsz + len + 1); if (NULL == *etext) err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln); memcpy(*etext + *etextsz, cp, len); *etextsz += len; (*etext)[*etextsz] = '\0'; } static void decl_function_copy(struct parse *p, char **etext, size_t *etextsz, const char *cp, size_t len) { *etext = malloc(len + 1); if (NULL == *etext) err(EXIT_FAILURE, "%s:%zu: strdup", p->fn, p->ln); memcpy(*etext, cp, len); *etextsz = len; (*etext)[*etextsz] = '\0'; } /* * A C function (or variable, or whatever). * This is more specifically any non-preprocessor text. */ static int decl_function(struct parse *p, char *cp, size_t len) { char *ep, *ncp, *lcp, *rcp; size_t nlen; struct defn *d; struct decl *e; /* Fetch current interface definition. */ d = TAILQ_LAST(&p->dqhead, defnq); assert(NULL != d); /* * Since C tokens are semicolon-separated, we may be invoked any * number of times per a single line. */ again: while (isspace((int)*cp)) { cp++; len--; } if ('\0' == *cp) return(1); /* Whether we're a continuation clause. */ if (d->multiline) { /* This might be NULL if we're not a continuation. */ e = TAILQ_LAST(&d->dcqhead, declq); assert(DECLTYPE_C == e->type); assert(NULL != e); assert(NULL != e->text); assert(e->textsz); } else { assert(0 == d->instruct); e = calloc(1, sizeof(struct decl)); e->type = DECLTYPE_C; if (NULL == e) err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln); TAILQ_INSERT_TAIL(&d->dcqhead, e, entries); } /* * We begin by seeing if there's a semicolon on this line. * If there is, we'll need to do some special handling. */ ep = strchr(cp, ';'); lcp = strchr(cp, '{'); rcp = strchr(cp, '}'); /* We're only a partial statement (i.e., no closure). */ if (NULL == ep && d->multiline) { assert(NULL != e->text); assert(e->textsz > 0); /* Is a struct starting or ending here? */ if (d->instruct && NULL != rcp) d->instruct--; else if (NULL != lcp) d->instruct++; decl_function_add(p, &e->text, &e->textsz, cp, len); return(1); } else if (NULL == ep && ! d->multiline) { d->multiline = 1; /* Is a structure starting in this line? */ if (NULL != lcp && (NULL == rcp || rcp < lcp)) d->instruct++; decl_function_copy(p, &e->text, &e->textsz, cp, len); return(1); } /* Position ourselves after the semicolon. */ assert(NULL != ep); ncp = cp; nlen = (ep - cp) + 1; cp = ep + 1; len -= nlen; if (d->multiline) { assert(NULL != e->text); /* Don't stop the multi-line if we're in a struct. */ if (0 == d->instruct) { if (NULL == lcp || lcp > cp) d->multiline = 0; } else if (NULL != rcp && rcp < cp) if (0 == --d->instruct) d->multiline = 0; decl_function_add(p, &e->text, &e->textsz, ncp, nlen); } else { assert(NULL == e->text); if (NULL != lcp && lcp < cp) { d->multiline = 1; d->instruct++; } decl_function_copy(p, &e->text, &e->textsz, ncp, nlen); } goto again; } /* * A definition is just #define followed by space followed by the name, * then the value of that name. * We ignore the latter. * FIXME: this does not understand multi-line CPP, but I don't think * there are any instances of that in sqlite.h. */ static int decl_define(struct parse *p, char *cp, size_t len) { struct defn *d; struct decl *e; size_t sz; while (isspace((int)*cp)) { cp++; len--; } if (0 == len) { warnx("%s:%zu: empty pre-processor " "constant", p->fn, p->ln); return(1); } d = TAILQ_LAST(&p->dqhead, defnq); assert(NULL != d); /* * We're parsing a preprocessor definition, but we're still * waiting on a semicolon from a function definition. * It might be a comment or an error. */ if (d->multiline) { warnx("%s:%zu: multiline declaration " "still open (harmless?)", p->fn, p->ln); e = TAILQ_LAST(&d->dcqhead, declq); assert(NULL != e); e->type = DECLTYPE_NEITHER; d->multiline = d->instruct = 0; } sz = 0; while ('\0' != cp[sz] && ! isspace((int)cp[sz])) sz++; e = calloc(1, sizeof(struct decl)); if (NULL == e) err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln); e->type = DECLTYPE_CPP; e->text = calloc(1, sz + 1); if (NULL == e->text) err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln); strlcpy(e->text, cp, sz + 1); e->textsz = sz; TAILQ_INSERT_TAIL(&d->dcqhead, e, entries); return(1); } /* * A declaration is a function, variable, preprocessor definition, or * really anything else until we reach a blank line. */ static void decl(struct parse *p, char *cp, size_t len) { struct defn *d; struct decl *e; while (isspace((int)*cp)) { cp++; len--; } /* Check closure. */ if ('\0' == *cp) { p->phase = PHASE_INIT; /* Check multiline status. */ d = TAILQ_LAST(&p->dqhead, defnq); assert(NULL != d); if (d->multiline) { warnx("%s:%zu: multiline declaration " "still open (harmless?)", p->fn, p->ln); e = TAILQ_LAST(&d->dcqhead, declq); assert(NULL != e); e->type = DECLTYPE_NEITHER; d->multiline = d->instruct = 0; } return; } /* * Catch preprocessor defines, but discard all other types of * preprocessor statements. */ if ('#' == *cp) { len--; cp++; while (isspace((int)*cp)) { len--; cp++; } if (0 == strncmp(cp, "define", 6)) decl_define(p, cp + 6, len - 6); return; } decl_function(p, cp, len); } /* * Parse "SEE ALSO" phrases, which can come at any point in the * interface description (unlike what they claim). */ static void seealso(struct parse *p, char *cp, size_t len) { struct defn *d; if ('\0' == *cp) { warnx("%s:%zu: warn: unexpected end of " "interface description", p->fn, p->ln); p->phase = PHASE_INIT; return; } else if (0 == strcmp(cp, "*/")) { p->phase = PHASE_DECL; return; } else if ('*' != cp[0] || '*' != cp[1]) { warnx("%s:%zu: warn: unexpected end of " "interface description", p->fn, p->ln); p->phase = PHASE_INIT; return; } cp += 2; len -= 2; while (isspace((int)*cp)) { cp++; len--; } /* Blank line: back to description part. */ if (0 == len) { p->phase = PHASE_DESC; return; } /* Fetch current interface definition. */ d = TAILQ_LAST(&p->dqhead, defnq); assert(NULL != d); d->seealso = realloc(d->seealso, d->seealsosz + len + 1); memcpy(d->seealso + d->seealsosz, cp, len); d->seealsosz += len; d->seealso[d->seealsosz] = '\0'; } /* * A definition description is a block of text that we'll later format * in mdoc(7). * It extends from the name of the definition down to the declarations * themselves. */ static void desc(struct parse *p, char *cp, size_t len) { struct defn *d; size_t nsz; if ('\0' == *cp) { warnx("%s:%zu: warn: unexpected end of " "interface description", p->fn, p->ln); p->phase = PHASE_INIT; return; } else if (0 == strcmp(cp, "*/")) { /* End of comment area, start of declarations. */ p->phase = PHASE_DECL; return; } else if ('*' != cp[0] || '*' != cp[1]) { warnx("%s:%zu: warn: unexpected end of " "interface description", p->fn, p->ln); p->phase = PHASE_INIT; return; } cp += 2; len -= 2; while (isspace((int)*cp)) { cp++; len--; } /* Fetch current interface definition. */ d = TAILQ_LAST(&p->dqhead, defnq); assert(NULL != d); /* Ignore leading blank lines. */ if (0 == len && NULL == d->desc) return; /* Collect SEE ALSO clauses. */ if (0 == strncasecmp(cp, "see also:", 9)) { cp += 9; len -= 9; while (isspace((int)*cp)) { cp++; len--; } p->phase = PHASE_SEEALSO; d->seealso = realloc(d->seealso, d->seealsosz + len + 1); memcpy(d->seealso + d->seealsosz, cp, len); d->seealsosz += len; d->seealso[d->seealsosz] = '\0'; return; } /* White-space padding between lines. */ if (NULL != d->desc && ' ' != d->desc[d->descsz - 1] && '\n' != d->desc[d->descsz - 1]) { d->desc = realloc(d->desc, d->descsz + 2); if (NULL == d->desc) err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln); d->descsz++; strlcat(d->desc, " ", d->descsz + 1); } /* Either append the line of a newline, if blank. */ nsz = 0 == len ? 1 : len; if (NULL == d->desc) { d->desc = calloc(1, nsz + 1); if (NULL == d->desc) err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln); } else { d->desc = realloc(d->desc, d->descsz + nsz + 1); if (NULL == d->desc) err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln); } d->descsz += nsz; strlcat(d->desc, 0 == len ? "\n" : cp, d->descsz + 1); } /* * Copy all KEYWORDS into a buffer. */ static void keys(struct parse *p, char *cp, size_t len) { struct defn *d; if ('\0' == *cp) { warnx("%s:%zu: warn: unexpected end of " "interface keywords", p->fn, p->ln); p->phase = PHASE_INIT; return; } else if (0 == strcmp(cp, "*/")) { /* End of comment area, start of declarations. */ p->phase = PHASE_DECL; return; } else if ('*' != cp[0] || '*' != cp[1]) { if ('\0' != cp[1]) { warnx("%s:%zu: warn: unexpected end of " "interface keywords", p->fn, p->ln); p->phase = PHASE_INIT; return; } else warnx("%s:%zu: warn: workaround in effect " "for unexpected end of " "interface keywords", p->fn, p->ln); } cp += 2; len -= 2; while (isspace((int)*cp)) { cp++; len--; } if (0 == len) { p->phase = PHASE_DESC; return; } else if (strncmp(cp, "KEYWORDS:", 9)) return; cp += 9; len -= 9; d = TAILQ_LAST(&p->dqhead, defnq); assert(NULL != d); d->keybuf = realloc(d->keybuf, d->keybufsz + len + 1); if (NULL == d->keybuf) err(EXIT_FAILURE, "%s:%zu: realloc", p->fn, p->ln); memcpy(d->keybuf + d->keybufsz, cp, len); d->keybufsz += len; d->keybuf[d->keybufsz] = '\0'; } /* * Initial state is where we're scanning forward to find commented * instances of CAPI3REF. */ static void init(struct parse *p, char *cp) { struct defn *d; /* Look for comment hook. */ if ('*' != cp[0] || '*' != cp[1]) return; cp += 2; while (isspace((int)*cp)) cp++; /* Look for beginning of definition. */ if (strncmp(cp, "CAPI3REF:", 9)) return; cp += 9; while (isspace((int)*cp)) cp++; if ('\0' == *cp) { warnx("%s:%zu: warn: unexpected end of " "interface definition", p->fn, p->ln); return; } /* Add definition to list of existing ones. */ d = calloc(1, sizeof(struct defn)); if (NULL == d) err(EXIT_FAILURE, "%s:%zu: calloc", p->fn, p->ln); d->name = strdup(cp); if (NULL == d->name) err(EXIT_FAILURE, "%s:%zu: strdup", p->fn, p->ln); d->fn = p->fn; d->ln = p->ln; p->phase = PHASE_KEYS; TAILQ_INIT(&d->dcqhead); TAILQ_INSERT_TAIL(&p->dqhead, d, entries); } #define BPOINT(_cp) \ (';' == (_cp)[0] || \ '[' == (_cp)[0] || \ ('(' == (_cp)[0] && '*' != (_cp)[1]) || \ ')' == (_cp)[0] || \ '{' == (_cp)[0]) /* * Given a declaration (be it preprocessor or C), try to parse out a * reasonable "name" for the affair. * For a struct, for example, it'd be the struct name. * For a typedef, it'd be the type name. * For a function, it'd be the function name. */ static void grok_name(const struct decl *e, const char **start, size_t *sz) { const char *cp; *start = NULL; *sz = 0; if (DECLTYPE_CPP != e->type) { assert(';' == e->text[e->textsz - 1]); cp = e->text; do { while (isspace((int)*cp)) cp++; if (BPOINT(cp)) break; /* Function pointers... */ if ('(' == *cp) cp++; /* Pass over pointers. */ while ('*' == *cp) cp++; *start = cp; *sz = 0; while ( ! isspace((int)*cp)) { if (BPOINT(cp)) break; cp++; (*sz)++; } } while ( ! BPOINT(cp)); } else { *sz = e->textsz; *start = e->text; } } static int xrcmp(const void *p1, const void *p2) { const char *s1 = *(const char **)p1, *s2 = *(const char **)p2; return(strcasecmp(s1, s2)); } /* * Extract information from the interface definition. * Mark it as "postprocessed" on success. */ static void postprocess(const char *prefix, struct defn *d) { struct decl *first; const char *start; size_t offs, sz, i; ENTRY ent; if (TAILQ_EMPTY(&d->dcqhead)) return; /* Find the first #define or declaration. */ TAILQ_FOREACH(first, &d->dcqhead, entries) if (DECLTYPE_CPP == first->type || DECLTYPE_C == first->type) break; if (NULL == first) { warnx("%s:%zu: no entry to document", d->fn, d->ln); return; } /* * Now compute the document name (`Dt'). * We'll also use this for the filename. */ grok_name(first, &start, &sz); if (NULL == start) { warnx("%s:%zu: couldn't deduce " "entry name", d->fn, d->ln); return; } /* Document name needs all-caps. */ d->dt = malloc(sz + 1); if (NULL == d->dt) err(EXIT_FAILURE, "malloc"); memcpy(d->dt, start, sz); d->dt[sz] = '\0'; for (i = 0; i < sz; i++) d->dt[i] = toupper((int)d->dt[i]); /* Filename needs no special chars. */ asprintf(&d->fname, "%s/%.*s.3", prefix, (int)sz, start); if (NULL == d->fname) err(EXIT_FAILURE, "asprintf"); offs = strlen(prefix) + 1; for (i = 0; i < sz; i++) { if (isalnum((int)d->fname[offs + i]) || '_' == d->fname[offs + i] || '-' == d->fname[offs + i]) continue; d->fname[offs + i] = '_'; } /* * First, extract all keywords. */ for (i = 0; i < d->keybufsz; ) { while (isspace((int)d->keybuf[i])) i++; if (i == d->keybufsz) break; sz = 0; start = &d->keybuf[i]; if ('{' == d->keybuf[i]) { start = &d->keybuf[++i]; for ( ; i < d->keybufsz; i++, sz++) if ('}' == d->keybuf[i]) break; if ('}' == d->keybuf[i]) i++; } else for ( ; i < d->keybufsz; i++, sz++) if (isspace((int)d->keybuf[i])) break; if (0 == sz) continue; d->keys = realloc(d->keys, (d->keysz + 1) * sizeof(char *)); if (NULL == d->keys) err(EXIT_FAILURE, "realloc"); d->keys[d->keysz] = malloc(sz + 1); if (NULL == d->keys[d->keysz]) err(EXIT_FAILURE, "malloc"); memcpy(d->keys[d->keysz], start, sz); d->keys[d->keysz][sz] = '\0'; d->keysz++; /* Hash the keyword. */ ent.key = d->keys[d->keysz - 1]; ent.data = d; (void)hsearch(ent, ENTER); } /* * Now extract all `Nm' values for this document. * We only use CPP and C references, and hope for the best when * doing so. * Enter each one of these as a searchable keyword. */ TAILQ_FOREACH(first, &d->dcqhead, entries) { if (DECLTYPE_CPP != first->type && DECLTYPE_C != first->type) continue; grok_name(first, &start, &sz); if (NULL == start) continue; d->nms = realloc(d->nms, (d->nmsz + 1) * sizeof(char *)); if (NULL == d->nms) err(EXIT_FAILURE, "realloc"); d->nms[d->nmsz] = malloc(sz + 1); if (NULL == d->nms[d->nmsz]) err(EXIT_FAILURE, "malloc"); memcpy(d->nms[d->nmsz], start, sz); d->nms[d->nmsz][sz] = '\0'; d->nmsz++; /* Hash the name. */ ent.key = d->nms[d->nmsz - 1]; ent.data = d; (void)hsearch(ent, ENTER); } if (0 == d->nmsz) { warnx("%s:%zu: couldn't deduce " "any names", d->fn, d->ln); return; } /* * Next, scan for all `Xr' values. * We'll add more to this list later. */ for (i = 0; i < d->seealsosz; i++) { /* * Find next value starting with `['. * There's other stuff in there (whitespace or * free text leading up to these) that we're ok * to ignore. */ while (i < d->seealsosz && '[' != d->seealso[i]) i++; if (i == d->seealsosz) break; /* * Now scan for the matching `]'. * We can also have a vertical bar if we're separating a * keyword and its shown name. */ start = &d->seealso[++i]; sz = 0; while (i < d->seealsosz && ']' != d->seealso[i] && '|' != d->seealso[i]) { i++; sz++; } if (i == d->seealsosz) break; if (0 == sz) continue; /* * Continue on to the end-of-reference, if we weren't * there to begin with. */ if (']' != d->seealso[i]) while (i < d->seealsosz && ']' != d->seealso[i]) i++; /* Strip trailing whitespace. */ while (sz > 1 && ' ' == start[sz - 1]) sz--; /* Strip trailing parenthesis. */ if (sz > 2 && '(' == start[sz - 2] && ')' == start[sz - 1]) sz -= 2; d->xrs = realloc(d->xrs, (d->xrsz + 1) * sizeof(char *)); if (NULL == d->xrs) err(EXIT_FAILURE, "realloc"); d->xrs[d->xrsz] = malloc(sz + 1); if (NULL == d->xrs[d->xrsz]) err(EXIT_FAILURE, "malloc"); memcpy(d->xrs[d->xrsz], start, sz); d->xrs[d->xrsz][sz] = '\0'; d->xrsz++; } /* * Next, extract all references. * We'll accumulate these into a list of SEE ALSO tags, after. * See how these are parsed above for a description: this is * basically the same thing. */ for (i = 0; i < d->descsz; i++) { if ('[' != d->desc[i]) continue; i++; if ('[' == d->desc[i]) continue; start = &d->desc[i]; for (sz = 0; i < d->descsz; i++, sz++) if (']' == d->desc[i] || '|' == d->desc[i]) break; if (i == d->descsz) break; else if (sz == 0) continue; if (']' != d->desc[i]) while (i < d->descsz && ']' != d->desc[i]) i++; while (sz > 1 && ' ' == start[sz - 1]) sz--; if (sz > 2 && '(' == start[sz - 2] && ')' == start[sz - 1]) sz -= 2; d->xrs = realloc(d->xrs, (d->xrsz + 1) * sizeof(char *)); if (NULL == d->xrs) err(EXIT_FAILURE, "realloc"); d->xrs[d->xrsz] = malloc(sz + 1); if (NULL == d->xrs[d->xrsz]) err(EXIT_FAILURE, "malloc"); memcpy(d->xrs[d->xrsz], start, sz); d->xrs[d->xrsz][sz] = '\0'; d->xrsz++; } qsort(d->xrs, d->xrsz, sizeof(char *), xrcmp); d->postprocessed = 1; } /* * Convenience function to look up a keyword. * Returns the keyword's file if found or NULL. */ static const char * lookup(char *key) { ENTRY ent; ENTRY *res; struct defn *d; ent.key = key; res = hsearch(ent, FIND); if (NULL == res) return(NULL); d = (struct defn *)res->data; if (0 == d->nmsz) return(NULL); assert(NULL != d->nms[0]); return(d->nms[0]); } /* * Emit a valid mdoc(7) document within the given prefix. */ static void emit(const struct defn *d, const char *mdocdate) { struct decl *first; size_t sz, i, col, last, ns; FILE *f; char *cp; const char *res, *lastres, *args, *str, *end; enum tag tag; enum preproc pre; if ( ! d->postprocessed) { warnx("%s:%zu: interface has errors, not " "producing manpage", d->fn, d->ln); return; } if (0 == nofile) { if (NULL == (f = fopen(d->fname, "w"))) { warn("%s: fopen", d->fname); return; } } else f = stdout; /* Begin by outputting the mdoc(7) header. */ #if 0 fputs(".Dd $" "Mdocdate$\n", f); #else fprintf(f, ".Dd %s\n", mdocdate); #endif fprintf(f, ".Dt %s 3\n", d->dt); fputs(".Os\n", f); fputs(".Sh NAME\n", f); /* Now print the name bits of each declaration. */ for (i = 0; i < d->nmsz; i++) fprintf(f, ".Nm %s%s\n", d->nms[i], i < d->nmsz - 1 ? " ," : ""); fprintf(f, ".Nd %s\n", d->name); fputs(".Sh SYNOPSIS\n", f); TAILQ_FOREACH(first, &d->dcqhead, entries) { if (DECLTYPE_CPP != first->type && DECLTYPE_C != first->type) continue; /* Easy: just print the CPP name. */ if (DECLTYPE_CPP == first->type) { fprintf(f, ".Fd #define %s\n", first->text); continue; } /* First, strip out the sqlite CPPs. */ for (i = 0; i < first->textsz; ) { for (pre = 0; pre < PREPROC__MAX; pre++) { sz = strlen(preprocs[pre]); if (strncmp(preprocs[pre], &first->text[i], sz)) continue; i += sz; while (isspace((int)first->text[i])) i++; break; } if (pre == PREPROC__MAX) break; } /* If we're a typedef, immediately print Vt. */ if (0 == strncmp(&first->text[i], "typedef", 7)) { fprintf(f, ".Vt %s\n", &first->text[i]); continue; } /* Are we a struct? */ if (first->textsz > 2 && '}' == first->text[first->textsz - 2] && NULL != (cp = strchr(&first->text[i], '{'))) { *cp = '\0'; fprintf(f, ".Vt %s;\n", &first->text[i]); /* Restore brace for later usage. */ *cp = '{'; continue; } /* Catch remaining non-functions. */ if (first->textsz > 2 && ')' != first->text[first->textsz - 2]) { fprintf(f, ".Vt %s\n", &first->text[i]); continue; } str = &first->text[i]; if (NULL == (args = strchr(str, '('))) { /* What is this? */ fputs(".Bd -literal\n", f); fputs(&first->text[i], f); fputs("\n.Ed\n", f); continue; } /* Scroll back to end of function name. */ end = args - 1; while (end > str && isspace((int)*end)) end--; /* Scroll back to what comes before. */ for ( ; end > str; end--) if (isspace((int)*end) || '*' == *end) break; /* * If we can't find what came before, then the function * has no type, which is odd... let's just call it void. */ if (end > str) { fprintf(f, ".Ft %.*s\n", (int)(end - str + 1), str); fprintf(f, ".Fo %.*s\n", (int)(args - end - 1), end + 1); } else { fputs(".Ft void\n", f); fprintf(f, ".Fo %.*s\n", (int)(args - end), end); } /* * Convert function arguments into `Fa' clauses. * This also handles nested function pointers, which * would otherwise throw off the delimeters. */ for (;;) { str = ++args; while (isspace((int)*str)) str++; fputs(".Fa \"", f); ns = 0; while ('\0' != *str && (ns || ',' != *str) && (ns || ')' != *str)) { if ('/' == str[0] && '*' == str[1]) { str += 2; for ( ; '\0' != str[0]; str++) if ('*' == str[0] && '/' == str[1]) break; if ('\0' == *str) break; str += 2; while (isspace((int)*str)) str++; if ('\0' == *str || (0 == ns && ',' == *str) || (0 == ns && ')' == *str)) break; } if ('(' == *str) ns++; else if (')' == *str) ns--; fputc(*str, f); str++; } fputs("\"\n", f); if ('\0' == *str || ')' == *str) break; args = str; } fputs(".Fc\n", f); } fputs(".Sh DESCRIPTION\n", f); /* * Strip the crap out of the description. * "Crap" consists of things I don't understand that mess up * parsing of the HTML, for instance, *
        [[foo bar]]
        foo bar
        ...
        * These are not well-formed HTML. */ for (i = 0; i < d->descsz; i++) { if ('^' == d->desc[i] && '(' == d->desc[i + 1]) { d->desc[i] = d->desc[i + 1] = ' '; i++; continue; } else if (')' == d->desc[i] && '^' == d->desc[i + 1]) { d->desc[i] = d->desc[i + 1] = ' '; i++; continue; } else if ('^' == d->desc[i]) { d->desc[i] = ' '; continue; } else if ('[' != d->desc[i] || '[' != d->desc[i + 1]) continue; d->desc[i] = d->desc[i + 1] = ' '; for (i += 2; i < d->descsz; i++) { if (']' == d->desc[i] && ']' == d->desc[i + 1]) break; d->desc[i] = ' '; } if (i == d->descsz) continue; d->desc[i] = d->desc[i + 1] = ' '; i++; } /* * Here we go! * Print out the description as best we can. * Do on-the-fly processing of any HTML we encounter into * mdoc(7) and try to break lines up. */ col = 0; for (i = 0; i < d->descsz; ) { /* * Newlines are paragraph breaks. * If we have multiple newlines, then keep to a single * `Pp' to keep it clean. * Only do this if we're not before a block-level HTML, * as this would mean, for instance, a `Pp'-`Bd' pair. */ if ('\n' == d->desc[i]) { while (isspace((int)d->desc[i])) i++; for (tag = 0; tag < TAG__MAX; tag++) { sz = strlen(tags[tag].html); if (0 == strncmp(&d->desc[i], tags[tag].html, sz)) break; } if (TAG__MAX == tag || TAGINFO_INLINE & tags[tag].flags) { if (col > 0) fputs("\n", f); fputs(".Pp\n", f); /* We're on a new line. */ col = 0; } continue; } /* * New sentence, new line. * We guess whether this is the case by using the * dumbest possible heuristic. */ if (' ' == d->desc[i] && i && '.' == d->desc[i - 1]) { while (' ' == d->desc[i]) i++; fputs("\n", f); col = 0; continue; } /* * After 65 characters, force a break when we encounter * white-space to keep our lines more or less tidy. */ if (col > 65 && ' ' == d->desc[i]) { while (' ' == d->desc[i]) i++; fputs("\n", f); col = 0; continue; } /* * Parsing HTML tags. * Why, sqlite guys, couldn't you have used something * like markdown or something? * Sheesh. */ if ('<' == d->desc[i]) { for (tag = 0; tag < TAG__MAX; tag++) { sz = strlen(tags[tag].html); if (strncmp(&d->desc[i], tags[tag].html, sz)) continue; /* * NOOP tags don't do anything, such as * the case of `
  • ', which only * serves to end an `It' block that will * be closed out by a subsequent `It' or * end of clause `El' anyway. * Skip the trailing space. */ if (TAGINFO_NOOP & tags[tag].flags) { i += sz; while (isspace((int)d->desc[i])) i++; break; } else if (TAGINFO_INLINE & tags[tag].flags) { fputs(tags[tag].mdoc, f); i += sz; break; } /* * A breaking mdoc(7) statement. * Break the current line, output the * macro, and conditionally break * following that (or we might do * nothing at all). */ if (col > 0) { fputs("\n", f); col = 0; } fputs(tags[tag].mdoc, f); if ( ! (TAGINFO_NOBR & tags[tag].flags)) { fputs("\n", f); col = 0; } else if ( ! (TAGINFO_NOSP & tags[tag].flags)) { fputs(" ", f); col++; } i += sz; while (isspace((int)d->desc[i])) i++; break; } if (tag < TAG__MAX) continue; } else if ('[' == d->desc[i] && ']' != d->desc[i + 1]) { /* Do we start at the bracket or bar? */ for (sz = i + 1; sz < d->descsz; sz++) if ('|' == d->desc[sz] || ']' == d->desc[sz]) break; if (sz == d->descsz) continue; else if ('|' == d->desc[sz]) i = sz + 1; else i = i + 1; /* * Now handle in-page references. * Print them out as-is: we've already * accumulated them into our "SEE ALSO" values, * which we'll use below. */ for ( ; i < d->descsz; i++, col++) { if (']' == d->desc[i]) { i++; break; } fputc(d->desc[i], f); col++; } continue; } if (' ' == d->desc[i] && 0 == col) { while (' ' == d->desc[i]) i++; continue; } assert('\n' != d->desc[i]); /* * Handle some oddities. * The following HTML escapes exist in the output that I * could find. * There might be others... */ if (0 == strncmp(&d->desc[i], " ", 6)) { i += 6; fputc(' ', f); } else if (0 == strncmp(&d->desc[i], "<", 4)) { i += 4; fputc('<', f); } else if (0 == strncmp(&d->desc[i], ">", 4)) { i += 4; fputc('>', f); } else if (0 == strncmp(&d->desc[i], "[", 5)) { i += 5; fputc('[', f); } else { /* Make sure we don't trigger a macro. */ if (0 == col && '.' == d->desc[i]) fputs("\\&", f); fputc(d->desc[i], f); i++; } col++; } if (col > 0) fputs("\n", f); if (d->xrsz > 0) { /* * Look up all of our keywords (which are in the xrs * field) in the table of all known keywords. * Don't print duplicates. */ lastres = NULL; for (last = 0, i = 0; i < d->xrsz; i++) { res = lookup(d->xrs[i]); /* Ignore self-reference. */ if (res == d->nms[0] && verbose) warnx("%s:%zu: self-reference: %s", d->fn, d->ln, d->xrs[i]); if (res == d->nms[0] && verbose) continue; if (NULL == res && verbose) warnx("%s:%zu: ref not found: %s", d->fn, d->ln, d->xrs[i]); if (NULL == res) continue; /* Ignore duplicates. */ if (NULL != lastres && lastres == res) continue; if (last) fputs(" ,\n", f); else fputs(".Sh SEE ALSO\n", f); fprintf(f, ".Xr %s 3", res); last = 1; lastres = res; } if (last) fputs("\n", f); } if (0 == nofile) fclose(f); } int main(int argc, char *argv[]) { size_t i, len; FILE *f; char *cp; const char *prefix; struct parse p; int rc, ch; struct defn *d; struct decl *e; rc = 0; prefix = "."; f = stdin; memset(&p, 0, sizeof(struct parse)); p.fn = ""; p.ln = 0; p.phase = PHASE_INIT; TAILQ_INIT(&p.dqhead); while (-1 != (ch = getopt(argc, argv, "np:v"))) switch (ch) { case ('n'): nofile = 1; break; case ('p'): prefix = optarg; break; case ('v'): verbose = 1; break; default: goto usage; } time_t now = time(NULL); struct tm tm; char mdocdate[256]; if (gmtime_r(&now, &tm) == NULL) err(EXIT_FAILURE, "gmtime"); strftime(mdocdate, sizeof(mdocdate), "%B %d, %Y", &tm); /* * Read in line-by-line and process in the phase dictated by our * finite state automaton. */ while (NULL != (cp = fgetln(f, &len))) { assert(len > 0); p.ln++; if ('\n' != cp[len - 1]) { warnx("%s:%zu: unterminated line", p.fn, p.ln); break; } cp[--len] = '\0'; /* Lines are always nil-terminated. */ switch (p.phase) { case (PHASE_INIT): init(&p, cp); break; case (PHASE_KEYS): keys(&p, cp, len); break; case (PHASE_DESC): desc(&p, cp, len); break; case (PHASE_SEEALSO): seealso(&p, cp, len); break; case (PHASE_DECL): decl(&p, cp, len); break; } } /* * If we hit the last line, then try to process. * Otherwise, we failed along the way. */ if (NULL == cp) { /* * Allow us to be at the declarations or scanning for * the next clause. */ if (PHASE_INIT == p.phase || PHASE_DECL == p.phase) { if (0 == hcreate(5000)) err(EXIT_FAILURE, "hcreate"); TAILQ_FOREACH(d, &p.dqhead, entries) postprocess(prefix, d); TAILQ_FOREACH(d, &p.dqhead, entries) emit(d, mdocdate); rc = 1; } else if (PHASE_DECL != p.phase) warnx("%s:%zu: exit when not in " "initial state", p.fn, p.ln); } while ( ! TAILQ_EMPTY(&p.dqhead)) { d = TAILQ_FIRST(&p.dqhead); TAILQ_REMOVE(&p.dqhead, d, entries); while ( ! TAILQ_EMPTY(&d->dcqhead)) { e = TAILQ_FIRST(&d->dcqhead); TAILQ_REMOVE(&d->dcqhead, e, entries); free(e->text); free(e); } free(d->name); free(d->desc); free(d->dt); for (i = 0; i < d->nmsz; i++) free(d->nms[i]); for (i = 0; i < d->xrsz; i++) free(d->xrs[i]); for (i = 0; i < d->keysz; i++) free(d->keys[i]); free(d->keys); free(d->nms); free(d->xrs); free(d->fname); free(d->seealso); free(d->keybuf); free(d); } return(rc ? EXIT_SUCCESS : EXIT_FAILURE); usage: fprintf(stderr, "usage: %s [-nv] [-p prefix]\n", getprogname()); return(EXIT_FAILURE); }