/* * CDE - Common Desktop Environment * * Copyright (c) 1993-2012, The Open Group. All rights reserved. * * These libraries and programs are free software; you can * redistribute them and/or modify them under the terms of the GNU * Lesser General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) * any later version. * * These libraries and programs are distributed in the hope that * they will be useful, but WITHOUT ANY WARRANTY; without even the * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public * License along with these librararies and programs; if not, write * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth * Floor, Boston, MA 02110-1301 USA */ /* $XConsortium: sgmldecl.c /main/3 1996/06/19 17:17:29 drk $ */ /* sgmldecl.c - SGML declaration parsing. Written by James Clark (jjc@jclark.com). */ #include "sgmlincl.h" /* Symbolic names for the error numbers that are generated only by this module. 
*/
#define E_STANDARD 163
#define E_SIGNIFICANT 164
#define E_BADLIT 165
#define E_SCOPE 166
#define E_XNUM 167
#define E_BADVERSION 168
#define E_NMUNSUP 169
#define E_XNMLIT 170
#define E_CHARDESC 171
#define E_CHARDUP 172
#define E_CHARRANGE 173
#define E_7BIT 174
#define E_CHARMISSING 175
#define E_SHUNNED 176
#define E_NONSGML 177
#define E_CAPSET 178
#define E_CAPMISSING 179
#define E_SYNTAX 180
#define E_CHARNUM 181
#define E_SWITCHES 182
#define E_INSTANCE 183
#define E_ZEROFEATURE 184
#define E_YESNO 185
#define E_CAPACITY 186
#define E_NOTSUPPORTED 187
#define E_FORMAL 189
#define E_BADCLASS 190
#define E_MUSTBENON 191
#define E_BADBASECHAR 199
#define E_SYNREFUNUSED 200
#define E_SYNREFUNDESC 201
#define E_SYNREFUNKNOWN 202
#define E_SYNREFUNKNOWNSET 203
#define E_FUNDUP 204
#define E_BADFUN 205
#define E_FUNCHAR 206
#define E_GENDELIM 207
#define E_SRDELIM 208
#define E_BADKEY 209
#define E_BADQUANTITY 210
#define E_BADNAME 211
#define E_REFNAME 212
#define E_DUPNAME 213
#define E_QUANTITY 214
#define E_QTOOBIG 215
#define E_NMSTRTCNT 219
#define E_NMCHARCNT 220
#define E_NMDUP 221
#define E_NMBAD 222
#define E_NMMINUS 223
#define E_UNKNOWNSET 227

/* Representative characters used to index a lexical table and obtain the
   class that the table assigns to each kind of character. */
#define CANON_NMC '.' /* Canonical name character. */
#define CANON_NMS 'A' /* Canonical name start character. */
#define CANON_MIN ':' /* Canonical minimum data character. */

/* Status codes returned by the section parsers in this file. */
#define SUCCESS 1
#define FAIL 0

/* Number of elements in the array v (v must be a true array). */
#define SIZEOF(v) (sizeof(v)/sizeof(v[0]))

/* Non-zero if the name token in tok equals the keyword str.  The first
   byte of tok is skipped; NOTE(review): presumably a prefix/length byte
   stored by the name parser -- confirm against parsenm(). */
#define matches(tok, str) (ustrcmp((tok)+1, (str)) == 0)

/* The version literal that an SGML declaration must start with. */
static UNCH standard[] = "ISO 8879:1986";

/* Public identifiers of the two concrete syntaxes accepted after
   SYNTAX PUBLIC. */
#define REFERENCE_SYNTAX "ISO 8879:1986//SYNTAX Reference//EN"
#define CORE_SYNTAX "ISO 8879:1986//SYNTAX Core//EN"

/* Replacement reserved names collected by the NAMES section; allocated
   on first use in sdnames(), one slot per entry of key[]. */
static UNCH (*newkey)[REFNAMELEN+1] = 0;

/* One entry of a string-to-pointer lookup table.  A table is terminated
   by an entry whose name is a null pointer (see pmaplookup()). */
struct pmap {
     char *name;
     UNIV value;
};

/* The reference capacity set. */
#define REFCAPSET \
{ 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, \
35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L }

long refcapset[NCAPACITY] = REFCAPSET;

/* A pmap of known capacity sets.
*/
static struct pmap capset_map[] = {
     { "ISO 8879:1986//CAPACITY Reference//EN", (UNIV)refcapset },
     { 0 },
};

/* Table of capacity names. Must match *CAP in sgmldecl.h. */
char *captab[] = {
     "TOTALCAP",
     "ENTCAP",
     "ENTCHCAP",
     "ELEMCAP",
     "GRPCAP",
     "EXGRPCAP",
     "EXNMCAP",
     "ATTCAP",
     "ATTCHCAP",
     "AVGRPCAP",
     "NOTCAP",
     "NOTCHCAP",
     "IDCAP",
     "IDREFCAP",
     "MAPCAP",
     "LKSETCAP",
     "LKNMCAP",
};

/* The default SGML declaration. */

#define MAXNUMBER 99999999L

/* Reference quantity set */

#define REFATTCNT 40
#define REFATTSPLEN 960
#define REFBSEQLEN 960
#define REFDTAGLEN 16
#define REFDTEMPLEN 16
#define REFENTLVL 16
#define REFGRPCNT 32
#define REFGRPGTCNT 96
#define REFGRPLVL 16
#define REFNORMSEP 2
#define REFPILEN 240
#define REFTAGLEN 960
#define REFTAGLVL 24

/* Upper bounds that this implementation accepts for each quantity (see
   max_quantity[] below); several are derived from ALLOC_MAX divided by
   the size of the structure that is allocated per unit. */
#define ALLOC_MAX 65534
#define BIGINT 30000

#define MAXATTCNT ((ALLOC_MAX/sizeof(struct ad)) - 2)
#define MAXATTSPLEN BIGINT
#define MAXBSEQLEN BIGINT
#define MAXDTAGLEN 16
#define MAXDTEMPLEN 16
#define MAXENTLVL ((ALLOC_MAX/sizeof(struct source)) - 1)
#define MAXGRPCNT MAXGRPGTCNT
/* Must be between 96 and 253 */
#define MAXGRPGTCNT 253
#define MAXGRPLVL MAXGRPGTCNT
#define MAXLITLEN BIGINT
/* This guarantees that NAMELEN < LITLEN (ie there's always space for a name in a buffer intended for a literal.) */
#define MAXNAMELEN (REFLITLEN - 1)
#define MAXNORMSEP 2
#define MAXPILEN BIGINT
#define MAXTAGLEN BIGINT
#define MAXTAGLVL ((ALLOC_MAX/sizeof(struct tag)) - 1)

/* Table of quantity names. Must match Q* in sgmldecl.h. */
static char *quantity_names[] = {
     "ATTCNT",
     "ATTSPLEN",
     "BSEQLEN",
     "DTAGLEN",
     "DTEMPLEN",
     "ENTLVL",
     "GRPCNT",
     "GRPGTCNT",
     "GRPLVL",
     "LITLEN",
     "NAMELEN",
     "NORMSEP",
     "PILEN",
     "TAGLEN",
     "TAGLVL",
};

/* Largest accepted value for each quantity, in the same order as
   quantity_names[]. */
static int max_quantity[] = {
     MAXATTCNT,
     MAXATTSPLEN,
     MAXBSEQLEN,
     MAXDTAGLEN,
     MAXDTEMPLEN,
     MAXENTLVL,
     MAXGRPCNT,
     MAXGRPGTCNT,
     MAXGRPLVL,
     MAXLITLEN,
     MAXNAMELEN,
     MAXNORMSEP,
     MAXPILEN,
     MAXTAGLEN,
     MAXTAGLVL,
};

/* Allocated by sdquantity() the first time a quantity is changed; slot i
   is set to 1 when quantity i was changed by the QUANTITY section. */
static char *quantity_changed;

/* Non-zero means the APPINFO parameter was not NONE. 
*/
static int appinfosw = 0;

/* The SGML declaration in effect.  Starts out as the reference
   declaration and is overwritten by the section parsers below. */
struct sgmldecl sd = {
     REFCAPSET,                    /* capacity */
#ifdef SUPPORT_SUBDOC
     MAXNUMBER,                    /* subdoc */
#else /* not SUPPORT_SUBDOC */
     0,                            /* subdoc */
#endif /* not SUPPORT_SUBDOC */
     1,                            /* formal */
     1,                            /* omittag */
     1,                            /* shorttag */
     1,                            /* shortref */
     { 1, 0 },                     /* general/entity name case translation */
     {                             /* reference quantity set */
          REFATTCNT,
          REFATTSPLEN,
          REFBSEQLEN,
          REFDTAGLEN,
          REFDTEMPLEN,
          REFENTLVL,
          REFGRPCNT,
          REFGRPGTCNT,
          REFGRPLVL,
          REFLITLEN,
          REFNAMELEN,
          REFNORMSEP,
          REFPILEN,
          REFTAGLEN,
          REFTAGLVL,
     },
};

/* Identity mapping: base-set character number n maps to system
   character n. */
static int systemcharset[] = {
     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
     32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
     64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
     80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
     96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
     112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
     128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
     144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
     160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
     176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
     192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
     208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
     224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
     240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};

/* Known base character sets, keyed by the designating sequence that ends
   the public identifier given after BASESET. */
static struct pmap charset_map[] = {
     { "ESC 2/5 4/0", (UNIV)asciicharset }, /* ISO 646 IRV */
     { "ESC 2/8 4/2", (UNIV)asciicharset }, /* ISO Registration Number 6, ASCII */
     { SYSTEM_CHARSET_DESIGNATING_SEQUENCE, (UNIV)systemcharset },
     /* system character set 
*/
     { 0 }
};

static int synrefcharset[256]; /* the syntax reference character set */

/* Bits kept per character in char_flags[]. */
#define CHAR_NONSGML 01
#define CHAR_SIGNIFICANT 02
#define CHAR_MAGIC 04
#define CHAR_SHUNNED 010

static UNCH char_flags[256];
static int done_nonsgml = 0;       /* set to 1 at the end of sdcharset() */
static UNCH *nlextoke = 0;         /* new lextoke */
static UNCH *nlextran = 0;         /* new lextran */

/* Keywords recognized inside the SGML declaration. */
static UNCH kcharset[] = "CHARSET";
static UNCH kbaseset[] = "BASESET";
static UNCH kdescset[] = "DESCSET";
static UNCH kunused[] = "UNUSED";
static UNCH kcapacity[] = "CAPACITY";
static UNCH kpublic[] = "PUBLIC";
static UNCH ksgmlref[] = "SGMLREF";
static UNCH kscope[] = "SCOPE";
static UNCH kdocument[] = "DOCUMENT";
static UNCH kinstance[] = "INSTANCE";
static UNCH ksyntax[] = "SYNTAX";
static UNCH kswitches[] = "SWITCHES";
static UNCH kfeatures[] = "FEATURES";
static UNCH kminimize[] = "MINIMIZE";
static UNCH kdatatag[] = "DATATAG";
static UNCH komittag[] = "OMITTAG";
static UNCH krank[] = "RANK";
static UNCH kshorttag[] = "SHORTTAG";
static UNCH klink[] = "LINK";
static UNCH ksimple[] = "SIMPLE";
static UNCH kimplicit[] = "IMPLICIT";
static UNCH kexplicit[] = "EXPLICIT";
static UNCH kother[] = "OTHER";
static UNCH kconcur[] = "CONCUR";
static UNCH ksubdoc[] = "SUBDOC";
static UNCH kformal[] = "FORMAL";
static UNCH kyes[] = "YES";
static UNCH kno[] = "NO";
static UNCH kappinfo[] = "APPINFO";
static UNCH knone[] = "NONE";
static UNCH kshunchar[] = "SHUNCHAR";
static UNCH kcontrols[] = "CONTROLS";
static UNCH kfunction[] = "FUNCTION";
static UNCH krs[] = "RS";
static UNCH kre[] = "RE";
static UNCH kspace[] = "SPACE";
static UNCH knaming[] = "NAMING";
static UNCH klcnmstrt[] = "LCNMSTRT";
static UNCH kucnmstrt[] = "UCNMSTRT";
static UNCH klcnmchar[] = "LCNMCHAR";
static UNCH kucnmchar[] = "UCNMCHAR";
static UNCH knamecase[] = "NAMECASE";
static UNCH kdelim[] = "DELIM";
static UNCH kgeneral[] = "GENERAL";
static UNCH kentity[] = "ENTITY";
static UNCH kshortref[] = "SHORTREF";
static UNCH knames[] = "NAMES";
static UNCH kquantity[] =
"QUANTITY";

/* Errors in this module are reported through the markup declaration
   error routine. */
#define sderr mderr

static UNIV pmaplookup P((struct pmap *, char *));
static UNCH *ltous P((long));
static VOID sdfixstandard P((UNCH *));
static int sdparm P((UNCH *, struct parse *));
static int sdname P((UNCH *, UNCH *));
static int sdckname P((UNCH *, UNCH *));
static int sdversion P((UNCH *));
static int sdcharset P((UNCH *));
static int sdcsdesc P((UNCH *, int *));
static int sdpubcapacity P((UNCH *));
static int sdcapacity P((UNCH *));
static int sdscope P((UNCH *));
static VOID setlexical P((void));
static VOID noemptytag P((void));
static int sdpubsyntax P((UNCH *));
static int sdsyntax P((UNCH *));
static int sdxsyntax P((UNCH *));
static int sdtranscharnum P((UNCH *));
static int sdtranschar P((int));
static int sdshunchar P((UNCH *));
static int sdsynref P((UNCH *));
static int sdfunction P((UNCH *));
static int sdnaming P((UNCH *));
static int sddelim P((UNCH *));
static int sdnames P((UNCH *));
static int sdquantity P((UNCH *));
static int sdfeatures P((UNCH *));
static int sdappinfo P((UNCH *));
static VOID bufsalloc P((void));
static VOID bufsrealloc P((void));

/* Parse the SGML declaration. Return non-zero if there was some appinfo.
   The sections are parsed in the fixed order listed in section[];
   parsing stops at the first section that returns FAIL. */

int sgmldecl()
{
     int i;
     int errsw = 0;
     UNCH endbuf[REFNAMELEN+2]; /* buffer for parsing terminating > */
     static int (*section[]) P((UNCH *)) = {
          sdversion,
          sdcharset,
          sdcapacity,
          sdscope,
          sdsyntax,
          sdfeatures,
          sdappinfo,
     };
     /* These are needed if we use mderr. */
     parmno = 0;
     mdname = sgmlkey;
     subdcl = NULL;
     for (i = 0; i < SIZEOF(section); i++)
          if ((*section[i])(tbuf) == FAIL) {
               errsw = 1;
               break;
          }
     /* The lexical tables are only updated when every section parsed. */
     if (!errsw)
          setlexical();
     /* Buffers are resized whether or not an error occurred. */
     bufsrealloc();
     /* Parse the >. Don't overwrite the appinfo. */
     if (!errsw)
          sdparm(endbuf, 0);
     /* We must exit if we hit end of document. 
*/
     if (pcbsd.action == EOD_)
          exiterr(161, &pcbsd);
     if (!errsw && pcbsd.action != ESGD)
          sderr(126, (UNCH *)0, (UNCH *)0);
     return appinfosw;
}

/* Parse the literal (which should contain the version of the standard) at the beginning of a SGML declaration.
   A wrong version is reported but does not make the section fail. */

static int sdversion(tbuf)
UNCH *tbuf;
{
     if (sdparm(tbuf, &pcblitv) != LIT1) {
          sderr(123, (UNCH *)0, (UNCH *)0);
          return FAIL;
     }
     sdfixstandard(tbuf);
     if (ustrcmp(tbuf, standard) != 0)
          sderr(E_BADVERSION, tbuf, standard);
     return SUCCESS;
}

/* Parse the CHARSET section. Use one token lookahead.
   On success the CHAR_NONSGML bit is set in char_flags[] for every
   character that the document character set leaves undescribed or
   declares UNUSED. */

static int sdcharset(tbuf)
UNCH *tbuf;
{
     int i;
     int status[256];

     if (sdname(tbuf, kcharset) == FAIL)
          return FAIL;
     /* Prime the lookahead token that sdcsdesc() expects. */
     (void)sdparm(tbuf, 0);

     if (sdcsdesc(tbuf, status) == FAIL)
          return FAIL;

     for (i = 128; i < 256; i++)
          if (status[i] != UNDESC)
               break;
     if (i >= 256) {
          /* Only a 7-bit character set was described. Fill it out to 8-bits. */
          for (i = 128; i < 256; i++)
               status[i] = UNUSED;
#if 0
          sderr(E_7BIT, (UNCH *)0, (UNCH *)0);
#endif
     }
     /* Characters that are declared UNUSED in the document character set are assigned to non-SGML. */
     for (i = 0; i < 256; i++) {
          if (status[i] == UNDESC) {
               sderr(E_CHARMISSING, ltous((long)i), (UNCH *)0);
               char_flags[i] |= CHAR_NONSGML;
          }
          else if (status[i] == UNUSED)
               char_flags[i] |= CHAR_NONSGML;
     }
     done_nonsgml = 1;
     return SUCCESS;
}

/* Parse a character set description. Uses one character lookahead.
   tbuf must already hold the current token on entry.  status[] (256
   entries) receives, for each described character number, either the
   corresponding base-set value or one of UNUSED, UNKNOWN, UNKNOWN_SET;
   entries never described are left as UNDESC.  Returns SUCCESS or FAIL. */

static int sdcsdesc(tbuf, status)
UNCH *tbuf;
int *status;
{
     int i;
     int nsets = 0;
     struct fpi fpi;

     for (i = 0; i < 256; i++)
          status[i] = UNDESC;

     /* One iteration per BASESET ... DESCSET ... group. */
     for (;;) {
          int nchars;
          int *baseset = 0;

          /* The first group is mandatory; afterwards any token other
             than BASESET ends the description and is left as lookahead. */
          if (pcbsd.action != NAS1) {
               if (nsets == 0) {
                    sderr(120, (UNCH *)0, (UNCH *)0);
                    return FAIL;
               }
               break;
          }
          if (!matches(tbuf, kbaseset)) {
               if (nsets == 0) {
                    sderr(118, tbuf+1, kbaseset);
                    return FAIL;
               }
               break;
          }
          nsets++;

          MEMZERO((UNIV)&fpi, FPISZ);
          if (sdparm(tbuf, &pcblitv) != LIT1) {
               sderr(123, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
          fpi.fpipubis = tbuf;
          /* Give a warning if it is not a CHARSET fpi. 
*/
          if (parsefpi(&fpi))
               sderr(E_FORMAL, (UNCH *)0, (UNCH *)0);
          else if (fpi.fpic != FPICHARS)
               sderr(E_BADCLASS, kcharset, (UNCH *)0);
          else {
               /* Terminate the identifier after the fpil/fpill span and
                  look that span up in charset_map. */
               fpi.fpipubis[fpi.fpil + fpi.fpill] = '\0';
               baseset = (int *)pmaplookup(charset_map,
                                           (char *)fpi.fpipubis + fpi.fpil);
               if (!baseset)
                    sderr(E_UNKNOWNSET, fpi.fpipubis + fpi.fpil, (UNCH *)0);
          }
          if (sdname(tbuf, kdescset) == FAIL)
               return FAIL;
          nchars = 0;
          /* One iteration per "start count base" triple. */
          for (;;) {
               long start, count;
               long basenum;
               if (sdparm(tbuf, 0) != NUM1)
                    break;
               start = atol((char *)tbuf);
               if (sdparm(tbuf, 0) != NUM1) {
                    sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
                    return FAIL;
               }
               count = atol((char *)tbuf);
               /* The third field is a number, a literal, or UNUSED. */
               switch (sdparm(tbuf, &pcblitv)) {
               case NUM1:
                    basenum = atol((char *)tbuf);
                    break;
               case LIT1:
                    basenum = UNKNOWN;
                    break;
               case NAS1:
                    if (matches(tbuf, kunused)) {
                         basenum = UNUSED;
                         break;
                    }
                    /* fall through */
               default:
                    sderr(E_CHARDESC, ltous(start), (UNCH *)0);
                    return FAIL;
               }
               if (start + count > 256)
                    sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
               else {
                    int i;
                    int lim = (int)start + count;
                    for (i = (int)start; i < lim; i++) {
                         if (status[i] != UNDESC)
                              sderr(E_CHARDUP, ltous((long)i), (UNCH *)0);
                         else if (basenum == UNUSED || basenum == UNKNOWN)
                              status[i] = (int)basenum;
                         else if (baseset == 0)
                              status[i] = UNKNOWN_SET;
                         else {
                              /* Corresponding character in the base set. */
                              int n = basenum + (i - start);
                              if (n < 0 || n > 255)
                                   sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
                              else if (baseset[n] == UNUSED)
                                   sderr(E_BADBASECHAR, ltous((long)n), (UNCH *)0);
                              else
                                   status[i] = baseset[n];
                         }
                    }
               }
               nchars++;
          }
          /* A DESCSET with no triples at all is an error. */
          if (nchars == 0) {
               sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
     }
     return SUCCESS;
}

/* Parse the CAPACITY section. Uses one token lookahead. 
*/ static int sdcapacity(tbuf) UNCH *tbuf; { int ncap; if (sdckname(tbuf, kcapacity) == FAIL) return FAIL; if (sdparm(tbuf, 0) != NAS1) { sderr(120, (UNCH *)0, (UNCH *)0); return FAIL; } if (matches(tbuf, kpublic)) return sdpubcapacity(tbuf); if (!matches(tbuf, ksgmlref)) { sderr(E_CAPACITY, tbuf+1, (UNCH *)0); return FAIL; } memcpy((UNIV)sd.capacity, (UNIV)refcapset, sizeof(sd.capacity)); ncap = 0; for (;;) { int capno = -1; int i; if (sdparm(tbuf, 0) != NAS1) break; for (i = 0; i < SIZEOF(captab); i++) if (matches(tbuf, captab[i])) { capno = i; break; } if (capno < 0) break; if (sdparm(tbuf, 0) != NUM1) { sderr(E_XNUM, (UNCH *)0, (UNCH *)0); return FAIL; } sd.capacity[capno] = atol((char *)tbuf); ncap++; } if (ncap == 0) { sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0); return FAIL; } return SUCCESS; } /* Parse a CAPACITY section that started with PUBLIC. Must do one token lookahead, since sdcapacity() also does. */ static int sdpubcapacity(tbuf) UNCH *tbuf; { UNIV ptr; if (sdparm(tbuf, &pcblitv) != LIT1) { sderr(123, (UNCH *)0, (UNCH *)0); return FAIL; } sdfixstandard(tbuf); ptr = pmaplookup(capset_map, (char *)tbuf); if (!ptr) sderr(E_CAPSET, tbuf, (UNCH *)0); else memcpy((UNIV)sd.capacity, (UNIV)ptr, sizeof(sd.capacity)); (void)sdparm(tbuf, 0); return SUCCESS; } /* Parse the SCOPE section. Uses no lookahead. */ static int sdscope(tbuf) UNCH *tbuf; { if (sdckname(tbuf, kscope) == FAIL) return FAIL; if (sdparm(tbuf, 0) != NAS1) { sderr(120, (UNCH *)0, (UNCH *)0); return FAIL; } if (matches(tbuf, kdocument)) ; else if (matches(tbuf, kinstance)) sderr(E_INSTANCE, (UNCH *)0, (UNCH *)0); else { sderr(E_SCOPE, tbuf+1, (UNCH *)0); return FAIL; } return SUCCESS; } /* Parse the SYNTAX section. Uses one token lookahead. 
*/ static int sdsyntax(tbuf) UNCH *tbuf; { if (sdname(tbuf, ksyntax) == FAIL) return FAIL; if (sdparm(tbuf, 0) != NAS1) { sderr(120, (UNCH *)0, (UNCH *)0); return FAIL; } if (matches(tbuf, kpublic)) return sdpubsyntax(tbuf); return sdxsyntax(tbuf); } /* Parse the SYNTAX section which starts with PUBLIC. Uses one token lookahead. */ static int sdpubsyntax(tbuf) UNCH *tbuf; { int nswitches; if (sdparm(tbuf, &pcblitv) != LIT1) return FAIL; sdfixstandard(tbuf); if (ustrcmp(tbuf, CORE_SYNTAX) == 0) sd.shortref = 0; else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0) sd.shortref = 1; else sderr(E_SYNTAX, tbuf, (UNCH *)0); if (sdparm(tbuf, 0) != NAS1) return SUCCESS; if (!matches(tbuf, kswitches)) return SUCCESS; nswitches = 0; for (;;) { int errsw = 0; if (sdparm(tbuf, 0) != NUM1) break; if (atol((char *)tbuf) > 255) { sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0); errsw = 1; } if (sdparm(tbuf, 0) != NUM1) { sderr(E_XNUM, (UNCH *)0, (UNCH *)0); return FAIL; } if (!errsw) { if (atol((char *)tbuf) > 255) sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0); } nswitches++; } if (nswitches == 0) { sderr(E_XNUM, (UNCH *)0, (UNCH *)0); return FAIL; } sderr(E_SWITCHES, (UNCH *)0, (UNCH *)0); return SUCCESS; } /* Parse an explicit concrete syntax. Uses one token lookahead. */ static int sdxsyntax(tbuf) UNCH *tbuf; { static int (*section[]) P((UNCH *)) = { sdshunchar, sdsynref, sdfunction, sdnaming, sddelim, sdnames, sdquantity, }; int i; for (i = 0; i < SIZEOF(section); i++) if ((*section[i])(tbuf) == FAIL) return FAIL; return SUCCESS; } /* Parse the SHUNCHAR section. Uses one token lookahead. 
*/ static int sdshunchar(tbuf) UNCH *tbuf; { int i; for (i = 0; i < 256; i++) char_flags[i] &= ~CHAR_SHUNNED; if (sdckname(tbuf, kshunchar) == FAIL) return FAIL; if (sdparm(tbuf, 0) == NAS1) { if (matches(tbuf, knone)) { (void)sdparm(tbuf, 0); return SUCCESS; } if (matches(tbuf, kcontrols)) { for (i = 0; i < 256; i++) if (ISASCII(i) && iscntrl(i)) char_flags[i] |= CHAR_SHUNNED; if (sdparm(tbuf, 0) != NUM1) return SUCCESS; } } if (pcbsd.action != NUM1) { sderr(E_XNUM, (UNCH *)0, (UNCH *)0); return FAIL; } do { long n = atol((char *)tbuf); if (n > 255) sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0); else char_flags[(int)n] |= CHAR_SHUNNED; } while (sdparm(tbuf, 0) == NUM1); return SUCCESS; } /* Parse the syntax reference character set. Uses one token lookahead. */ static int sdsynref(tbuf) UNCH *tbuf; { return sdcsdesc(tbuf, synrefcharset); } /* Translate a character number from the syntax reference character set to the system character set. If it can't be done, give an error message and return -1. */ static int sdtranscharnum(tbuf) UNCH *tbuf; { long n = atol((char *)tbuf); if (n > 255) { sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0); return -1; } return sdtranschar((int)n); } static int sdtranschar(n) int n; { int ch = synrefcharset[n]; if (ch >= 0) return ch; switch (ch) { case UNUSED: sderr(E_SYNREFUNUSED, ltous((long)n), (UNCH *)0); break; case UNDESC: sderr(E_SYNREFUNDESC, ltous((long)n), (UNCH *)0); break; case UNKNOWN: sderr(E_SYNREFUNKNOWN, ltous((long)n), (UNCH *)0); break; case UNKNOWN_SET: sderr(E_SYNREFUNKNOWNSET, ltous((long)n), (UNCH *)0); break; default: abort(); } return -1; } /* Parse the function section. Uses two tokens lookahead. "NAMING" could be a function name. 
*/
/* Expects FUNCTION as the current token.  The three mandatory functions
   RE, RS and SPACE must follow in that order, each with a character
   number; then any number of added functions (name, class name, number)
   up to NAMING LCNMSTRT.  Only the reference assignments are supported:
   any other assignment, any added function other than TAB, or a missing
   TAB is reported once with E_FUNCHAR.  Returns SUCCESS or FAIL. */

static int sdfunction(tbuf)
UNCH *tbuf;
{
     static UNCH *fun[] = { kre, krs, kspace };
     static int funval[] = { RECHAR, RSCHAR, ' ' };
     int i;
     int had_tab = 0;
     int changed = 0; /* attempted to change reference syntax */

     if (sdckname(tbuf, kfunction) == FAIL)
          return FAIL;
     /* Mandatory functions, in fixed order. */
     for (i = 0; i < SIZEOF(fun); i++) {
          int ch;
          if (sdname(tbuf, fun[i]) == FAIL)
               return FAIL;
          if (sdparm(tbuf, 0) != NUM1) {
               sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
          ch = sdtranscharnum(tbuf);
          if (ch >= 0 && ch != funval[i])
               changed = 1;
     }
     /* Added functions, until NAMING LCNMSTRT is seen. */
     for (;;) {
          int tabsw = 0;
          int namingsw = 0;
          /* First token: the function name. */
          if (sdparm(tbuf, 0) != NAS1) {
               sderr(120, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
          if (matches(tbuf, (UNCH *)"TAB")) {
               tabsw = 1;
               if (had_tab)
                    sderr(E_FUNDUP, (UNCH *)0, (UNCH *)0);
          }
          else {
               /* RE, RS and SPACE may not be redefined here. */
               for (i = 0; i < SIZEOF(fun); i++)
                    if (matches(tbuf, fun[i]))
                         sderr(E_BADFUN, fun[i], (UNCH *)0);
               if (matches(tbuf, knaming))
                    namingsw = 1;
               else
                    changed = 1;
          }
          /* Second token: a name (only checked to be a name), or
             LCNMSTRT if the first token was the NAMING keyword. */
          if (sdparm(tbuf, 0) != NAS1) {
               sderr(120, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
          if (namingsw) {
               /* NAMING followed by LCNMSTRT starts the next section;
                  LCNMSTRT stays in tbuf for sdnaming(). */
               if (matches(tbuf, klcnmstrt))
                    break;
               /* Otherwise NAMING was used as a function name. */
               changed = 1;
          }
          /* Third token: the character number. */
          if (sdparm(tbuf, 0) != NUM1) {
               sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }
          if (tabsw && !had_tab) {
               int ch = sdtranscharnum(tbuf);
               if (ch >= 0 && ch != TABCHAR)
                    changed = 1;
               had_tab = 1;
          }
     }
     if (!had_tab)
          changed = 1;
     if (changed)
          sderr(E_FUNCHAR, (UNCH *)0, (UNCH *)0);
     return SUCCESS;
}

/* Parse the NAMING section. Uses no lookahead. 
*/ static int sdnaming(tbuf) UNCH *tbuf; { int i; int bad = 0; static UNCH *classes[] = { klcnmstrt, kucnmstrt, klcnmchar, kucnmchar }; static UNCH *types[] = { kgeneral, kentity }; #define NCLASSES SIZEOF(classes) int bufsize = 4; /* allocated size of buf */ UNCH *buf = (UNCH *)rmalloc(bufsize); /* holds characters in naming classes */ int bufi = 0; /* next index into buf */ int start[NCLASSES]; /* index of first character for each class */ int count[NCLASSES]; /* number of characters for each class */ for (i = 0; i < NCLASSES; i++) { UNCH *s; if (sdckname(tbuf, classes[i]) == FAIL) { frem((UNIV)buf); return FAIL; } if (sdparm(tbuf, &pcblitp) != LIT1) { sderr(123, (UNCH *)0, (UNCH *)0); frem((UNIV)buf); return FAIL; } start[i] = bufi; for (s = tbuf; *s; s++) { int c = *s; if (c == DELNONCH) { c = UNSHIFTNON(*s); s++; } c = sdtranschar(c); if (c < 0) bad = 1; else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC)) && c != '.' && c != '-') { int class = lextoke[c]; if (class == SEP || class == SP || class == NMC || class == NMS || class == NU) sderr(E_NMBAD, ltous((long)c), (UNCH *)0); else sderr(E_NMUNSUP, ltous((long)c), (UNCH *)0); bad = 1; } if (bufi >= bufsize) buf = (UNCH *)rrealloc((UNIV)buf, bufsize *= 2); buf[bufi++] = c; } count[i] = bufi - start[i]; (void)sdparm(tbuf, 0); } if (!bad && count[0] != count[1]) { sderr(E_NMSTRTCNT, (UNCH *)0, (UNCH *)0); bad = 1; } if (!bad && count[2] != count[3]) { sderr(E_NMCHARCNT, (UNCH *)0, (UNCH *)0); bad = 1; } if (!bad) { nlextoke = (UNCH *)rmalloc(256); memcpy((UNIV)nlextoke, lextoke, 256); nlextoke['.'] = nlextoke['-'] = INV; nlextran = (UNCH *)rmalloc(256); memcpy((UNIV)nlextran, lextran, 256); for (i = 0; i < count[0]; i++) { UNCH lc = buf[start[0] + i]; UNCH uc = buf[start[1] + i]; nlextoke[lc] = NMS; nlextoke[uc] = NMS; nlextran[lc] = uc; } for (i = 0; i < count[2]; i++) { UNCH lc = buf[start[2] + i]; UNCH uc = buf[start[3] + i]; if (nlextoke[lc] == NMS) { sderr(E_NMDUP, ltous((long)lc), (UNCH *)0); bad = 1; 
} else if (nlextoke[uc] == NMS) { sderr(E_NMDUP, ltous((long)uc), (UNCH *)0); bad = 1; } else { nlextoke[lc] = NMC; nlextoke[uc] = NMC; nlextran[lc] = uc; } } if (nlextoke['-'] != NMC) { sderr(E_NMMINUS, (UNCH *)0, (UNCH *)0); bad = 1; } if (bad) { if (nlextoke) { frem((UNIV)nlextoke); nlextoke = 0; } if (nlextran) { frem((UNIV)nlextran); nlextran = 0; } } } frem((UNIV)buf); if (sdckname(tbuf, knamecase) == FAIL) return FAIL; for (i = 0; i < SIZEOF(types); ++i) { if (sdname(tbuf, types[i]) == FAIL) return FAIL; if (sdparm(tbuf, 0) != NAS1) { sderr(120, (UNCH *)0, (UNCH *)0); return FAIL; } if (matches(tbuf, kyes)) sd.namecase[i] = 1; else if (matches(tbuf, kno)) sd.namecase[i] = 0; else { sderr(E_YESNO, tbuf+1, (UNCH *)0); return FAIL; } } return SUCCESS; } /* Parse the DELIM section. Uses one token lookahead. */ static int sddelim(tbuf) UNCH *tbuf; { int changed = 0; if (sdname(tbuf, kdelim) == FAIL || sdname(tbuf, kgeneral) == FAIL || sdname(tbuf, ksgmlref) == FAIL) return FAIL; for (;;) { if (sdparm(tbuf, 0) != NAS1) { sderr(120, (UNCH *)0, (UNCH *)0); return FAIL; } if (matches(tbuf, kshortref)) break; if (sdparm(tbuf, &pcblitp) != LIT1) { sderr(123, (UNCH *)0, (UNCH *)0); return FAIL; } changed = 1; } if (changed) { sderr(E_GENDELIM, (UNCH *)0,(UNCH *)0); changed = 0; } if (sdparm(tbuf, 0) != NAS1) { sderr(120, (UNCH *)0, (UNCH *)0); return FAIL; } if (matches(tbuf, ksgmlref)) sd.shortref = 1; else if (matches(tbuf, knone)) sd.shortref = 0; else { sderr(118, tbuf+1, ksgmlref); /* probably they forgot SGMLREF */ return FAIL; } while (sdparm(tbuf, &pcblitp) == LIT1) changed = 1; if (changed) sderr(E_SRDELIM, (UNCH *)0, (UNCH *)0); return SUCCESS; } /* Parse the NAMES section. Uses one token lookahead. 
*/
/* Expects NAMES as the current token, followed by SGMLREF and then any
   number of "reference-name new-name" pairs, ending at QUANTITY or at
   the first token that is not a name.  A new name that equals a
   reference name or an already assigned new name is reported and
   ignored.  Afterwards every assigned new name is swapped into key[],
   leaving the old name in newkey[].  Returns SUCCESS or FAIL. */

static int sdnames(tbuf)
UNCH *tbuf;
{
     int slot;

     if (sdckname(tbuf, knames) == FAIL)
          return FAIL;
     if (sdname(tbuf, ksgmlref) == FAIL)
          return FAIL;

     for (;;) {
          int k;
          int clash = 0;

          if (sdparm(tbuf, 0) != NAS1)
               break;
          if (matches(tbuf, kquantity))
               break;

          /* Which reserved name is being replaced? */
          slot = 0;
          while (slot < NKEYS && !matches(tbuf, key[slot]))
               slot++;
          if (slot >= NKEYS) {
               sderr(E_BADKEY, tbuf+1, (UNCH *)0);
               return FAIL;
          }

          if (sdparm(tbuf, &pcblitp) != NAS1) {
               sderr(120, (UNCH *)0, (UNCH *)0);
               return FAIL;
          }

          /* The table of new names is created zero-filled on first use. */
          if (!newkey) {
               newkey = (UNCH (*)[REFNAMELEN+1])rmalloc((REFNAMELEN+1)*NKEYS);
               MEMZERO((UNIV)newkey, (REFNAMELEN+1)*NKEYS);
          }

          /* The new name must differ from every name in use. */
          for (k = 0; k < NKEYS && !clash; k++) {
               if (matches(tbuf, key[k])) {
                    sderr(E_REFNAME, tbuf + 1, (UNCH *)0);
                    clash = 1;
               }
               else if (matches(tbuf, newkey[k])) {
                    sderr(E_DUPNAME, tbuf + 1, (UNCH *)0);
                    clash = 1;
               }
          }
          if (!clash)
               ustrcpy(newkey[slot], tbuf + 1);
     }

     /* Now install the new keys. */
     if (newkey) {
          for (slot = 0; slot < NKEYS; slot++) {
               UNCH saved[REFNAMELEN + 1];

               if (newkey[slot][0] == '\0')
                    continue;
               ustrcpy(saved, key[slot]);
               ustrcpy(key[slot], newkey[slot]);
               ustrcpy(newkey[slot], saved);
          }
     }
     return SUCCESS;
}

/* Parse the QUANTITY section. Uses one token lookahead. 
*/ static int sdquantity(tbuf) UNCH *tbuf; { int quantity[NQUANTITY]; int i; for (i = 0; i < NQUANTITY; i++) quantity[i] = -1; if (sdckname(tbuf, kquantity) == FAIL) return FAIL; if (sdname(tbuf, ksgmlref) == FAIL) return FAIL; while (sdparm(tbuf, 0) == NAS1 && !matches(tbuf, kfeatures)) { long n; for (i = 0; i < SIZEOF(quantity_names); i++) if (matches(tbuf, quantity_names[i])) break; if (i >= SIZEOF(quantity_names)) { sderr(E_BADQUANTITY, tbuf + 1, (UNCH *)0); return FAIL; } if (sdparm(tbuf, 0) != NUM1) { sderr(E_XNUM, (UNCH *)0, (UNCH *)0); return FAIL; } n = atol((char *)tbuf); if (n < sd.quantity[i]) sderr(E_QUANTITY, (UNCH *)quantity_names[i], ltous((long)sd.quantity[i])); else if (n > max_quantity[i]) { sderr(E_QTOOBIG, (UNCH *)quantity_names[i], ltous((long)max_quantity[i])); quantity[i] = max_quantity[i]; } else quantity[i] = (int)n; } for (i = 0; i < NQUANTITY; i++) if (quantity[i] > 0) { sd.quantity[i] = quantity[i]; if (!quantity_changed) quantity_changed = (char *)rmalloc(NQUANTITY); quantity_changed[i] = 1; } return SUCCESS; } /* Parse the FEATURES section. Uses no lookahead. */ static int sdfeatures(tbuf) UNCH *tbuf; { static struct { UNCH *name; UNCH argtype; /* 0 = no argument, 1 = boolean, 2 = numeric */ UNIV valp; /* UNCH * if boolean, long * if numeric. 
*/ } features[] = { { kminimize, 0, 0 }, { kdatatag, 1, 0 }, { komittag, 1, (UNIV)&sd.omittag }, { krank, 1, 0 }, { kshorttag, 1, (UNIV)&sd.shorttag }, { klink, 0, 0 }, { ksimple, 2, 0 }, { kimplicit, 1, 0 }, { kexplicit, 2, 0 }, { kother, 0, 0 }, { kconcur, 2, 0 }, { ksubdoc, 2, (UNIV)&sd.subdoc }, { kformal, 1, (UNIV)&sd.formal }, }; int i; if (sdckname(tbuf, kfeatures) == FAIL) return FAIL; for (i = 0; i < SIZEOF(features); i++) { if (sdname(tbuf, features[i].name) == FAIL) return FAIL; if (features[i].argtype > 0) { long n; if (sdparm(tbuf, 0) != NAS1) { sderr(120, (UNCH *)0, (UNCH *)0); return FAIL; } if (matches(tbuf, kyes)) { if (features[i].argtype > 1) { if (sdparm(tbuf, 0) != NUM1) { sderr(E_XNUM, (UNCH *)0, (UNCH *)0); return FAIL; } n = atol((char *)tbuf); if (n == 0) sderr(E_ZEROFEATURE, features[i].name, (UNCH *)0); } else n = 1; } else if (matches(tbuf, kno)) n = 0; else { sderr(E_YESNO, tbuf+1, (UNCH *)0); return FAIL; } if (features[i].valp == 0) { if (n > 0) sderr(E_NOTSUPPORTED, features[i].name, (UNCH *)0); } else if (features[i].argtype > 1) *(long *)features[i].valp = n; else *(UNCH *)features[i].valp = (UNCH)n; } } if (!sd.shorttag) noemptytag(); return SUCCESS; } /* Parse the APPINFO section. Uses no lookahead. */ static int sdappinfo(tbuf) UNCH *tbuf; { if (sdname(tbuf, kappinfo) == FAIL) return FAIL; switch (sdparm(tbuf, &pcblitv)) { case LIT1: appinfosw = 1; break; case NAS1: if (matches(tbuf, knone)) break; sderr(118, tbuf+1, knone); return FAIL; default: sderr(E_XNMLIT, knone, (UNCH *)0); return FAIL; } return SUCCESS; } /* Change a prefix of ISO 8879-1986 to ISO 8879:1986. Amendment 1 to the standard requires the latter. 
*/ static VOID sdfixstandard(tbuf) UNCH *tbuf; { if (strncmp((char *)tbuf, "ISO 8879-1986", 13) == 0) { sderr(E_STANDARD, (UNCH *)0, (UNCH *)0); tbuf[8] = ':'; } } static int sdname(tbuf, key) UNCH *tbuf; UNCH *key; { if (sdparm(tbuf, 0) != NAS1) { sderr(120, (UNCH *)0, (UNCH *)0); return FAIL; } if (!matches(tbuf, key)) { sderr(118, tbuf+1, key); return FAIL; } return SUCCESS; } static int sdckname(tbuf, key) UNCH *tbuf; UNCH *key; { if (pcbsd.action != NAS1) { sderr(120, (UNCH *)0, (UNCH *)0); return FAIL; } if (!matches(tbuf, key)) { sderr(118, tbuf+1, key); return FAIL; } return SUCCESS; } /* Parse a SGML declaration parameter. If lpcb is NULL, pt must be REFNAMELEN+2 characters long, otherwise at least LITLEN+2 characters long. LPCB should be NULL if a literal is not allowed. */ static int sdparm(pt, lpcb) UNCH *pt; /* Token buffer. */ struct parse *lpcb; /* PCB for literal parse. */ { for (;;) { parse(&pcbsd); if (pcbsd.action != ISIG) break; sderr(E_SIGNIFICANT, (UNCH *)0, (UNCH *)0); } ++parmno; switch (pcbsd.action) { case LIT1: if (!lpcb) { sderr(E_BADLIT, (UNCH *)0, (UNCH *)0); REPEATCC; return pcbsd.action = INV_; } parselit(pt, lpcb, REFLITLEN, lex.d.lit); return pcbsd.action; case LIT2: if (!lpcb) { sderr(E_BADLIT, (UNCH *)0, (UNCH *)0); REPEATCC; return pcbsd.action = INV_; } parselit(pt, lpcb, REFLITLEN, lex.d.lita); return pcbsd.action = LIT1; case NAS1: parsenm(pt, 1); return pcbsd.action; case NUM1: parsetkn(pt, NU, REFNAMELEN); return pcbsd.action; } return pcbsd.action; } VOID sdinit() { int i; /* Shunned character numbers in the reference concrete syntax. */ static UNCH refshun[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255 }; UNCH **p; /* A character is magic if it is a non-SGML character used for some internal purpose in the parser. 
*/ char_flags[EOS] |= CHAR_MAGIC; char_flags[EOBCHAR] |= CHAR_MAGIC; char_flags[EOFCHAR] |= CHAR_MAGIC; char_flags[GENRECHAR] |= CHAR_MAGIC; char_flags[DELNONCH] |= CHAR_MAGIC; char_flags[DELCDATA] |= CHAR_MAGIC; char_flags[DELSDATA] |= CHAR_MAGIC; /* Figure out the significant SGML characters. */ for (p = lextabs; *p; p++) { UNCH datclass = (*p)[CANON_DATACHAR]; UNCH nonclass = (*p)[CANON_NONSGML]; for (i = 0; i < 256; i++) if (!(char_flags[i] & CHAR_MAGIC) && (*p)[i] != datclass && (*p)[i] != nonclass) char_flags[i] |= CHAR_SIGNIFICANT; } for (i = 0; i < SIZEOF(refshun); i++) char_flags[refshun[i]] |= CHAR_SHUNNED; for (i = 0; i < 256; i++) if (ISASCII(i) && iscntrl(i)) char_flags[i] |= CHAR_SHUNNED; bufsalloc(); } static VOID bufsalloc() { scbs = (struct source *)rmalloc((REFENTLVL+1)*sizeof(struct source)); tbuf = (UNCH *)rmalloc(REFATTSPLEN+REFLITLEN+1); /* entbuf is used for parsing numeric character references */ entbuf = (UNCH *)rmalloc(REFNAMELEN + 2); } static VOID bufsrealloc() { UNS size; if (ENTLVL != REFENTLVL) scbs = (struct source *)rrealloc((UNIV)scbs, (ENTLVL+1)*sizeof(struct source)); /* Calculate the size for tbuf. */ size = LITLEN + ATTSPLEN; if (PILEN > size) size = PILEN; if (BSEQLEN > size) size = BSEQLEN; if (size != REFATTSPLEN + REFLITLEN) tbuf = (UNCH *)rrealloc((UNIV)tbuf, size + 1); if (NAMELEN != REFNAMELEN) entbuf = (UNCH *)rrealloc((UNIV)entbuf, NAMELEN + 2); } /* Check that the non-SGML characters are compatible with the concrete syntax and munge the lexical tables accordingly. If IMPLIED is non-zero, then the SGML declaration was implied; in this case, don't give error messages about shunned characters not being declared non-SGML. Also make any changes that are required by the NAMING section. */ static VOID setlexical() { int i; UNCH **p; if (nlextoke) { /* Handle characters that were made significant by the NAMING section. 
*/ for (i = 0; i < 256; i++) if (nlextoke[i] == NMC || nlextoke[i] == NMS) char_flags[i] |= CHAR_SIGNIFICANT; } for (i = 0; i < 256; i++) if (char_flags[i] & CHAR_SIGNIFICANT) { /* Significant SGML characters musn't be non-SGML. */ if (char_flags[i] & CHAR_NONSGML) { UNCH buf[2]; buf[0] = i; buf[1] = '\0'; sderr(E_NONSGML, buf, (UNCH *)0); char_flags[i] &= ~CHAR_NONSGML; } } else { /* Shunned characters that are not significant SGML characters must be non-SGML. */ if ((char_flags[i] & (CHAR_SHUNNED | CHAR_NONSGML)) == CHAR_SHUNNED) { sderr(E_SHUNNED, ltous((long)i), (UNCH *)0); char_flags[i] |= CHAR_NONSGML; } } /* Now munge the lexical tables. */ for (p = lextabs; *p; p++) { UNCH nonclass = (*p)[CANON_NONSGML]; UNCH datclass = (*p)[CANON_DATACHAR]; UNCH nmcclass = (*p)[CANON_NMC]; UNCH nmsclass = (*p)[CANON_NMS]; UNCH minclass = (*p)[CANON_MIN]; for (i = 0; i < 256; i++) { if (char_flags[i] & CHAR_NONSGML) { /* We already know that it's not significant. */ if (!(char_flags[i] & CHAR_MAGIC)) (*p)[i] = nonclass; } else { if (char_flags[i] & CHAR_MAGIC) { sderr(E_MUSTBENON, ltous((long)i), (UNCH *)0); } else if (!(char_flags[i] & CHAR_SIGNIFICANT)) (*p)[i] = datclass; else if (nlextoke /* This relies on the fact that lextoke occurs last in lextabs. */ && lextoke[i] != nlextoke[i]) { switch (nlextoke[i]) { case NMC: (*p)[i] = nmcclass; break; case NMS: (*p)[i] = nmsclass; break; case INV: /* This will happen if period is not a name character. */ (*p)[i] = minclass; break; default: abort(); } } } } } if (nlextran) { memcpy((UNIV)lextran, (UNIV)nlextran, 256); frem((UNIV)nlextran); } if (nlextoke) { frem((UNIV)nlextoke); nlextoke = 0; } } /* Munge parse tables so that empty start and end tags are not recognized. 
*/
/* Called from sdfeatures() when SHORTTAG is NO.  Patches the four
   content parse tables listed in pcbs[] in place. */

static VOID noemptytag()
{
     static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc };
     int i;

     for (i = 0; i < SIZEOF(pcbs); i++) {
          int maxclass, maxstate;
          int j, k, act;
          UNCH *plex = pcbs[i]->plex;
          UNCH **ptab = pcbs[i]->ptab;

          /* Figure out the maximum lexical class. */
          maxclass = 0;
          for (j = 0; j < 256; j++)
               if (plex[j] > maxclass)
                    maxclass = plex[j];

          /* Now figure out the maximum state number and at the same time change actions.
             Rows are visited in pairs: row j is scanned for state numbers
             and row j + 1 holds the actions that are patched.  maxstate
             grows as larger state numbers are found, which extends the
             loop. */
          maxstate = 0;
          for (j = 0; j <= maxstate; j += 2) {
               for (k = 0; k <= maxclass; k++)
                    if (ptab[j][k] > maxstate)
                         maxstate = ptab[j][k];
               /* If the '>' class has an empty start or end tag action, change it to the action that the NMC class has. */
               act = ptab[j + 1][plex['>']];
               if (act == NET_ || act == NST_)
                    ptab[j + 1][plex['>']] = ptab[j + 1][plex['_']];
          }
     }
}

/* Lookup the value of the entry in pmap PTR whose key is KEY.
   Returns 0 if no entry matches; the table must end with an entry whose
   name is a null pointer. */

static UNIV pmaplookup(ptr, key)
struct pmap *ptr;
char *key;
{
     for (; ptr->name; ptr++)
          if (strcmp(key, ptr->name) == 0)
               return ptr->value;
     return 0;
}

/* Return an ASCII representation of N.  The result is held in a static
   buffer that is overwritten by the next call. */

static UNCH *ltous(n)
long n;
{
     static char buf[sizeof(long)*3 + 2];
     sprintf(buf, "%ld", n);
     return (UNCH *)buf;
}

/* NOTE(review): the body of sgmlwrsd() below is damaged in this copy of
   the file: the text between the first fprintf and the SUBDOC test is
   missing, which leaves the string literals unbalanced.  It is kept
   exactly as found; restore it from a pristine copy of the source before
   building. */

VOID sgmlwrsd(fp)
FILE *fp;
{
     int i;
     int changed;
     char *p;
     char uc[256]; /* upper case characters (with different lower case characters) */
     char lcletter[256]; /* LC letters: a-z */
     fprintf(fp, " 0) fprintf(fp, "SUBDOC YES %ld ", sd.subdoc); else fprintf(fp, "SUBDOC NO "); fprintf(fp, "FORMAL %s\n", sd.formal ? "YES" : "NO"); fprintf(fp, "APPINFO NONE"); fprintf(fp, ">\n"); } /* Local Variables: c-indent-level: 5 c-continued-statement-offset: 5 c-brace-offset: -5 c-argdecl-indent: 0 c-label-offset: -5 End: */