1765 lines
44 KiB
C
1765 lines
44 KiB
C
/*
|
|
* CDE - Common Desktop Environment
|
|
*
|
|
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
|
|
*
|
|
* These libraries and programs are free software; you can
|
|
* redistribute them and/or modify them under the terms of the GNU
|
|
* Lesser General Public License as published by the Free Software
|
|
* Foundation; either version 2 of the License, or (at your option)
|
|
* any later version.
|
|
*
|
|
* These libraries and programs are distributed in the hope that
|
|
* they will be useful, but WITHOUT ANY WARRANTY; without even the
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU Lesser General Public License for more
|
|
* details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with these libraries and programs; if not, write
|
|
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
|
|
* Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
/* $XConsortium: sgmldecl.c /main/3 1996/06/19 17:17:29 drk $ */
|
|
/* sgmldecl.c -
|
|
SGML declaration parsing.
|
|
|
|
Written by James Clark (jjc@jclark.com).
|
|
*/
|
|
|
|
#include "sgmlincl.h"
|
|
|
|
/* Symbolic names for the error numbers that are generated only by
   this module.  The numbering has gaps (188, 192-198, 216-218 and
   224-226 are not named here). */

#define E_STANDARD 163
#define E_SIGNIFICANT 164
#define E_BADLIT 165
#define E_SCOPE 166
#define E_XNUM 167
#define E_BADVERSION 168
#define E_NMUNSUP 169
#define E_XNMLIT 170
#define E_CHARDESC 171
#define E_CHARDUP 172
#define E_CHARRANGE 173
#define E_7BIT 174
#define E_CHARMISSING 175
#define E_SHUNNED 176
#define E_NONSGML 177
#define E_CAPSET 178
#define E_CAPMISSING 179
#define E_SYNTAX 180
#define E_CHARNUM 181
#define E_SWITCHES 182
#define E_INSTANCE 183
#define E_ZEROFEATURE 184
#define E_YESNO 185
#define E_CAPACITY 186
#define E_NOTSUPPORTED 187
#define E_FORMAL 189
#define E_BADCLASS 190
#define E_MUSTBENON 191
#define E_BADBASECHAR 199
#define E_SYNREFUNUSED 200
#define E_SYNREFUNDESC 201
#define E_SYNREFUNKNOWN 202
#define E_SYNREFUNKNOWNSET 203
#define E_FUNDUP 204
#define E_BADFUN 205
#define E_FUNCHAR 206
#define E_GENDELIM 207
#define E_SRDELIM 208
#define E_BADKEY 209
#define E_BADQUANTITY 210
#define E_BADNAME 211
#define E_REFNAME 212
#define E_DUPNAME 213
#define E_QUANTITY 214
#define E_QTOOBIG 215
#define E_NMSTRTCNT 219
#define E_NMCHARCNT 220
#define E_NMDUP 221
#define E_NMBAD 222
#define E_NMMINUS 223
#define E_UNKNOWNSET 227

#define CANON_NMC '.'		/* Canonical name character. */
#define CANON_NMS 'A'		/* Canonical name start character. */
#define CANON_MIN ':'		/* Canonical minimum data character. */

/* Return values used by the section parsers in this file. */
#define SUCCESS 1
#define FAIL 0

/* Number of elements in the array v.  Only meaningful for true arrays,
   not for pointers.  The argument is parenthesised so that any array
   expression can be passed. */
#define SIZEOF(v) (sizeof(v)/sizeof((v)[0]))

/* Non-zero if the name token tok equals str.  The comparison starts at
   tok+1, i.e. the first byte of the token buffer is skipped. */
#define matches(tok, str) (ustrcmp((tok)+1, (str)) == 0)

static UNCH standard[] = "ISO 8879:1986";
|
|
|
|
#define REFERENCE_SYNTAX "ISO 8879:1986//SYNTAX Reference//EN"
|
|
#define CORE_SYNTAX "ISO 8879:1986//SYNTAX Core//EN"
|
|
|
|
static UNCH (*newkey)[REFNAMELEN+1] = 0;
|
|
|
|
struct pmap {
|
|
char *name;
|
|
UNIV value;
|
|
};
|
|
|
|
/* The reference capacity set. */
|
|
#define REFCAPSET \
|
|
{ 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, \
|
|
35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L }
|
|
|
|
long refcapset[NCAPACITY] = REFCAPSET;
|
|
|
|
/* A pmap of known capacity sets. */
|
|
|
|
static struct pmap capset_map[] = {
|
|
{ "ISO 8879:1986//CAPACITY Reference//EN", (UNIV)refcapset },
|
|
{ 0 },
|
|
};
|
|
|
|
/* Table of capacity names.  Must match *CAP in sgmldecl.h.
   sdcapacity() uses the index of a name here as the index into
   sd.capacity. */

char *captab[] = {
    "TOTALCAP",
    "ENTCAP",
    "ENTCHCAP",
    "ELEMCAP",
    "GRPCAP",
    "EXGRPCAP",
    "EXNMCAP",
    "ATTCAP",
    "ATTCHCAP",
    "AVGRPCAP",
    "NOTCAP",
    "NOTCHCAP",
    "IDCAP",
    "IDREFCAP",
    "MAPCAP",
    "LKSETCAP",
    "LKNMCAP",
};

/* The default SGML declaration. */
#define MAXNUMBER 99999999L

/* Reference quantity set */

#define REFATTCNT 40
#define REFATTSPLEN 960
#define REFBSEQLEN 960
#define REFDTAGLEN 16
#define REFDTEMPLEN 16
#define REFENTLVL 16
#define REFGRPCNT 32
#define REFGRPGTCNT 96
#define REFGRPLVL 16
#define REFNORMSEP 2
#define REFPILEN 240
#define REFTAGLEN 960
#define REFTAGLVL 24

/* Constants from which the upper limits below are derived. */
#define ALLOC_MAX 65534

#define BIGINT 30000

/* Upper limits accepted by sdquantity() for each quantity. */
#define MAXATTCNT ((ALLOC_MAX/sizeof(struct ad)) - 2)
#define MAXATTSPLEN BIGINT
#define MAXBSEQLEN BIGINT
#define MAXDTAGLEN 16
#define MAXDTEMPLEN 16
#define MAXENTLVL ((ALLOC_MAX/sizeof(struct source)) - 1)
#define MAXGRPCNT MAXGRPGTCNT
/* Must be between 96 and 253 */
#define MAXGRPGTCNT 253
#define MAXGRPLVL MAXGRPGTCNT
#define MAXLITLEN BIGINT
/* This guarantees that NAMELEN < LITLEN (ie there's always space for a name
   in a buffer intended for a literal.) */
#define MAXNAMELEN (REFLITLEN - 1)
#define MAXNORMSEP 2
#define MAXPILEN BIGINT
#define MAXTAGLEN BIGINT
#define MAXTAGLVL ((ALLOC_MAX/sizeof(struct tag)) - 1)

/* Table of quantity names.  Must match Q* in sgmldecl.h.
   sdquantity() uses the index of a name here as the index into
   sd.quantity and max_quantity. */

static char *quantity_names[] = {
    "ATTCNT",
    "ATTSPLEN",
    "BSEQLEN",
    "DTAGLEN",
    "DTEMPLEN",
    "ENTLVL",
    "GRPCNT",
    "GRPGTCNT",
    "GRPLVL",
    "LITLEN",
    "NAMELEN",
    "NORMSEP",
    "PILEN",
    "TAGLEN",
    "TAGLVL",
};

static int max_quantity[] = {
|
|
MAXATTCNT,
|
|
MAXATTSPLEN,
|
|
MAXBSEQLEN,
|
|
MAXDTAGLEN,
|
|
MAXDTEMPLEN,
|
|
MAXENTLVL,
|
|
MAXGRPCNT,
|
|
MAXGRPGTCNT,
|
|
MAXGRPLVL,
|
|
MAXLITLEN,
|
|
MAXNAMELEN,
|
|
MAXNORMSEP,
|
|
MAXPILEN,
|
|
MAXTAGLEN,
|
|
MAXTAGLVL,
|
|
};
|
|
|
|
/* Per-quantity flags, allocated by sdquantity() the first time a
   quantity is changed; entry i is set to 1 when quantity i was changed.
   Null while no quantity has been changed. */
static char *quantity_changed;

/* Non-zero means the APPINFO parameter was not NONE. */
static int appinfosw = 0;

struct sgmldecl sd = {
|
|
REFCAPSET, /* capacity */
|
|
#ifdef SUPPORT_SUBDOC
|
|
MAXNUMBER, /* subdoc */
|
|
#else /* not SUPPORT_SUBDOC */
|
|
0, /* subdoc */
|
|
#endif /* not SUPPORT_SUBDOC */
|
|
1, /* formal */
|
|
1, /* omittag */
|
|
1, /* shorttag */
|
|
1, /* shortref */
|
|
{ 1, 0 }, /* general/entity name case translation */
|
|
{ /* reference quantity set */
|
|
REFATTCNT,
|
|
REFATTSPLEN,
|
|
REFBSEQLEN,
|
|
REFDTAGLEN,
|
|
REFDTEMPLEN,
|
|
REFENTLVL,
|
|
REFGRPCNT,
|
|
REFGRPGTCNT,
|
|
REFGRPLVL,
|
|
REFLITLEN,
|
|
REFNAMELEN,
|
|
REFNORMSEP,
|
|
REFPILEN,
|
|
REFTAGLEN,
|
|
REFTAGLVL,
|
|
},
|
|
};
|
|
|
|
/* Identity mapping 0..255: entry i is i.  Registered in charset_map as
   the base set for SYSTEM_CHARSET_DESIGNATING_SEQUENCE. */
static int systemcharset[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};

static struct pmap charset_map[] = {
|
|
{ "ESC 2/5 4/0", (UNIV)asciicharset }, /* ISO 646 IRV */
|
|
{ "ESC 2/8 4/2", (UNIV)asciicharset }, /* ISO Registration Number 6, ASCII */
|
|
{ SYSTEM_CHARSET_DESIGNATING_SEQUENCE, (UNIV)systemcharset },
|
|
/* system character set */
|
|
{ 0 }
|
|
};
|
|
|
|
static int synrefcharset[256]; /* the syntax reference character set */
|
|
|
|
#define CHAR_NONSGML 01
|
|
#define CHAR_SIGNIFICANT 02
|
|
#define CHAR_MAGIC 04
|
|
#define CHAR_SHUNNED 010
|
|
|
|
static UNCH char_flags[256];
|
|
static int done_nonsgml = 0;
|
|
static UNCH *nlextoke = 0; /* new lextoke */
|
|
static UNCH *nlextran = 0; /* new lextran */
|
|
|
|
|
|
/* Keywords of the SGML declaration, as compared against name tokens
   by the parsers below. */
static UNCH kcharset[] = "CHARSET";
static UNCH kbaseset[] = "BASESET";
static UNCH kdescset[] = "DESCSET";
static UNCH kunused[] = "UNUSED";
static UNCH kcapacity[] = "CAPACITY";
static UNCH kpublic[] = "PUBLIC";
static UNCH ksgmlref[] = "SGMLREF";
static UNCH kscope[] = "SCOPE";
static UNCH kdocument[] = "DOCUMENT";
static UNCH kinstance[] = "INSTANCE";
static UNCH ksyntax[] = "SYNTAX";
static UNCH kswitches[] = "SWITCHES";
static UNCH kfeatures[] = "FEATURES";
static UNCH kminimize[] = "MINIMIZE";
static UNCH kdatatag[] = "DATATAG";
static UNCH komittag[] = "OMITTAG";
static UNCH krank[] = "RANK";
static UNCH kshorttag[] = "SHORTTAG";
static UNCH klink[] = "LINK";
static UNCH ksimple[] = "SIMPLE";
static UNCH kimplicit[] = "IMPLICIT";
static UNCH kexplicit[] = "EXPLICIT";
static UNCH kother[] = "OTHER";
static UNCH kconcur[] = "CONCUR";
static UNCH ksubdoc[] = "SUBDOC";
static UNCH kformal[] = "FORMAL";
static UNCH kyes[] = "YES";
static UNCH kno[] = "NO";
static UNCH kappinfo[] = "APPINFO";
static UNCH knone[] = "NONE";
static UNCH kshunchar[] = "SHUNCHAR";
static UNCH kcontrols[] = "CONTROLS";
static UNCH kfunction[] = "FUNCTION";
static UNCH krs[] = "RS";
static UNCH kre[] = "RE";
static UNCH kspace[] = "SPACE";
static UNCH knaming[] = "NAMING";
static UNCH klcnmstrt[] = "LCNMSTRT";
static UNCH kucnmstrt[] = "UCNMSTRT";
static UNCH klcnmchar[] = "LCNMCHAR";
static UNCH kucnmchar[] = "UCNMCHAR";
static UNCH knamecase[] = "NAMECASE";
static UNCH kdelim[] = "DELIM";
static UNCH kgeneral[] = "GENERAL";
static UNCH kentity[] = "ENTITY";
static UNCH kshortref[] = "SHORTREF";
static UNCH knames[] = "NAMES";
static UNCH kquantity[] = "QUANTITY";

#define sderr mderr
|
|
|
|
static UNIV pmaplookup P((struct pmap *, char *));
|
|
static UNCH *ltous P((long));
|
|
static VOID sdfixstandard P((UNCH *));
|
|
static int sdparm P((UNCH *, struct parse *));
|
|
static int sdname P((UNCH *, UNCH *));
|
|
static int sdckname P((UNCH *, UNCH *));
|
|
static int sdversion P((UNCH *));
|
|
static int sdcharset P((UNCH *));
|
|
static int sdcsdesc P((UNCH *, int *));
|
|
static int sdpubcapacity P((UNCH *));
|
|
static int sdcapacity P((UNCH *));
|
|
static int sdscope P((UNCH *));
|
|
static VOID setlexical P((void));
|
|
static VOID noemptytag P((void));
|
|
static int sdpubsyntax P((UNCH *));
|
|
static int sdsyntax P((UNCH *));
|
|
static int sdxsyntax P((UNCH *));
|
|
static int sdtranscharnum P((UNCH *));
|
|
static int sdtranschar P((int));
|
|
static int sdshunchar P((UNCH *));
|
|
static int sdsynref P((UNCH *));
|
|
static int sdfunction P((UNCH *));
|
|
static int sdnaming P((UNCH *));
|
|
static int sddelim P((UNCH *));
|
|
static int sdnames P((UNCH *));
|
|
static int sdquantity P((UNCH *));
|
|
static int sdfeatures P((UNCH *));
|
|
static int sdappinfo P((UNCH *));
|
|
|
|
static VOID bufsalloc P((void));
|
|
static VOID bufsrealloc P((void));
|
|
|
|
/* Parse the SGML declaration. Return non-zero if there was some appinfo. */
|
|
|
|
int sgmldecl()
|
|
{
|
|
int i;
|
|
int errsw = 0;
|
|
UNCH endbuf[REFNAMELEN+2]; /* buffer for parsing terminating > */
|
|
static int (*section[]) P((UNCH *)) = {
|
|
sdversion,
|
|
sdcharset,
|
|
sdcapacity,
|
|
sdscope,
|
|
sdsyntax,
|
|
sdfeatures,
|
|
sdappinfo,
|
|
};
|
|
/* These are needed if we use mderr. */
|
|
parmno = 0;
|
|
mdname = sgmlkey;
|
|
subdcl = NULL;
|
|
for (i = 0; i < SIZEOF(section); i++)
|
|
if ((*section[i])(tbuf) == FAIL) {
|
|
errsw = 1;
|
|
break;
|
|
}
|
|
if (!errsw)
|
|
setlexical();
|
|
bufsrealloc();
|
|
/* Parse the >. Don't overwrite the appinfo. */
|
|
if (!errsw)
|
|
sdparm(endbuf, 0);
|
|
/* We must exit if we hit end of document. */
|
|
if (pcbsd.action == EOD_)
|
|
exiterr(161, &pcbsd);
|
|
if (!errsw && pcbsd.action != ESGD)
|
|
sderr(126, (UNCH *)0, (UNCH *)0);
|
|
return appinfosw;
|
|
}
|
|
|
|
/* Parse the literal (which should contain the version of the
|
|
standard) at the beginning of a SGML declaration. */
|
|
|
|
static int sdversion(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
if (sdparm(tbuf, &pcblitv) != LIT1) {
|
|
sderr(123, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
sdfixstandard(tbuf);
|
|
if (ustrcmp(tbuf, standard) != 0)
|
|
sderr(E_BADVERSION, tbuf, standard);
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the CHARSET section. Use one token lookahead. */
|
|
|
|
static int sdcharset(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
int i;
|
|
int status[256];
|
|
|
|
if (sdname(tbuf, kcharset) == FAIL) return FAIL;
|
|
(void)sdparm(tbuf, 0);
|
|
|
|
if (sdcsdesc(tbuf, status) == FAIL)
|
|
return FAIL;
|
|
|
|
for (i = 128; i < 256; i++)
|
|
if (status[i] != UNDESC)
|
|
break;
|
|
if (i >= 256) {
|
|
/* Only a 7-bit character set was described. Fill it out to 8-bits. */
|
|
for (i = 128; i < 256; i++)
|
|
status[i] = UNUSED;
|
|
#if 0
|
|
sderr(E_7BIT, (UNCH *)0, (UNCH *)0);
|
|
#endif
|
|
}
|
|
/* Characters that are declared UNUSED in the document character set
|
|
are assigned to non-SGML. */
|
|
for (i = 0; i < 256; i++) {
|
|
if (status[i] == UNDESC) {
|
|
sderr(E_CHARMISSING, ltous((long)i), (UNCH *)0);
|
|
char_flags[i] |= CHAR_NONSGML;
|
|
}
|
|
else if (status[i] == UNUSED)
|
|
char_flags[i] |= CHAR_NONSGML;
|
|
}
|
|
done_nonsgml = 1;
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse a character set description. Uses one character lookahead. */
|
|
|
|
static int sdcsdesc(tbuf, status)
|
|
UNCH *tbuf;
|
|
int *status;
|
|
{
|
|
int i;
|
|
int nsets = 0;
|
|
struct fpi fpi;
|
|
|
|
for (i = 0; i < 256; i++)
|
|
status[i] = UNDESC;
|
|
|
|
for (;;) {
|
|
int nchars;
|
|
int *baseset = 0;
|
|
|
|
if (pcbsd.action != NAS1) {
|
|
if (nsets == 0) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
break;
|
|
}
|
|
if (!matches(tbuf, kbaseset)) {
|
|
if (nsets == 0) {
|
|
sderr(118, tbuf+1, kbaseset);
|
|
return FAIL;
|
|
}
|
|
break;
|
|
}
|
|
nsets++;
|
|
MEMZERO((UNIV)&fpi, FPISZ);
|
|
if (sdparm(tbuf, &pcblitv) != LIT1) {
|
|
sderr(123, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
fpi.fpipubis = tbuf;
|
|
/* Give a warning if it is not a CHARSET fpi. */
|
|
if (parsefpi(&fpi))
|
|
sderr(E_FORMAL, (UNCH *)0, (UNCH *)0);
|
|
else if (fpi.fpic != FPICHARS)
|
|
sderr(E_BADCLASS, kcharset, (UNCH *)0);
|
|
else {
|
|
fpi.fpipubis[fpi.fpil + fpi.fpill] = '\0';
|
|
baseset = (int *)pmaplookup(charset_map,
|
|
(char *)fpi.fpipubis + fpi.fpil);
|
|
if (!baseset)
|
|
sderr(E_UNKNOWNSET, fpi.fpipubis + fpi.fpil, (UNCH *)0);
|
|
}
|
|
if (sdname(tbuf, kdescset) == FAIL) return FAIL;
|
|
nchars = 0;
|
|
for (;;) {
|
|
long start, count;
|
|
long basenum;
|
|
if (sdparm(tbuf, 0) != NUM1)
|
|
break;
|
|
start = atol((char *)tbuf);
|
|
if (sdparm(tbuf, 0) != NUM1) {
|
|
sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
count = atol((char *)tbuf);
|
|
switch (sdparm(tbuf, &pcblitv)) {
|
|
case NUM1:
|
|
basenum = atol((char *)tbuf);
|
|
break;
|
|
case LIT1:
|
|
basenum = UNKNOWN;
|
|
break;
|
|
case NAS1:
|
|
if (matches(tbuf, kunused)) {
|
|
basenum = UNUSED;
|
|
break;
|
|
}
|
|
/* fall through */
|
|
default:
|
|
sderr(E_CHARDESC, ltous(start), (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (start + count > 256)
|
|
sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
|
|
else {
|
|
int i;
|
|
int lim = (int)start + count;
|
|
for (i = (int)start; i < lim; i++) {
|
|
if (status[i] != UNDESC)
|
|
sderr(E_CHARDUP, ltous((long)i), (UNCH *)0);
|
|
else if (basenum == UNUSED || basenum == UNKNOWN)
|
|
status[i] = (int)basenum;
|
|
else if (baseset == 0)
|
|
status[i] = UNKNOWN_SET;
|
|
else {
|
|
int n = basenum + (i - start);
|
|
if (n < 0 || n > 255)
|
|
sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
|
|
else if (baseset[n] == UNUSED)
|
|
sderr(E_BADBASECHAR, ltous((long)n), (UNCH *)0);
|
|
else
|
|
status[i] = baseset[n];
|
|
}
|
|
}
|
|
}
|
|
nchars++;
|
|
}
|
|
if (nchars == 0) {
|
|
sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
}
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the CAPACITY section. Uses one token lookahead. */
|
|
|
|
static int sdcapacity(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
int ncap;
|
|
|
|
if (sdckname(tbuf, kcapacity) == FAIL)
|
|
return FAIL;
|
|
if (sdparm(tbuf, 0) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (matches(tbuf, kpublic))
|
|
return sdpubcapacity(tbuf);
|
|
if (!matches(tbuf, ksgmlref)) {
|
|
sderr(E_CAPACITY, tbuf+1, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
memcpy((UNIV)sd.capacity, (UNIV)refcapset, sizeof(sd.capacity));
|
|
ncap = 0;
|
|
for (;;) {
|
|
int capno = -1;
|
|
int i;
|
|
|
|
if (sdparm(tbuf, 0) != NAS1)
|
|
break;
|
|
for (i = 0; i < SIZEOF(captab); i++)
|
|
if (matches(tbuf, captab[i])) {
|
|
capno = i;
|
|
break;
|
|
}
|
|
if (capno < 0)
|
|
break;
|
|
if (sdparm(tbuf, 0) != NUM1) {
|
|
sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
sd.capacity[capno] = atol((char *)tbuf);
|
|
ncap++;
|
|
}
|
|
if (ncap == 0) {
|
|
sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse a CAPACITY section that started with PUBLIC. Must do one
|
|
token lookahead, since sdcapacity() also does. */
|
|
|
|
static int sdpubcapacity(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
UNIV ptr;
|
|
if (sdparm(tbuf, &pcblitv) != LIT1) {
|
|
sderr(123, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
sdfixstandard(tbuf);
|
|
ptr = pmaplookup(capset_map, (char *)tbuf);
|
|
if (!ptr)
|
|
sderr(E_CAPSET, tbuf, (UNCH *)0);
|
|
else
|
|
memcpy((UNIV)sd.capacity, (UNIV)ptr, sizeof(sd.capacity));
|
|
(void)sdparm(tbuf, 0);
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the SCOPE section. Uses no lookahead. */
|
|
|
|
static int sdscope(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
if (sdckname(tbuf, kscope) == FAIL)
|
|
return FAIL;
|
|
if (sdparm(tbuf, 0) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (matches(tbuf, kdocument))
|
|
;
|
|
else if (matches(tbuf, kinstance))
|
|
sderr(E_INSTANCE, (UNCH *)0, (UNCH *)0);
|
|
else {
|
|
sderr(E_SCOPE, tbuf+1, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the SYNTAX section. Uses one token lookahead. */
|
|
|
|
static int sdsyntax(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
if (sdname(tbuf, ksyntax) == FAIL) return FAIL;
|
|
if (sdparm(tbuf, 0) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (matches(tbuf, kpublic))
|
|
return sdpubsyntax(tbuf);
|
|
return sdxsyntax(tbuf);
|
|
}
|
|
|
|
/* Parse the SYNTAX section which starts with PUBLIC. Uses one token
|
|
lookahead. */
|
|
|
|
static int sdpubsyntax(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
int nswitches;
|
|
if (sdparm(tbuf, &pcblitv) != LIT1)
|
|
return FAIL;
|
|
sdfixstandard(tbuf);
|
|
if (ustrcmp(tbuf, CORE_SYNTAX) == 0)
|
|
sd.shortref = 0;
|
|
else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0)
|
|
sd.shortref = 1;
|
|
else
|
|
sderr(E_SYNTAX, tbuf, (UNCH *)0);
|
|
if (sdparm(tbuf, 0) != NAS1)
|
|
return SUCCESS;
|
|
if (!matches(tbuf, kswitches))
|
|
return SUCCESS;
|
|
nswitches = 0;
|
|
for (;;) {
|
|
int errsw = 0;
|
|
|
|
if (sdparm(tbuf, 0) != NUM1)
|
|
break;
|
|
if (atol((char *)tbuf) > 255) {
|
|
sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
|
|
errsw = 1;
|
|
}
|
|
if (sdparm(tbuf, 0) != NUM1) {
|
|
sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (!errsw) {
|
|
if (atol((char *)tbuf) > 255)
|
|
sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
|
|
}
|
|
nswitches++;
|
|
}
|
|
if (nswitches == 0) {
|
|
sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
sderr(E_SWITCHES, (UNCH *)0, (UNCH *)0);
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse an explicit concrete syntax. Uses one token lookahead. */
|
|
|
|
static
|
|
int sdxsyntax(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
static int (*section[]) P((UNCH *)) = {
|
|
sdshunchar,
|
|
sdsynref,
|
|
sdfunction,
|
|
sdnaming,
|
|
sddelim,
|
|
sdnames,
|
|
sdquantity,
|
|
};
|
|
int i;
|
|
|
|
for (i = 0; i < SIZEOF(section); i++)
|
|
if ((*section[i])(tbuf) == FAIL)
|
|
return FAIL;
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the SHUNCHAR section. Uses one token lookahead. */
|
|
|
|
static
|
|
int sdshunchar(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
int i;
|
|
for (i = 0; i < 256; i++)
|
|
char_flags[i] &= ~CHAR_SHUNNED;
|
|
|
|
if (sdckname(tbuf, kshunchar) == FAIL)
|
|
return FAIL;
|
|
|
|
if (sdparm(tbuf, 0) == NAS1) {
|
|
if (matches(tbuf, knone)) {
|
|
(void)sdparm(tbuf, 0);
|
|
return SUCCESS;
|
|
}
|
|
if (matches(tbuf, kcontrols)) {
|
|
for (i = 0; i < 256; i++)
|
|
if (ISASCII(i) && iscntrl(i))
|
|
char_flags[i] |= CHAR_SHUNNED;
|
|
if (sdparm(tbuf, 0) != NUM1)
|
|
return SUCCESS;
|
|
}
|
|
}
|
|
if (pcbsd.action != NUM1) {
|
|
sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
do {
|
|
long n = atol((char *)tbuf);
|
|
if (n > 255)
|
|
sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
|
|
else
|
|
char_flags[(int)n] |= CHAR_SHUNNED;
|
|
} while (sdparm(tbuf, 0) == NUM1);
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the syntax reference character set. Uses one token lookahead. */
|
|
|
|
static
|
|
int sdsynref(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
return sdcsdesc(tbuf, synrefcharset);
|
|
}
|
|
|
|
/* Translate a character number from the syntax reference character set
|
|
to the system character set. If it can't be done, give an error message
|
|
and return -1. */
|
|
|
|
static
|
|
int sdtranscharnum(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
long n = atol((char *)tbuf);
|
|
if (n > 255) {
|
|
sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
|
|
return -1;
|
|
}
|
|
return sdtranschar((int)n);
|
|
}
|
|
|
|
|
|
static
|
|
int sdtranschar(n)
|
|
int n;
|
|
{
|
|
int ch = synrefcharset[n];
|
|
if (ch >= 0)
|
|
return ch;
|
|
switch (ch) {
|
|
case UNUSED:
|
|
sderr(E_SYNREFUNUSED, ltous((long)n), (UNCH *)0);
|
|
break;
|
|
case UNDESC:
|
|
sderr(E_SYNREFUNDESC, ltous((long)n), (UNCH *)0);
|
|
break;
|
|
case UNKNOWN:
|
|
sderr(E_SYNREFUNKNOWN, ltous((long)n), (UNCH *)0);
|
|
break;
|
|
case UNKNOWN_SET:
|
|
sderr(E_SYNREFUNKNOWNSET, ltous((long)n), (UNCH *)0);
|
|
break;
|
|
default:
|
|
abort();
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
|
|
/* Parse the function section. Uses two tokens lookahead. "NAMING"
|
|
could be a function name. */
|
|
|
|
static
|
|
int sdfunction(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
static UNCH *fun[] = { kre, krs, kspace };
|
|
static int funval[] = { RECHAR, RSCHAR, ' ' };
|
|
int i;
|
|
int had_tab = 0;
|
|
int changed = 0; /* attempted to change reference syntax */
|
|
|
|
if (sdckname(tbuf, kfunction) == FAIL)
|
|
return FAIL;
|
|
for (i = 0; i < SIZEOF(fun); i++) {
|
|
int ch;
|
|
if (sdname(tbuf, fun[i]) == FAIL)
|
|
return FAIL;
|
|
if (sdparm(tbuf, 0) != NUM1) {
|
|
sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
ch = sdtranscharnum(tbuf);
|
|
if (ch >= 0 && ch != funval[i])
|
|
changed = 1;
|
|
}
|
|
for (;;) {
|
|
int tabsw = 0;
|
|
int namingsw = 0;
|
|
if (sdparm(tbuf, 0) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (matches(tbuf, (UNCH *)"TAB")) {
|
|
tabsw = 1;
|
|
if (had_tab)
|
|
sderr(E_FUNDUP, (UNCH *)0, (UNCH *)0);
|
|
}
|
|
else {
|
|
for (i = 0; i < SIZEOF(fun); i++)
|
|
if (matches(tbuf, fun[i]))
|
|
sderr(E_BADFUN, fun[i], (UNCH *)0);
|
|
if (matches(tbuf, knaming))
|
|
namingsw = 1;
|
|
else
|
|
changed = 1;
|
|
}
|
|
if (sdparm(tbuf, 0) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (namingsw) {
|
|
if (matches(tbuf, klcnmstrt))
|
|
break;
|
|
changed = 1;
|
|
}
|
|
if (sdparm(tbuf, 0) != NUM1) {
|
|
sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (tabsw && !had_tab) {
|
|
int ch = sdtranscharnum(tbuf);
|
|
if (ch >= 0 && ch != TABCHAR)
|
|
changed = 1;
|
|
had_tab = 1;
|
|
}
|
|
|
|
}
|
|
if (!had_tab)
|
|
changed = 1;
|
|
if (changed)
|
|
sderr(E_FUNCHAR, (UNCH *)0, (UNCH *)0);
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the NAMING section. Uses no lookahead. */
|
|
|
|
static
|
|
int sdnaming(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
int i;
|
|
int bad = 0;
|
|
static UNCH *classes[] = { klcnmstrt, kucnmstrt, klcnmchar, kucnmchar };
|
|
static UNCH *types[] = { kgeneral, kentity };
|
|
|
|
#define NCLASSES SIZEOF(classes)
|
|
|
|
int bufsize = 4; /* allocated size of buf */
|
|
UNCH *buf = (UNCH *)rmalloc(bufsize); /* holds characters
|
|
in naming classes */
|
|
int bufi = 0; /* next index into buf */
|
|
int start[NCLASSES]; /* index of first character for each class */
|
|
int count[NCLASSES]; /* number of characters for each class */
|
|
|
|
for (i = 0; i < NCLASSES; i++) {
|
|
UNCH *s;
|
|
|
|
if (sdckname(tbuf, classes[i]) == FAIL) {
|
|
frem((UNIV)buf);
|
|
return FAIL;
|
|
}
|
|
if (sdparm(tbuf, &pcblitp) != LIT1) {
|
|
sderr(123, (UNCH *)0, (UNCH *)0);
|
|
frem((UNIV)buf);
|
|
return FAIL;
|
|
}
|
|
start[i] = bufi;
|
|
|
|
for (s = tbuf; *s; s++) {
|
|
int c = *s;
|
|
if (c == DELNONCH) {
|
|
c = UNSHIFTNON(*s);
|
|
s++;
|
|
}
|
|
c = sdtranschar(c);
|
|
if (c < 0)
|
|
bad = 1;
|
|
else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC))
|
|
&& c != '.' && c != '-') {
|
|
int class = lextoke[c];
|
|
if (class == SEP || class == SP || class == NMC
|
|
|| class == NMS || class == NU)
|
|
sderr(E_NMBAD, ltous((long)c), (UNCH *)0);
|
|
else
|
|
sderr(E_NMUNSUP, ltous((long)c), (UNCH *)0);
|
|
bad = 1;
|
|
}
|
|
if (bufi >= bufsize)
|
|
buf = (UNCH *)rrealloc((UNIV)buf, bufsize *= 2);
|
|
buf[bufi++] = c;
|
|
}
|
|
|
|
count[i] = bufi - start[i];
|
|
(void)sdparm(tbuf, 0);
|
|
}
|
|
if (!bad && count[0] != count[1]) {
|
|
sderr(E_NMSTRTCNT, (UNCH *)0, (UNCH *)0);
|
|
bad = 1;
|
|
}
|
|
if (!bad && count[2] != count[3]) {
|
|
sderr(E_NMCHARCNT, (UNCH *)0, (UNCH *)0);
|
|
bad = 1;
|
|
}
|
|
if (!bad) {
|
|
nlextoke = (UNCH *)rmalloc(256);
|
|
memcpy((UNIV)nlextoke, lextoke, 256);
|
|
nlextoke['.'] = nlextoke['-'] = INV;
|
|
|
|
nlextran = (UNCH *)rmalloc(256);
|
|
memcpy((UNIV)nlextran, lextran, 256);
|
|
|
|
for (i = 0; i < count[0]; i++) {
|
|
UNCH lc = buf[start[0] + i];
|
|
UNCH uc = buf[start[1] + i];
|
|
nlextoke[lc] = NMS;
|
|
nlextoke[uc] = NMS;
|
|
nlextran[lc] = uc;
|
|
}
|
|
|
|
for (i = 0; i < count[2]; i++) {
|
|
UNCH lc = buf[start[2] + i];
|
|
UNCH uc = buf[start[3] + i];
|
|
if (nlextoke[lc] == NMS) {
|
|
sderr(E_NMDUP, ltous((long)lc), (UNCH *)0);
|
|
bad = 1;
|
|
}
|
|
else if (nlextoke[uc] == NMS) {
|
|
sderr(E_NMDUP, ltous((long)uc), (UNCH *)0);
|
|
bad = 1;
|
|
}
|
|
else {
|
|
nlextoke[lc] = NMC;
|
|
nlextoke[uc] = NMC;
|
|
nlextran[lc] = uc;
|
|
}
|
|
}
|
|
if (nlextoke['-'] != NMC) {
|
|
sderr(E_NMMINUS, (UNCH *)0, (UNCH *)0);
|
|
bad = 1;
|
|
}
|
|
if (bad) {
|
|
if (nlextoke) {
|
|
frem((UNIV)nlextoke);
|
|
nlextoke = 0;
|
|
}
|
|
if (nlextran) {
|
|
frem((UNIV)nlextran);
|
|
nlextran = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
frem((UNIV)buf);
|
|
|
|
if (sdckname(tbuf, knamecase) == FAIL)
|
|
return FAIL;
|
|
for (i = 0; i < SIZEOF(types); ++i) {
|
|
if (sdname(tbuf, types[i]) == FAIL)
|
|
return FAIL;
|
|
if (sdparm(tbuf, 0) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (matches(tbuf, kyes))
|
|
sd.namecase[i] = 1;
|
|
else if (matches(tbuf, kno))
|
|
sd.namecase[i] = 0;
|
|
else {
|
|
sderr(E_YESNO, tbuf+1, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
}
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the DELIM section. Uses one token lookahead. */
|
|
|
|
static
|
|
int sddelim(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
int changed = 0;
|
|
if (sdname(tbuf, kdelim) == FAIL
|
|
|| sdname(tbuf, kgeneral) == FAIL
|
|
|| sdname(tbuf, ksgmlref) == FAIL)
|
|
return FAIL;
|
|
for (;;) {
|
|
if (sdparm(tbuf, 0) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (matches(tbuf, kshortref))
|
|
break;
|
|
if (sdparm(tbuf, &pcblitp) != LIT1) {
|
|
sderr(123, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
changed = 1;
|
|
}
|
|
if (changed) {
|
|
sderr(E_GENDELIM, (UNCH *)0,(UNCH *)0);
|
|
changed = 0;
|
|
}
|
|
if (sdparm(tbuf, 0) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (matches(tbuf, ksgmlref))
|
|
sd.shortref = 1;
|
|
else if (matches(tbuf, knone))
|
|
sd.shortref = 0;
|
|
else {
|
|
sderr(118, tbuf+1, ksgmlref); /* probably they forgot SGMLREF */
|
|
return FAIL;
|
|
}
|
|
while (sdparm(tbuf, &pcblitp) == LIT1)
|
|
changed = 1;
|
|
if (changed)
|
|
sderr(E_SRDELIM, (UNCH *)0, (UNCH *)0);
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the NAMES section. Uses one token lookahead. */
|
|
|
|
static
|
|
int sdnames(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
int i;
|
|
if (sdckname(tbuf, knames) == FAIL)
|
|
return FAIL;
|
|
if (sdname(tbuf, ksgmlref) == FAIL)
|
|
return FAIL;
|
|
|
|
while (sdparm(tbuf, 0) == NAS1) {
|
|
int j;
|
|
if (matches(tbuf, kquantity))
|
|
break;
|
|
for (i = 0; i < NKEYS; i++)
|
|
if (matches(tbuf, key[i]))
|
|
break;
|
|
if (i >= NKEYS) {
|
|
sderr(E_BADKEY, tbuf+1, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (sdparm(tbuf, &pcblitp) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (!newkey) {
|
|
newkey = (UNCH (*)[REFNAMELEN+1])rmalloc((REFNAMELEN+1)*NKEYS);
|
|
MEMZERO((UNIV)newkey, (REFNAMELEN+1)*NKEYS);
|
|
}
|
|
for (j = 0; j < NKEYS; j++) {
|
|
if (matches(tbuf, key[j])) {
|
|
sderr(E_REFNAME, tbuf + 1, (UNCH *)0);
|
|
break;
|
|
}
|
|
if (matches(tbuf, newkey[j])) {
|
|
sderr(E_DUPNAME, tbuf + 1, (UNCH *)0);
|
|
break;
|
|
}
|
|
}
|
|
if (j >= NKEYS)
|
|
ustrcpy(newkey[i], tbuf + 1);
|
|
}
|
|
/* Now install the new keys. */
|
|
if (newkey) {
|
|
for (i = 0; i < NKEYS; i++)
|
|
if (newkey[i][0] != '\0') {
|
|
UNCH temp[REFNAMELEN + 1];
|
|
|
|
ustrcpy(temp, key[i]);
|
|
ustrcpy(key[i], newkey[i]);
|
|
ustrcpy(newkey[i], temp);
|
|
}
|
|
}
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the QUANTITY section. Uses one token lookahead. */
|
|
|
|
static int sdquantity(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
int quantity[NQUANTITY];
|
|
int i;
|
|
|
|
for (i = 0; i < NQUANTITY; i++)
|
|
quantity[i] = -1;
|
|
if (sdckname(tbuf, kquantity) == FAIL)
|
|
return FAIL;
|
|
if (sdname(tbuf, ksgmlref) == FAIL)
|
|
return FAIL;
|
|
while (sdparm(tbuf, 0) == NAS1 && !matches(tbuf, kfeatures)) {
|
|
long n;
|
|
for (i = 0; i < SIZEOF(quantity_names); i++)
|
|
if (matches(tbuf, quantity_names[i]))
|
|
break;
|
|
if (i >= SIZEOF(quantity_names)) {
|
|
sderr(E_BADQUANTITY, tbuf + 1, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (sdparm(tbuf, 0) != NUM1) {
|
|
sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
n = atol((char *)tbuf);
|
|
if (n < sd.quantity[i])
|
|
sderr(E_QUANTITY, (UNCH *)quantity_names[i],
|
|
ltous((long)sd.quantity[i]));
|
|
else if (n > max_quantity[i]) {
|
|
sderr(E_QTOOBIG, (UNCH *)quantity_names[i],
|
|
ltous((long)max_quantity[i]));
|
|
quantity[i] = max_quantity[i];
|
|
}
|
|
else
|
|
quantity[i] = (int)n;
|
|
}
|
|
for (i = 0; i < NQUANTITY; i++)
|
|
if (quantity[i] > 0) {
|
|
sd.quantity[i] = quantity[i];
|
|
if (!quantity_changed)
|
|
quantity_changed = (char *)rmalloc(NQUANTITY);
|
|
quantity_changed[i] = 1;
|
|
}
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the FEATURES section. Uses no lookahead. */
|
|
|
|
static int sdfeatures(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
static struct {
|
|
UNCH *name;
|
|
UNCH argtype; /* 0 = no argument, 1 = boolean, 2 = numeric */
|
|
UNIV valp; /* UNCH * if boolean, long * if numeric. */
|
|
} features[] = {
|
|
{ kminimize, 0, 0 },
|
|
{ kdatatag, 1, 0 },
|
|
{ komittag, 1, (UNIV)&sd.omittag },
|
|
{ krank, 1, 0 },
|
|
{ kshorttag, 1, (UNIV)&sd.shorttag },
|
|
{ klink, 0, 0 },
|
|
{ ksimple, 2, 0 },
|
|
{ kimplicit, 1, 0 },
|
|
{ kexplicit, 2, 0 },
|
|
{ kother, 0, 0 },
|
|
{ kconcur, 2, 0 },
|
|
{ ksubdoc, 2, (UNIV)&sd.subdoc },
|
|
{ kformal, 1, (UNIV)&sd.formal },
|
|
};
|
|
|
|
int i;
|
|
|
|
if (sdckname(tbuf, kfeatures) == FAIL)
|
|
return FAIL;
|
|
for (i = 0; i < SIZEOF(features); i++) {
|
|
if (sdname(tbuf, features[i].name) == FAIL) return FAIL;
|
|
if (features[i].argtype > 0) {
|
|
long n;
|
|
if (sdparm(tbuf, 0) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (matches(tbuf, kyes)) {
|
|
if (features[i].argtype > 1) {
|
|
if (sdparm(tbuf, 0) != NUM1) {
|
|
sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
n = atol((char *)tbuf);
|
|
if (n == 0)
|
|
sderr(E_ZEROFEATURE, features[i].name, (UNCH *)0);
|
|
}
|
|
else
|
|
n = 1;
|
|
}
|
|
else if (matches(tbuf, kno))
|
|
n = 0;
|
|
else {
|
|
sderr(E_YESNO, tbuf+1, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (features[i].valp == 0) {
|
|
if (n > 0)
|
|
sderr(E_NOTSUPPORTED, features[i].name,
|
|
(UNCH *)0);
|
|
}
|
|
else if (features[i].argtype > 1)
|
|
*(long *)features[i].valp = n;
|
|
else
|
|
*(UNCH *)features[i].valp = (UNCH)n;
|
|
}
|
|
}
|
|
if (!sd.shorttag)
|
|
noemptytag();
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse the APPINFO section. Uses no lookahead. */
|
|
|
|
static int sdappinfo(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
if (sdname(tbuf, kappinfo) == FAIL) return FAIL;
|
|
switch (sdparm(tbuf, &pcblitv)) {
|
|
case LIT1:
|
|
appinfosw = 1;
|
|
break;
|
|
case NAS1:
|
|
if (matches(tbuf, knone))
|
|
break;
|
|
sderr(118, tbuf+1, knone);
|
|
return FAIL;
|
|
default:
|
|
sderr(E_XNMLIT, knone, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Change a prefix of ISO 8879-1986 to ISO 8879:1986. Amendment 1 to
|
|
the standard requires the latter. */
|
|
|
|
static VOID sdfixstandard(tbuf)
|
|
UNCH *tbuf;
|
|
{
|
|
if (strncmp((char *)tbuf, "ISO 8879-1986", 13) == 0) {
|
|
sderr(E_STANDARD, (UNCH *)0, (UNCH *)0);
|
|
tbuf[8] = ':';
|
|
}
|
|
}
|
|
|
|
static int sdname(tbuf, key)
|
|
UNCH *tbuf;
|
|
UNCH *key;
|
|
{
|
|
if (sdparm(tbuf, 0) != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (!matches(tbuf, key)) {
|
|
sderr(118, tbuf+1, key);
|
|
return FAIL;
|
|
}
|
|
return SUCCESS;
|
|
}
|
|
|
|
static int sdckname(tbuf, key)
|
|
UNCH *tbuf;
|
|
UNCH *key;
|
|
{
|
|
if (pcbsd.action != NAS1) {
|
|
sderr(120, (UNCH *)0, (UNCH *)0);
|
|
return FAIL;
|
|
}
|
|
if (!matches(tbuf, key)) {
|
|
sderr(118, tbuf+1, key);
|
|
return FAIL;
|
|
}
|
|
return SUCCESS;
|
|
}
|
|
|
|
/* Parse a SGML declaration parameter. If lpcb is NULL, pt must be
|
|
REFNAMELEN+2 characters long, otherwise at least LITLEN+2 characters
|
|
long. LPCB should be NULL if a literal is not allowed. */
|
|
|
|
static int sdparm(pt, lpcb)
|
|
UNCH *pt; /* Token buffer. */
|
|
struct parse *lpcb; /* PCB for literal parse. */
|
|
{
|
|
for (;;) {
|
|
parse(&pcbsd);
|
|
if (pcbsd.action != ISIG)
|
|
break;
|
|
sderr(E_SIGNIFICANT, (UNCH *)0, (UNCH *)0);
|
|
}
|
|
++parmno;
|
|
switch (pcbsd.action) {
|
|
case LIT1:
|
|
if (!lpcb) {
|
|
sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
|
|
REPEATCC;
|
|
return pcbsd.action = INV_;
|
|
}
|
|
parselit(pt, lpcb, REFLITLEN, lex.d.lit);
|
|
return pcbsd.action;
|
|
case LIT2:
|
|
if (!lpcb) {
|
|
sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
|
|
REPEATCC;
|
|
return pcbsd.action = INV_;
|
|
}
|
|
parselit(pt, lpcb, REFLITLEN, lex.d.lita);
|
|
return pcbsd.action = LIT1;
|
|
case NAS1:
|
|
parsenm(pt, 1);
|
|
return pcbsd.action;
|
|
case NUM1:
|
|
parsetkn(pt, NU, REFNAMELEN);
|
|
return pcbsd.action;
|
|
}
|
|
return pcbsd.action;
|
|
}
|
|
|
|
VOID sdinit()
|
|
{
|
|
int i;
|
|
/* Shunned character numbers in the reference concrete syntax. */
|
|
static UNCH refshun[] = {
|
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
|
|
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
|
|
};
|
|
UNCH **p;
|
|
/* A character is magic if it is a non-SGML character used for
|
|
some internal purpose in the parser. */
|
|
char_flags[EOS] |= CHAR_MAGIC;
|
|
char_flags[EOBCHAR] |= CHAR_MAGIC;
|
|
char_flags[EOFCHAR] |= CHAR_MAGIC;
|
|
char_flags[GENRECHAR] |= CHAR_MAGIC;
|
|
char_flags[DELNONCH] |= CHAR_MAGIC;
|
|
char_flags[DELCDATA] |= CHAR_MAGIC;
|
|
char_flags[DELSDATA] |= CHAR_MAGIC;
|
|
|
|
/* Figure out the significant SGML characters. */
|
|
for (p = lextabs; *p; p++) {
|
|
UNCH datclass = (*p)[CANON_DATACHAR];
|
|
UNCH nonclass = (*p)[CANON_NONSGML];
|
|
for (i = 0; i < 256; i++)
|
|
if (!(char_flags[i] & CHAR_MAGIC)
|
|
&& (*p)[i] != datclass && (*p)[i] != nonclass)
|
|
char_flags[i] |= CHAR_SIGNIFICANT;
|
|
}
|
|
for (i = 0; i < SIZEOF(refshun); i++)
|
|
char_flags[refshun[i]] |= CHAR_SHUNNED;
|
|
for (i = 0; i < 256; i++)
|
|
if (ISASCII(i) && iscntrl(i))
|
|
char_flags[i] |= CHAR_SHUNNED;
|
|
bufsalloc();
|
|
}
|
|
|
|
|
|
static
|
|
VOID bufsalloc()
|
|
{
|
|
scbs = (struct source *)rmalloc((REFENTLVL+1)*sizeof(struct source));
|
|
tbuf = (UNCH *)rmalloc(REFATTSPLEN+REFLITLEN+1);
|
|
/* entbuf is used for parsing numeric character references */
|
|
entbuf = (UNCH *)rmalloc(REFNAMELEN + 2);
|
|
}
|
|
|
|
static
|
|
VOID bufsrealloc()
|
|
{
|
|
UNS size;
|
|
|
|
if (ENTLVL != REFENTLVL)
|
|
scbs = (struct source *)rrealloc((UNIV)scbs,
|
|
(ENTLVL+1)*sizeof(struct source));
|
|
/* Calculate the size for tbuf. */
|
|
size = LITLEN + ATTSPLEN;
|
|
if (PILEN > size)
|
|
size = PILEN;
|
|
if (BSEQLEN > size)
|
|
size = BSEQLEN;
|
|
if (size != REFATTSPLEN + REFLITLEN)
|
|
tbuf = (UNCH *)rrealloc((UNIV)tbuf, size + 1);
|
|
if (NAMELEN != REFNAMELEN)
|
|
entbuf = (UNCH *)rrealloc((UNIV)entbuf, NAMELEN + 2);
|
|
}
|
|
|
|
|
|
/* Check that the non-SGML characters are compatible with the concrete
|
|
syntax and munge the lexical tables accordingly. If IMPLIED is
|
|
non-zero, then the SGML declaration was implied; in this case, don't
|
|
give error messages about shunned characters not being declared
|
|
non-SGML. Also make any changes that are required by the NAMING section.
|
|
*/
|
|
|
|
static VOID setlexical()
|
|
{
|
|
int i;
|
|
UNCH **p;
|
|
|
|
if (nlextoke) {
|
|
/* Handle characters that were made significant by the
|
|
NAMING section. */
|
|
for (i = 0; i < 256; i++)
|
|
if (nlextoke[i] == NMC || nlextoke[i] == NMS)
|
|
char_flags[i] |= CHAR_SIGNIFICANT;
|
|
}
|
|
|
|
for (i = 0; i < 256; i++)
|
|
if (char_flags[i] & CHAR_SIGNIFICANT) {
|
|
/* Significant SGML characters musn't be non-SGML. */
|
|
if (char_flags[i] & CHAR_NONSGML) {
|
|
UNCH buf[2];
|
|
buf[0] = i;
|
|
buf[1] = '\0';
|
|
sderr(E_NONSGML, buf, (UNCH *)0);
|
|
char_flags[i] &= ~CHAR_NONSGML;
|
|
}
|
|
}
|
|
else {
|
|
/* Shunned characters that are not significant SGML characters
|
|
must be non-SGML. */
|
|
if ((char_flags[i] & (CHAR_SHUNNED | CHAR_NONSGML))
|
|
== CHAR_SHUNNED) {
|
|
sderr(E_SHUNNED, ltous((long)i), (UNCH *)0);
|
|
char_flags[i] |= CHAR_NONSGML;
|
|
}
|
|
}
|
|
|
|
|
|
/* Now munge the lexical tables. */
|
|
for (p = lextabs; *p; p++) {
|
|
UNCH nonclass = (*p)[CANON_NONSGML];
|
|
UNCH datclass = (*p)[CANON_DATACHAR];
|
|
UNCH nmcclass = (*p)[CANON_NMC];
|
|
UNCH nmsclass = (*p)[CANON_NMS];
|
|
UNCH minclass = (*p)[CANON_MIN];
|
|
for (i = 0; i < 256; i++) {
|
|
if (char_flags[i] & CHAR_NONSGML) {
|
|
/* We already know that it's not significant. */
|
|
if (!(char_flags[i] & CHAR_MAGIC))
|
|
(*p)[i] = nonclass;
|
|
}
|
|
else {
|
|
if (char_flags[i] & CHAR_MAGIC) {
|
|
sderr(E_MUSTBENON, ltous((long)i), (UNCH *)0);
|
|
}
|
|
else if (!(char_flags[i] & CHAR_SIGNIFICANT))
|
|
(*p)[i] = datclass;
|
|
else if (nlextoke
|
|
/* This relies on the fact that lextoke
|
|
occurs last in lextabs. */
|
|
&& lextoke[i] != nlextoke[i]) {
|
|
switch (nlextoke[i]) {
|
|
case NMC:
|
|
(*p)[i] = nmcclass;
|
|
break;
|
|
case NMS:
|
|
(*p)[i] = nmsclass;
|
|
break;
|
|
case INV:
|
|
/* This will happen if period is not a
|
|
name character. */
|
|
(*p)[i] = minclass;
|
|
break;
|
|
default:
|
|
abort();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (nlextran) {
|
|
memcpy((UNIV)lextran, (UNIV)nlextran, 256);
|
|
frem((UNIV)nlextran);
|
|
}
|
|
if (nlextoke) {
|
|
frem((UNIV)nlextoke);
|
|
nlextoke = 0;
|
|
}
|
|
|
|
}
|
|
|
|
/* Munge parse tables so that empty start and end tags are not recognized. */
|
|
|
|
static VOID noemptytag()
|
|
{
|
|
static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc };
|
|
int i;
|
|
|
|
for (i = 0; i < SIZEOF(pcbs); i++) {
|
|
int maxclass, maxstate;
|
|
int j, k, act;
|
|
UNCH *plex = pcbs[i]->plex;
|
|
UNCH **ptab = pcbs[i]->ptab;
|
|
|
|
/* Figure out the maximum lexical class. */
|
|
maxclass = 0;
|
|
for (j = 0; j < 256; j++)
|
|
if (plex[j] > maxclass)
|
|
maxclass = plex[j];
|
|
|
|
/* Now figure out the maximum state number and at the same time
|
|
change actions. */
|
|
|
|
maxstate = 0;
|
|
|
|
for (j = 0; j <= maxstate; j += 2) {
|
|
for (k = 0; k <= maxclass; k++)
|
|
if (ptab[j][k] > maxstate)
|
|
maxstate = ptab[j][k];
|
|
/* If the '>' class has an empty start or end tag action,
|
|
change it to the action that the NMC class has. */
|
|
act = ptab[j + 1][plex['>']];
|
|
if (act == NET_ || act == NST_)
|
|
ptab[j + 1][plex['>']] = ptab[j + 1][plex['_']];
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Lookup the value of the entry in pmap PTR whose key is KEY. */
|
|
|
|
static UNIV pmaplookup(ptr, key)
|
|
struct pmap *ptr;
|
|
char *key;
|
|
{
|
|
for (; ptr->name; ptr++)
|
|
if (strcmp(key, ptr->name) == 0)
|
|
return ptr->value;
|
|
return 0;
|
|
}
|
|
|
|
/* Return an ASCII representation of N. */
|
|
|
|
static UNCH *ltous(n)
|
|
long n;
|
|
{
|
|
static char buf[sizeof(long)*3 + 2];
|
|
sprintf(buf, "%ld", n);
|
|
return (UNCH *)buf;
|
|
}
|
|
|
|
VOID sgmlwrsd(fp)
|
|
FILE *fp;
|
|
{
|
|
int i;
|
|
int changed;
|
|
char *p;
|
|
char uc[256]; /* upper case characters (with different lower
|
|
case characters) */
|
|
char lcletter[256]; /* LC letters: a-z */
|
|
|
|
fprintf(fp, "<!SGML \"%s\"\n", standard);
|
|
fprintf(fp, "CHARSET\nBASESET \"%s//CHARSET %s//%s\"\nDESCSET\n",
|
|
SYSTEM_CHARSET_OWNER,
|
|
SYSTEM_CHARSET_DESCRIPTION,
|
|
SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
|
|
|
|
if (!done_nonsgml) {
|
|
done_nonsgml = 1;
|
|
for (i = 0; i < 256; i++)
|
|
if ((char_flags[i] & (CHAR_SIGNIFICANT | CHAR_SHUNNED))
|
|
== CHAR_SHUNNED)
|
|
char_flags[i] |= CHAR_NONSGML;
|
|
}
|
|
i = 0;
|
|
while (i < 256) {
|
|
int j;
|
|
for (j = i + 1; j < 256; j++)
|
|
if ((char_flags[j] & CHAR_NONSGML)
|
|
!= (char_flags[i] & CHAR_NONSGML))
|
|
break;
|
|
if (char_flags[i] & CHAR_NONSGML)
|
|
fprintf(fp, "%d %d UNUSED\n", i, j - i);
|
|
else
|
|
fprintf(fp, "%d %d %d\n", i, j - i, i);
|
|
i = j;
|
|
}
|
|
fprintf(fp, "CAPACITY\n");
|
|
changed = 0;
|
|
for (i = 0; i < NCAPACITY; i++)
|
|
if (refcapset[i] != sd.capacity[i]) {
|
|
if (!changed) {
|
|
fprintf(fp, "SGMLREF\n");
|
|
changed = 1;
|
|
}
|
|
fprintf(fp, "%s %ld\n", captab[i], sd.capacity[i]);
|
|
}
|
|
if (!changed)
|
|
fprintf(fp, "PUBLIC \"%s\"\n", capset_map[0].name);
|
|
fprintf(fp, "SCOPE DOCUMENT\n");
|
|
|
|
fprintf(fp, "SYNTAX\nSHUNCHAR");
|
|
for (i = 0; i < 256; i++)
|
|
if (char_flags[i] & CHAR_SHUNNED)
|
|
fprintf(fp, " %d", i);
|
|
fprintf(fp, "\n");
|
|
fprintf(fp, "BASESET \"%s//CHARSET %s//%s\"\nDESCSET 0 256 0\n",
|
|
SYSTEM_CHARSET_OWNER,
|
|
SYSTEM_CHARSET_DESCRIPTION,
|
|
SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
|
|
|
|
fprintf(fp, "FUNCTION\nRE 13\nRS 10\nSPACE 32\nTAB SEPCHAR 9\n");
|
|
|
|
MEMZERO((UNIV)uc, 256);
|
|
for (i = 0; i < 256; i++)
|
|
if (lextran[i] != i)
|
|
uc[lextran[i]] = 1;
|
|
|
|
MEMZERO((UNIV)lcletter, 256);
|
|
for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++)
|
|
lcletter[(unsigned char)*p]= 1;
|
|
|
|
fprintf(fp, "NAMING\n");
|
|
fputs("LCNMSTRT \"", fp);
|
|
for (i = 0; i < 256; i++)
|
|
if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
|
|
fprintf(fp, "&#%d;", i);
|
|
fputs("\"\n", fp);
|
|
fputs("UCNMSTRT \"", fp);
|
|
for (i = 0; i < 256; i++)
|
|
if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
|
|
fprintf(fp, "&#%d;", lextran[i]);
|
|
fputs("\"\n", fp);
|
|
fputs("LCNMCHAR \"", fp);
|
|
for (i = 0; i < 256; i++)
|
|
if (lextoke[i] == NMC && !uc[i])
|
|
fprintf(fp, "&#%d;", i);
|
|
fputs("\"\n", fp);
|
|
fputs("UCNMCHAR \"", fp);
|
|
for (i = 0; i < 256; i++)
|
|
if (lextoke[i] == NMC && !uc[i])
|
|
fprintf(fp, "&#%d;", lextran[i]);
|
|
fputs("\"\n", fp);
|
|
|
|
fprintf(fp, "NAMECASE\nGENERAL %s\nENTITY %s\n",
|
|
sd.namecase[0] ? "YES" : "NO",
|
|
sd.namecase[1] ? "YES" : "NO");
|
|
fprintf(fp, "DELIM\nGENERAL SGMLREF\nSHORTREF %s\n",
|
|
sd.shortref ? "SGMLREF" : "NONE");
|
|
fprintf(fp, "NAMES SGMLREF\n");
|
|
if (newkey) {
|
|
/* The reference key was saved in newkey. */
|
|
for (i = 0; i < NKEYS; i++)
|
|
if (newkey[i][0])
|
|
fprintf(fp, "%s %s\n", newkey[i], key[i]);
|
|
}
|
|
fprintf(fp, "QUANTITY SGMLREF\n");
|
|
if (quantity_changed)
|
|
for (i = 0; i < NQUANTITY; i++)
|
|
if (quantity_changed[i])
|
|
fprintf(fp, "%s %d\n", quantity_names[i], sd.quantity[i]);
|
|
fprintf(fp,
|
|
"FEATURES\nMINIMIZE\nDATATAG NO OMITTAG %s RANK NO SHORTTAG %s\n",
|
|
sd.omittag ? "YES" : "NO",
|
|
sd.shorttag ? "YES" : "NO");
|
|
fprintf(fp, "LINK SIMPLE NO IMPLICIT NO EXPLICIT NO\n");
|
|
fprintf(fp, "OTHER CONCUR NO ");
|
|
if (sd.subdoc > 0)
|
|
fprintf(fp, "SUBDOC YES %ld ", sd.subdoc);
|
|
else
|
|
fprintf(fp, "SUBDOC NO ");
|
|
fprintf(fp, "FORMAL %s\n", sd.formal ? "YES" : "NO");
|
|
fprintf(fp, "APPINFO NONE");
|
|
fprintf(fp, ">\n");
|
|
}
|
|
|
|
/*
|
|
Local Variables:
|
|
c-indent-level: 5
|
|
c-continued-statement-offset: 5
|
|
c-brace-offset: -5
|
|
c-argdecl-indent: 0
|
|
c-label-offset: -5
|
|
End:
|
|
*/
|