982 lines
36 KiB
C
982 lines
36 KiB
C
/*
|
|
* CDE - Common Desktop Environment
|
|
*
|
|
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
|
|
*
|
|
* These libraries and programs are free software; you can
|
|
* redistribute them and/or modify them under the terms of the GNU
|
|
* Lesser General Public License as published by the Free Software
|
|
* Foundation; either version 2 of the License, or (at your option)
|
|
* any later version.
|
|
*
|
|
* These libraries and programs are distributed in the hope that
|
|
* they will be useful, but WITHOUT ANY WARRANTY; without even the
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU Lesser General Public License for more
|
|
* details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with these librararies and programs; if not, write
|
|
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
|
|
* Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
/* $XConsortium: pars1.c /main/3 1996/06/19 17:16:31 drk $ */
|
|
#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
|
|
#define GI (tags[ts].tetd->etdgi+1) /* GI of current element. */
|
|
#define NEWGI (newetd->etdgi+1) /* GI of new tag. */
|
|
|
|
static VOID doincludes P((void));
|
|
static int pentname P((char *));
|
|
static struct mpos *newmpos P((void));
|
|
static VOID commbufs P((void));
|
|
static VOID checkdtd P((void));
|
|
|
|
/* PARSECON: Parse content of an element.
|
|
*/
|
|
int parsecon(tbuf, pcb)
|
|
UNCH *tbuf; /* Work area for tokenization. */
|
|
struct parse *pcb; /* Parse control block for this parse. */
|
|
{
|
|
int srn; /* SHORTREF delimiter number (1-32). */
|
|
int refrc; /* Return code from sentref, stagetd, etc. */
|
|
|
|
TRACECON(etagimct, dostag, datarc, pcb, conrefsw, didreq);
|
|
if (eodsw) return(EOD_);
|
|
if (didreq && (conrefsw & TAGREF)) {didreq = 0; goto conr;}
|
|
if (etagimct>0) {etagimsw = --etagimct ? 1 : 0; destack(); return(ETG_);}
|
|
if (dostag) {
|
|
conrefsw = conrefsv;
|
|
etisw = etiswsv;
|
|
if (charmode) {dostag = 0; return datarc;}
|
|
return stag(datarc);
|
|
}
|
|
if (conrefsw) {
|
|
conr:
|
|
destack();
|
|
conrefsw = 0;
|
|
return ETG_;
|
|
}
|
|
else if (eofsw) return(EOD_);
|
|
|
|
datarc = 0;
|
|
while (1) {
|
|
parse(pcb);
|
|
srn = (int)pcb->action - SRMIN; /* Just in case it's a SHORTREF. */
|
|
switch (pcb->action) {
|
|
case DCE_: /* Data character in element content. */
|
|
/* The data character might be a non-SGML character so
|
|
reprocess it using pcbconm. */
|
|
REPEATCC;
|
|
pcb = conpcb = &pcbconm;
|
|
pcb->newstate = pcbcnet;
|
|
continue;
|
|
case DAS_: /* Current character begins data. */
|
|
data = FPOS;
|
|
continue;
|
|
|
|
case NLF_: /* NET or SR returns data in lookahead buffer. */
|
|
datalen = (UNS)(ptcon - data); REPEATCC;
|
|
goto rcc;
|
|
|
|
case LAF_: /* Return data in lookahead buffer: mixed. */
|
|
datalen = (UNS)(ptcon+1 - data);
|
|
goto rcc;
|
|
|
|
case NON_: /* Single nonchar in nonchbuf. */
|
|
datalen = 2; data = nonchbuf;
|
|
goto nrcc;
|
|
|
|
case DAR_: /* Return data except for last char. */
|
|
REPEATCC;
|
|
case DAF_: /* Return data in source entity buffer. */
|
|
datalen = (UNS)(FPOS - data);
|
|
rcc:
|
|
REPEATCC;
|
|
case DEF_: /* Return data in data entity. */
|
|
nrcc:
|
|
datarc = DAF_;
|
|
if (pcb==&pcbcone) {
|
|
pcbconm.newstate = pcbcnet;
|
|
conpcb = &pcbconm;
|
|
}
|
|
if (charmode) return(datarc);
|
|
stagmin = MINNONE; stagreal = newetd = ETDCDATA;
|
|
return(stag(datarc));
|
|
|
|
case LAS_: /* Start lookahead buffer with current char. */
|
|
*(ptcon = data = tbuf+1) = *FPOS;
|
|
continue;
|
|
|
|
case LAM_: /* Move character to lookahead buffer. */
|
|
*++ptcon = *FPOS;
|
|
continue;
|
|
|
|
case STG_: /* Process non-null start-tag. */
|
|
CTRSET(tagctr); /* Start counting tag length. */
|
|
parsenm(tbuf, NAMECASE); /* Get the GI. */
|
|
newetd = etdref(tbuf);
|
|
if (newetd && newetd->adl) {
|
|
parseatt(newetd->adl, tbuf);
|
|
adlval((int)ADN(al), newetd);
|
|
}
|
|
parsetag(&pcbstag); /* Parse the tag ending. */
|
|
if ((CTRGET(tagctr)-tagdelsw)>=TAGLEN)
|
|
sgmlerr(66, &pcbstag, (UNCH *)0, (UNCH *)0);
|
|
if (!newetd) {
|
|
sgmlerr(132, pcb, tbuf+1, (UNCH *)0);
|
|
continue;
|
|
}
|
|
return(stagetd(&pcbstag));
|
|
|
|
case NST_: /* Process null start-tag. */
|
|
return nstetd();
|
|
|
|
case ETC_: /* End-tag in CDATA or RCDATA. */
|
|
case ETG_: /* Process non-null end-tag. */
|
|
newetd = etdref(parsenm(tbuf, NAMECASE)); /* Get the GI. */
|
|
parsetag(&pcbetag); /* Parse tag end. */
|
|
if (!newetd) /* Error: undefined.*/
|
|
sgmlerr(11, &pcbetag, tbuf+1, (UNCH *)0);
|
|
else if (etagetd(&pcbetag)>=0) return ETG_;/* Open element. */
|
|
if (pcb->action!=ETC_) continue;
|
|
/* Tag is undefined or not for an open element and we are in
|
|
a CDATA or RCDATA element; issue message and treat as
|
|
null end-tag (</>).
|
|
*/
|
|
sgmlerr(57, &pcbetag, (UNCH *)0, (UNCH *)0);
|
|
case NET_: /* Process null end-tag. */
|
|
if ((refrc = netetd(conpcb))!=0) return ETG_;
|
|
continue;
|
|
|
|
case NED_: /* Process null end-tag delimiter. */
|
|
etagmin = MINNET;
|
|
newetd = etagreal = ETDNET;
|
|
etagimct = etag();
|
|
etagimsw = etagimct ? 1 : 0; destack();
|
|
return ETG_;
|
|
case GTR_:
|
|
if (entget()!=-1) {
|
|
data = FPOS;
|
|
continue;
|
|
}
|
|
/* fall through */
|
|
case EOD_: /* End of primary file. */
|
|
if (ts<1) return(EOD_); /* Normal end: stack is empty. */
|
|
etagimct = ts-1; /* Treat as end-tag for top tag on stack. */
|
|
etagmin = MINETAG; etagreal = tags[0].tetd;
|
|
destack();
|
|
eofsw = 1; /* Return EOD_ after destacking all. */
|
|
return ETG_;
|
|
|
|
/* Short references ending with blanks:
|
|
If the blank sequence is followed by RE, go do SR7 or SR6.
|
|
If the entity is undefined and we are in mixed content,
|
|
the blanks must be returned as data. If not, they
|
|
can be ignored.
|
|
*/
|
|
case SR9_: /* Process SR9 (two or more blanks). */
|
|
REPEATCC; /* Make first blank the CC. */
|
|
case SR4_: /* Process SR4 (RS, blanks). */
|
|
parseseq(tbuf, BSEQLEN); /* Squeeze out all blanks. */
|
|
if (*FPOS=='\r') {srn = (srn==9) ? 7 : 6; data = tbuf; goto sr6;}
|
|
else REPEATCC;
|
|
if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc;
|
|
if (refrc>0) return refrc;
|
|
if (refrc==ENTUNDEF && pcb==&pcbconm)
|
|
{data = tbuf; goto nrcc;}
|
|
continue;
|
|
|
|
/* Short references ending with RE:
|
|
If the reference is defined, the RE is ignored.
|
|
For RE and RS RE,
|
|
no special action is needed if the reference is undefined,
|
|
as the RE will be processed immediately as the current character.
|
|
For B RE and RS B RE,
|
|
the input is primed with a special character that will
|
|
be treated as an RE that cannot be a short reference.
|
|
*/
|
|
case SR7_: /* Process SR7 (blanks, RE). */
|
|
datalen = (UNS)(FPOS - data);
|
|
case SR2_: /* Process SR2 (RE). */
|
|
case SR5_: /* Process SR5 (RS, RE). */
|
|
sr6: /* Process SR6 (RS, blanks, RE). */
|
|
if ((refrc = shortref(srn, pcb))!=ENTUNDEF) {
|
|
if (refrc==DEF_) goto nrcc; /* Defined: data entity. */
|
|
if (refrc>0) return refrc; /* Defined: tag entity. */
|
|
continue; /* Defined: not tag. */
|
|
}
|
|
if (pcb!=&pcbconm) continue; /* Not mixed; ignore chars. */
|
|
if (srn>=6) /* Return blanks as data. */
|
|
{*FPOS = lex.d.genre; REPEATCC; goto nrcc;}
|
|
case REF_: /* Undefined SR with RE; return record end. */
|
|
datarc = REF_;
|
|
if (charmode) return(datarc);
|
|
#if 0
|
|
/* The standard says this situation can force a tag.
|
|
See 323:3-6, 412:1-7. */
|
|
/* If RE would be ignored, don't treat it as start-tag
|
|
because it could force a required tag; but do change
|
|
state to show that an RE was ignored.
|
|
*/
|
|
if (scbsgml[pss].snext==scbsgmst) {
|
|
scbsgml[pss].snext = scbsgmnr;
|
|
TRACEGML(scbsgml, pss, conactsw, conact);
|
|
continue;
|
|
}
|
|
#endif
|
|
stagmin = MINNONE; stagreal = newetd = ETDCDATA;
|
|
return(stag(datarc));
|
|
|
|
case SR3_: /* Process SR3 (RS). */
|
|
REPEATCC;
|
|
if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc;
|
|
if (refrc>0) return refrc;
|
|
continue;
|
|
|
|
case RBR_: /* Two right brackets */
|
|
srn = 26;
|
|
REPEATCC;
|
|
/* fall through */
|
|
case SR1_: /* Process SR1 (TAB). */
|
|
case SR8_: /* Process SR8 (space). */
|
|
case SR19: /* Process SR19 (-). */
|
|
case SR26: /* Process SR26 (]). */
|
|
REPEATCC;
|
|
goto srproc;
|
|
|
|
case FCE_: /* Process free character (SR11-18, SR21-32). */
|
|
fce[0] = *FPOS;
|
|
srn = mapsrch(&lex.s.dtb[lex.s.fce], fce);
|
|
case SR10: /* Process SR10 ("). */
|
|
case SR11: /* Process SR11 (#). */
|
|
case SR20: /* Process SR20 (-). */
|
|
case SR25: /* Process SR25 ([). */
|
|
srproc:
|
|
if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc;
|
|
if (refrc>0) return refrc;
|
|
if (refrc==ENTUNDEF) { /* Treat the SR as data. */
|
|
data = FPOS - (srn==lex.s.hyp2);/* Two data chars if SR20.*/
|
|
if (pcb!=&pcbconm) { /* If not in mixed content: */
|
|
if (srn>=lex.s.data) { /* Change PCB. */
|
|
pcb = conpcb = &pcbconm;
|
|
pcb->newstate = pcbcnda;
|
|
}
|
|
}
|
|
else pcb->newstate = pcbcnda;/* Now in data found state. */
|
|
}
|
|
continue;
|
|
|
|
case ERX_: /* Entity ref in RCDATA: cancel ending delims.*/
|
|
lexcon[lex.d.tago] = lex.l.fre;
|
|
lexcon[lex.d.net] = lex.l.nonet;
|
|
lexlms[lex.d.msc] = lex.l.fre;
|
|
continue;
|
|
|
|
case EE_: /* Entity end in RCDATA: check nesting. */
|
|
if (es<rcessv) {synerr(37, pcb); rcessv = es;}
|
|
/* If back at top level, re-enable the ending delimiters. */
|
|
if (es==rcessv) {
|
|
lexcon[lex.d.tago] = lex.l.tago;
|
|
lexcon[lex.d.net] = etictr ? lex.l.net : lex.l.nonet;
|
|
lexlms[lex.d.msc] = lex.l.msc;
|
|
}
|
|
continue;
|
|
|
|
case PIE_: /* PI entity: same as PIS_. */
|
|
return PIS_;
|
|
|
|
case RSR_: /* Record start: ccnt=0; ++rcnt.*/
|
|
++RCNT; CTRSET(RSCC);
|
|
default:
|
|
return (int)pcb->action; /* Default (MD_ MDC_ MSS_ MSE_ PIS_). */
|
|
}
|
|
}
|
|
}
|
|
/* STAGETD: Process start-tag etd.
|
|
*/
|
|
int stagetd(pcb)
|
|
struct parse *pcb; /* Parse control block for this parse. */
|
|
{
|
|
if (!newetd->etdmod) {
|
|
sgmlerr(43, pcb, newetd->etdgi+1, (UNCH *)0);
|
|
++ds.etdercnt;
|
|
etdset(newetd, (UNCH)SMO+EMO+ETDOCC, &undechdr,
|
|
(PETD *)0, (PETD *)0, (PECB *)0);
|
|
TRACEETD(newetd);
|
|
}
|
|
stagmin = MINNONE; stagreal = newetd;
|
|
return stag(0);
|
|
}
|
|
/* NSTETD: Process null start-tag etd.
|
|
*/
|
|
int nstetd()
|
|
{
|
|
newetd = ts>0 ? tags[ts].tetd
|
|
: tags[0].tetd->etdmod[2].tu.thetd;
|
|
stagmin = MINNULL; stagreal = ETDNULL;
|
|
etisw = 0;
|
|
return stag(0);
|
|
}
|
|
/* ETAGETD: Process end-tag etd.
|
|
*/
|
|
int etagetd(pcb)
|
|
struct parse *pcb; /* Parse control block for this parse. */
|
|
{
|
|
etagmin = MINNONE; etagreal = newetd;
|
|
if ((etagimct = etag())<0) {
|
|
sgmlerr(E_ETAG, pcb, NEWGI, tags[ts].tetd->etdgi+1);
|
|
return etagimct;
|
|
}
|
|
etagimsw = etagimct ? 1 : 0; destack();
|
|
return ETG_;
|
|
}
|
|
/* NETETD: Process null end-tag etd.
|
|
*/
|
|
int netetd(pcb)
|
|
struct parse *pcb; /* Parse control block for this parse. */
|
|
{
|
|
if (ts<1) {
|
|
sgmlerr(51, pcb, (UNCH *)0, (UNCH *)0);
|
|
return 0;
|
|
}
|
|
etagmin = MINNULL; etagreal = ETDNULL;
|
|
etagimsw = 0; destack();
|
|
return ETG_;
|
|
}
|
|
/* SHORTREF: Process a short (alternative) reference to an entity.
|
|
Returns ENTUNDEF if entity is not defined, otherwise returns
|
|
the return code from stagetd or etagetd if the entity was
|
|
a tag, or zero if an error occurred somewhere.
|
|
*/
|
|
int shortref(srn, pcb)
|
|
int srn; /* Short reference number. */
|
|
struct parse *pcb; /* Parse control block for this parse. */
|
|
{
|
|
int rc; /* Return code from entopen. */
|
|
|
|
if (tags[ts].tsrm==SRMNULL || !tags[ts].tsrm[srn]) return ENTUNDEF;
|
|
if (!tags[ts].tsrm[srn]->estore) {
|
|
sgmlerr(93, pcb, tags[ts].tsrm[srn]->ename+1,
|
|
tags[ts].tsrm[0]->ename+1);
|
|
return(ENTUNDEF);
|
|
}
|
|
rc = entopen(tags[ts].tsrm[srn]);
|
|
if (rc==ENTDATA) return DEF_;
|
|
if (rc==ENTPI) return PIS_;
|
|
return(0);
|
|
}
|
|
/* PARSEPRO: Parse prolog.
|
|
Note: ptpro cannot overrun tbuf (and therefore needn't be
|
|
tested), as long as the buffer exceeds the longest
|
|
lookahead sequence in the content parse tables.
|
|
*/
|
|
int parsepro()
|
|
{
|
|
struct parse *oldpcb;
|
|
|
|
while (1) {
|
|
int rc; /* Return code: DAF MSS DCE */
|
|
switch (parse(propcb)) {
|
|
|
|
case LAS_: /* Start lookahead buffer with current char. */
|
|
*(ptpro = data = tbuf+1) = *FPOS;
|
|
continue;
|
|
case LAM_: /* Move character to lookahead buffer. */
|
|
*++ptpro = *FPOS;
|
|
continue;
|
|
case LAF_: /* Return data in lookahead buffer. */
|
|
datalen = (UNS)(ptpro+1 - data);
|
|
REPEATCC;
|
|
rc = DAF_;
|
|
break; /* Prolog ended; data pending. */
|
|
|
|
case DTD_: /* Process document type declaration. */
|
|
parsenm(tbuf, NAMECASE); /* Get declaration name. */
|
|
if (!ustrcmp(tbuf+1, sgmlkey)
|
|
&& !dtdsw && !sgmlsw++) {
|
|
#if 0
|
|
parse(&pcbmdi);
|
|
#endif
|
|
/* If we got some appinfo, return. */
|
|
if (sgmldecl())
|
|
return APP_;
|
|
continue;
|
|
}
|
|
if (!ustrcmp(tbuf+1, key[KDOCTYPE]) && !dtdsw++) {
|
|
startdtd();
|
|
mddtds(tbuf);
|
|
continue;
|
|
}
|
|
sgmlerr(E_MDNAME, propcb, tbuf+1, (UNCH *)0);
|
|
continue;
|
|
case DTE_: /* DOCTYPE declaration (and prolog) ended. */
|
|
REPEATCC; /* Put back char that followed MSC. */
|
|
if (es != 0)
|
|
sgmlerr(143, propcb, (UNCH *)0, (UNCH *)0);
|
|
else if (dtdrefsw) {/* Process referenced DTD before real DTE. */
|
|
dtdrefsw = 0; /* Keep us from coming through here again. */
|
|
REPEATCC; /* Put back MSC so it follows referenced DTD. */
|
|
entref(indtdent);
|
|
}
|
|
else mddtde(tbuf);
|
|
continue;
|
|
|
|
case MD_:
|
|
/* Process markup declaration within DTD or LPD. */
|
|
parsenm(tbuf, NAMECASE); /* Get declaration name. */
|
|
if (!ustrcmp(tbuf+1, key[KENTITY]))
|
|
mdentity(tbuf);
|
|
else if (!ustrcmp(tbuf+1, key[KUSEMAP]))
|
|
mdsrmuse(tbuf);
|
|
else if (!ustrcmp(tbuf+1, key[KATTLIST]))
|
|
mdadl(tbuf);
|
|
else if (!ustrcmp(tbuf+1, key[KSHORTREF]))
|
|
mdsrmdef(tbuf);
|
|
else if (!ustrcmp(tbuf+1, key[KELEMENT]))
|
|
mdelem(tbuf);
|
|
else if (!ustrcmp(tbuf+1, key[KNOTATION]))
|
|
mdnot(tbuf);
|
|
else
|
|
sgmlerr(E_MDNAME, propcb, tbuf+1, (UNCH *)0);
|
|
continue;
|
|
case MDC_: /* Process markup declaration comment. */
|
|
sgmlsw++; /* SGML declaration not allowed after comment */
|
|
parsemd(tbuf, NAMECASE, (struct parse *)0, NAMELEN);
|
|
continue;
|
|
|
|
case MSS_: /* Process marked section start. */
|
|
oldpcb = propcb;
|
|
propcb = mdms(tbuf, propcb);
|
|
if (propcb==&pcbmsc || propcb==&pcbmsrc) {
|
|
if (oldpcb == &pcbmds)
|
|
sgmlerr(135, oldpcb, (UNCH *)0, (UNCH *)0);
|
|
conpcb = propcb;
|
|
rc = DCE_;
|
|
break;
|
|
}
|
|
continue;
|
|
case MSE_: /* Process marked section end. */
|
|
if (mdmse()) propcb = &pcbmds;
|
|
continue;
|
|
case MSP_: /* Marked section start in prolog outside DTD */
|
|
rc = MSS_;
|
|
break;
|
|
case PIE_: /* PI entity: same as PIS_. */
|
|
return(PIS_);
|
|
|
|
case EOD_: /* Return end of primary entity. */
|
|
if (!sw.onlypro || propcb != &pcbpro || !dtdsw)
|
|
sgmlerr(127, propcb, (UNCH *)0, (UNCH *)0);
|
|
else {
|
|
setdtype();
|
|
checkdtd();
|
|
}
|
|
return propcb->action;
|
|
case PIS_: /* Return processing instruction (string). */
|
|
sgmlsw++; /* SGML declaration not allowed after PI */
|
|
return((int)propcb->action); /* Prolog will continue later. */
|
|
|
|
case CIR_: /* Chars ignored; trying to resume parse. */
|
|
synerr(E_RESTART, propcb);
|
|
REPEATCC;
|
|
continue;
|
|
case STE_: /* Start tag ended prolog */
|
|
REPEATCC;
|
|
REPEATCC;
|
|
rc = STE_;
|
|
break;
|
|
case PEP_: /* Previous character ended prolog. */
|
|
REPEATCC;
|
|
case DCE_: /* Data character ended prolog. */
|
|
REPEATCC;
|
|
rc = DCE_;
|
|
break;
|
|
case EE_: /* Illegal entity end in ignored marked section. */
|
|
/* An error message has already been given. */
|
|
continue;
|
|
default:
|
|
abort();
|
|
} /* switch */
|
|
setdtype(); /* First pass only: set document type. */
|
|
checkdtd();
|
|
if (sw.onlypro)
|
|
return EOD_;
|
|
TRACESET(); /* Set trace switches. */
|
|
endprolog();
|
|
/* *DOC is first element; stack it at level 0. */
|
|
stack(newetd = nextetd = stagreal = etagreal = docetd);
|
|
return(rc);
|
|
} /* while */
|
|
}
|
|
|
|
/* Allocate buffers that are used in the DTD. */
|
|
|
|
VOID startdtd()
|
|
{
|
|
nmgrp = (struct etd **)rmalloc((GRPCNT+1)*sizeof(struct etd *));
|
|
nnmgrp = (PDCB *)rmalloc((GRPCNT+1)*sizeof(PDCB));
|
|
gbuf = (struct thdr *)rmalloc((GRPGTCNT+3)*sizeof(struct thdr));
|
|
/* The extra 1 is for parsing the name of a parameter entity in
|
|
mdentity(). */
|
|
nmbuf = (UNCH *)rmalloc(NAMELEN+3);
|
|
pubibuf = (UNCH *)rmalloc(LITLEN+1);
|
|
sysibuf = (UNCH *)rmalloc(LITLEN+1);
|
|
commbufs();
|
|
doincludes();
|
|
}
|
|
|
|
static
|
|
VOID checkdtd()
|
|
{
|
|
struct dcncb *np;
|
|
|
|
if (sw.swundef) {
|
|
int i;
|
|
struct etd *ep;
|
|
struct srh *sp;
|
|
|
|
for (i = 0; i < ETDHASH; i++)
|
|
for (ep = etdtab[i]; ep; ep = ep->etdnext)
|
|
if (!ep->etdmod)
|
|
sgmlerr(140, (struct parse *)0, ep->etdgi + 1,
|
|
(UNCH *)0);
|
|
for (sp = srhtab[0]; sp; sp = sp->enext)
|
|
if (sp->srhsrm[0] == 0)
|
|
sgmlerr(152, (struct parse *)0, sp->ename + 1,
|
|
(UNCH *)0);
|
|
}
|
|
for (np = dcntab[0]; np; np = np->enext)
|
|
if (!np->defined)
|
|
sgmlerr(192, (struct parse *)0, np->ename + 1, (UNCH *)0);
|
|
}
|
|
|
|
/* Return non-zero if s is a valid parameter entity name.
|
|
If so put a transformed name in entbuf. */
|
|
|
|
static
|
|
int pentname(s)
|
|
char *s;
|
|
{
|
|
int i;
|
|
if (lextoke[(UNCH)*s] != NMS)
|
|
return 0;
|
|
entbuf[2] = ENTCASE ? lextran[(UNCH)*s] : (UNCH)*s;
|
|
for (i = 1; s[i]; i++) {
|
|
if (i > NAMELEN - 1)
|
|
return 0;
|
|
if (lextoke[(UNCH)s[i]] < NMC || s[i] == EOBCHAR)
|
|
return 0;
|
|
entbuf[i + 2] = ENTCASE ? lextran[(UNCH)s[i]] : (UNCH)s[i];
|
|
}
|
|
entbuf[1] = lex.d.pero;
|
|
entbuf[i + 2] = '\0';
|
|
entbuf[0] = (UNCH)(i + 3); /* length byte, PERO and '\0' */
|
|
return 1;
|
|
}
|
|
|
|
/* Handle sw.includes. */
|
|
|
|
static
|
|
VOID doincludes()
|
|
{
|
|
char **p;
|
|
if (!sw.includes)
|
|
return;
|
|
for (p = sw.includes; *p; p++) {
|
|
if (pentname(*p)) {
|
|
if (!entfind(entbuf)) {
|
|
union etext etx;
|
|
etx.c = savestr(key[KINCLUDE]);
|
|
entdef(entbuf, ESM, &etx);
|
|
++ds.ecbcnt;
|
|
ds.ecbtext += ustrlen(key[KINCLUDE]);
|
|
}
|
|
}
|
|
else
|
|
sgmlerr(138, (struct parse *)0, (UNCH *)*p, (UNCH *)0);
|
|
}
|
|
}
|
|
|
|
/* Allocate buffers that are use both in the DTD and the instance. */
|
|
|
|
static
|
|
VOID commbufs()
|
|
{
|
|
al = (struct ad *)rmalloc((ATTCNT+2)*sizeof(struct ad));
|
|
lbuf = (UNCH *)rmalloc(LITLEN + 1);
|
|
}
|
|
|
|
static
|
|
struct mpos *newmpos()
|
|
{
|
|
int j;
|
|
unsigned long *h;
|
|
struct mpos *p = (struct mpos *)rmalloc((GRPLVL+2)*sizeof(struct mpos));
|
|
|
|
assert(grplongs > 0);
|
|
h = (unsigned long *)rmalloc((GRPLVL+2)*grplongs*sizeof(unsigned long));
|
|
for (j = 0; j < GRPLVL+2; j++) {
|
|
p[j].h = h;
|
|
h += grplongs;
|
|
}
|
|
return p;
|
|
}
|
|
|
|
/* Perform end of prolog buffer allocation. */
|
|
|
|
VOID endprolog()
|
|
{
|
|
int i;
|
|
|
|
ambigfree();
|
|
if (dtdsw) {
|
|
frem((UNIV)nmgrp);
|
|
frem((UNIV)nnmgrp);
|
|
frem((UNIV)gbuf);
|
|
frem((UNIV)nmbuf);
|
|
frem((UNIV)sysibuf);
|
|
frem((UNIV)pubibuf);
|
|
}
|
|
else {
|
|
commbufs();
|
|
doincludes();
|
|
}
|
|
scbsgml = (struct restate *)rmalloc((TAGLVL+1)*sizeof(struct restate));
|
|
tags = (struct tag *)rmalloc((TAGLVL+1)*sizeof(struct tag));
|
|
grplongs = (GRPCNT + LONGBITS - 1)/LONGBITS;
|
|
for (i = 0; i < TAGLVL+1; i++)
|
|
tags[i].tpos = newmpos();
|
|
savedpos = newmpos();
|
|
}
|
|
|
|
/* SETDTYPE: Establish specified or default document type.
|
|
*/
|
|
VOID setdtype()
|
|
{
|
|
/* Initialize default model hdr for declared content. */
|
|
undechdr.ttype = MANY+MCHARS+MGI; /* Declared content is ANY. */
|
|
undechdr.tu.tnum = 0; /* No content model. */
|
|
|
|
/* Initialize content model and etd for *DOC. */
|
|
prcon[0].ttype = MGI; /* Model is an element model. */
|
|
prcon[0].tu.tnum = 2; /* A single group with a single GI in it. */
|
|
prcon[1].ttype = TTSEQ; /* Non-repeatable SEQ group. */
|
|
prcon[1].tu.tnum = 1; /* Only one token in group. */
|
|
prcon[2].ttype = TTETD; /* Token is an etd. */
|
|
docetd = etddef(indocetd); /* etd for document as a whole. */
|
|
etdset(docetd, ETDOCC, prcon, (PETD *)0, (PETD *)0, SRMNULL);
|
|
|
|
/* Put specified or default document type etd in *DOC model. */
|
|
if (!dtype) {
|
|
sgmlerr(E_DOCTYPE, propcb, (UNCH *)0, (UNCH *)0);
|
|
dtype = indefetd;
|
|
}
|
|
prcon[2].tu.thetd = etddef(dtype);
|
|
if (!prcon[2].tu.thetd->etdmod) {
|
|
if (dtype != indefetd)
|
|
sgmlerr(52, propcb, dtype+1, (UNCH *)0);
|
|
++ds.etdercnt;
|
|
etdset(prcon[2].tu.thetd, (UNCH)SMO+EMO+ETDUSED+ETDOCC, &undechdr,
|
|
(PETD *)0, (PETD *)0, (PECB *)0);
|
|
}
|
|
TRACEETD(docetd);
|
|
TRACEMOD(prcon);
|
|
TRACEETD(prcon[2].tu.thetd);
|
|
return;
|
|
}
|
|
/* PARSETAG: Tag end parser for SGML documents.
|
|
For start-tags, it
|
|
sets etisw to TAGNET if tag ended with ETI; otherwise to 0.
|
|
*/
|
|
VOID parsetag(pcb)
|
|
struct parse *pcb; /* Parse control block: pcbstag or pcbetag. */
|
|
{
|
|
tagdelsw = 1; /* Assume tag had an ETI or TAGC. */
|
|
switch (parse(pcb)) {
|
|
case ETIC: /* Tag closed with ETI. */
|
|
if (!sd.shorttag) synerr(194, pcb);
|
|
etisw = TAGNET; /* Set switch for stack entry flag. */
|
|
return;
|
|
case DSC:
|
|
synerr(9, pcb);
|
|
REPEATCC;
|
|
etisw = 0;
|
|
return;
|
|
case NVS: /* Att name or value token found. */
|
|
case NTV: /* Name token value found. */
|
|
synerr(E_POSSATT, pcb);
|
|
pcb->newstate = 0; /* Reset parse state. */
|
|
REPEATCC; /* Put it back for next read. */
|
|
tagdelsw = 0; /* Tag had no closing delimiter. */
|
|
etisw = 0; /* Don't flag stack entry. */
|
|
return;
|
|
case TAGO: /* Tag closing implied by TAGO. */
|
|
if (!sd.shorttag) synerr(193, pcb);
|
|
REPEATCC; /* Put it back for next read. */
|
|
tagdelsw = 0; /* Tag had no closing delimiter. */
|
|
case TAGC: /* Normal close. */
|
|
default: /* Invalid character (msg was sent). */
|
|
etisw = 0; /* Don't flag stack entry. */
|
|
return;
|
|
}
|
|
}
|
|
/* STAG: Check whether a start-tag is valid at this point in the document
|
|
structure, or whether other tags must precede it.
|
|
Special case processing is done for the fake tag, #CDATA, as
|
|
it is never stacked.
|
|
*/
|
|
int stag(dataret)
|
|
int dataret; /* Data pending: DAF_ REF_ 0=not #PCDATA. */
|
|
{
|
|
int rc, realrc; /* Return code from context or other test. */
|
|
int mexts = 0; /* >0=stack level of minus grp; -1=plus; 0=none.*/
|
|
|
|
badresw = pexsw = 0;
|
|
/* If real element (i.e., not #PCDATA) set mexts and test if empty. */
|
|
if (dataret==0) {
|
|
mexts = pexmex(newetd);
|
|
/* If element is declared empty, it is same as a conref. */
|
|
if (GET(newetd->etdmod->ttype, MNONE)) conrefsw = TAGREF;
|
|
}
|
|
if (GET(tags[ts].tetd->etdmod->ttype, MANY))
|
|
rc = mexts>0 ? RCMEX : RCHIT;
|
|
else rc = context(newetd, tags[ts].tetd->etdmod, tags[ts].tpos,
|
|
&tags[ts].status, mexts);
|
|
TRACESTG(newetd, dataret, rc, nextetd, mexts);
|
|
|
|
switch (rc) {
|
|
case RCEND: /* End current element, then retry start-tag. */
|
|
if (ts<1) realrc = RCMISS;
|
|
else realrc = RCEND;
|
|
break;
|
|
case RCREQ: /* Stack compulsory GI, then retry start-tag. */
|
|
realrc = RCREQ;
|
|
break;
|
|
case RCMISS: /* Start-tag invalid (#PCDATA or real). */
|
|
if (ts>0 && GET(tags[ts].tetd->etdmod->ttype, MANY))
|
|
realrc = RCEND;
|
|
else realrc = RCMISS;
|
|
break;
|
|
case RCMEX: /* Start-tag invalid (minus exception). */
|
|
etagimct = ts - mexts;
|
|
realrc = RCEND;
|
|
break;
|
|
case RCHITMEX: /* Invalid minus exclusion for required element. */
|
|
#if 0 /* This will have been detected by exclude.c. */
|
|
sgmlerr(E_MEXERR, &pcbstag, NEWGI, tags[mexts].tetd->etdgi+1);
|
|
#endif
|
|
case RCHIT: /* Start-tag was valid. */
|
|
realrc = RCHIT;
|
|
break;
|
|
case RCPEX: /* Start-tag valid only because of plus exception. */
|
|
pexsw = TAGPEX;
|
|
realrc = RCHIT;
|
|
break;
|
|
default:
|
|
abort();
|
|
}
|
|
|
|
switch (realrc) {
|
|
case RCEND: /* End current element, then retry start-tag. */
|
|
if (didreq) sgmlerr(07, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
|
|
didreq = 0; /* No required start-tag done. */
|
|
dostag = 1; etiswsv = etisw; /* Save real start-tag status. */
|
|
conrefsv = conrefsw; /* Save real start-tag conref. */
|
|
conrefsw = 0; /* Current element is not empty. */
|
|
etagmin = MINSTAG; destack(); /* Process omitted end-tag. */
|
|
return ETG_;
|
|
case RCREQ: /* Stack compulsory GI, then retry start-tag. */
|
|
if (!BADPTR(nextetd)) {
|
|
#if 0 /* This will have been detected in exclude.c. */
|
|
if ((mexts = pexmex(nextetd))>0)
|
|
sgmlerr(E_MEXERR, &pcbstag, nextetd->etdgi+1,
|
|
tags[mexts].tetd->etdgi+1);
|
|
#endif
|
|
if (!nextetd->etdmod) {
|
|
sgmlerr(53, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
|
|
etdset(nextetd, (UNCH)SMO+EMO+ETDOCC, &undechdr,
|
|
(PETD *)0, (PETD *)0, (PECB *)0);
|
|
++ds.etdercnt;
|
|
TRACEETD(nextetd);
|
|
}
|
|
}
|
|
if (BITOFF(nextetd->etdmin, SMO)) {
|
|
if (!BADPTR(stagreal))
|
|
sgmlerr(21, &pcbstag, nextetd->etdgi+1, stagreal->etdgi+1);
|
|
else if (stagreal==ETDCDATA)
|
|
sgmlerr(49, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
|
|
else sgmlerr(50, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
|
|
}
|
|
didreq = 1; /* Required start-tag done. */
|
|
dostag = 1; etiswsv = etisw; /* Save real start-tag status. */
|
|
etisw = 0; conrefsv = conrefsw; /* Save real start-tag conref. */
|
|
/* If element is declared empty, it is same as a conref. */
|
|
conrefsw = (GET(nextetd->etdmod->ttype, MNONE)) ? TAGREF : 0;
|
|
stack(nextetd); /* Process omitted start-tag. */
|
|
return STG_;
|
|
case RCMISS: /* Start-tag invalid (#PCDATA or actual). */
|
|
dostag = 0; contersw |= 1; didreq = 0;
|
|
if (dataret) {
|
|
if (dataret==REF_) badresw = 1;
|
|
else sgmlerr(E_CHARS, conpcb, tags[ts].tetd->etdgi+1, (UNCH *)0);
|
|
return dataret;
|
|
}
|
|
sgmlerr(E_CONTEXT, &pcbstag, NEWGI, tags[ts].tetd->etdgi+1);
|
|
if (stagmin!=MINNULL) stagmin = MINNONE; stack(newetd);
|
|
return STG_;
|
|
case RCHIT: /* Start-tag was valid. */
|
|
dostag = 0; didreq = 0;
|
|
if (dataret) return dataret;
|
|
stack(newetd);
|
|
return STG_;
|
|
}
|
|
return NOP_; /* To avoid Borland C++ warning */
|
|
}
|
|
/* PEXMEX: See if a GI is in a plus or minus exception group on the stack.
|
|
If in a minus, returns stack level of minus group; otherwise,
|
|
returns -1 if in a plus and not a minus, and zero if in neither.
|
|
*/
|
|
int pexmex(curetd)
|
|
struct etd *curetd; /* The etd for this GI. */
|
|
{
|
|
int tsl; /* Temporary stack level for looping. */
|
|
int pex = 0; /* 1=found in plus grp; 0=not. */
|
|
|
|
for (tsl = ts; tsl>0; --tsl) {
|
|
if (tags[tsl].tetd->etdmex && ingrp(tags[tsl].tetd->etdmex, curetd))
|
|
return(tsl);
|
|
if (tags[tsl].tetd->etdpex && ingrp(tags[tsl].tetd->etdpex, curetd))
|
|
pex = -1;
|
|
}
|
|
return(pex);
|
|
}
|
|
/* STACK: Add a new entry to the tag stack.
|
|
If there is no room, issue a message and reuse last position.
|
|
*/
|
|
VOID stack(curetd)
|
|
struct etd *curetd; /* The etd for this entry. */
|
|
{
|
|
/* Stack the new element type definition (error if no room). */
|
|
if (++ts>TAGLVL)
|
|
sgmlerr(E_STAGMAX, conpcb, curetd->etdgi+1, tags[--ts].tetd->etdgi+1);
|
|
tags[ts].tetd = curetd;
|
|
|
|
/* Set flags: plus exception + tag had ETI + context error + empty. */
|
|
tags[ts].tflags = (UNCH)pexsw + etisw + contersw + conrefsw; contersw = 0;
|
|
|
|
/* If tag had ETI, update ETI counter and enable NET if first ETI. */
|
|
if (etisw && ++etictr==1) lexcon[lex.d.net] = lexcnm[lex.d.net] = lex.l.net;
|
|
|
|
/* If etd has ALT table, use it; otherwise, use last element's ALT. */
|
|
if (curetd->etdsrm) {
|
|
if (curetd->etdsrm != SRMNULL && curetd->etdsrm[0] == NULL) {
|
|
/* Map hasn't been defined. Ignore it. */
|
|
sgmlerr(159, &pcbstag, curetd->etdgi + 1, (UNCH *)0);
|
|
curetd->etdsrm = 0;
|
|
tags[ts].tsrm = tags[ts-1].tsrm;
|
|
}
|
|
else
|
|
tags[ts].tsrm = curetd->etdsrm;
|
|
}
|
|
else
|
|
tags[ts].tsrm = tags[ts-1].tsrm;
|
|
|
|
/* Initialize rest of stack entry. */
|
|
tags[ts].status = 0;
|
|
tags[ts].tpos[0].g = 1; /* M: Index in model of next token to test.*/
|
|
tags[ts].tpos[0].t = 1; /* P: Index in tpos of current group. */
|
|
HITCLEAR(tags[ts].tpos[0].h);
|
|
tags[ts].tpos[1].g = 1; /* Index of group in model (dummy grp). */
|
|
tags[ts].tpos[1].t = 1; /* 1st token is next in grp to be tested. */
|
|
HITCLEAR(tags[ts].tpos[1].h); /* No hits yet as yet. */
|
|
TRACESTK(&tags[ts], ts, etictr);
|
|
|
|
exclude();
|
|
return;
|
|
}
|
|
/* ETAG: Check validity of an end-tag by seeing if it matches any tag
|
|
on the stack. If so, return the offset of the match from the
|
|
current entry (0=current). If there is no match, issue a message
|
|
and return an error code (-1).
|
|
If the newetd is ETDNET, a NET delimiter was found, so check for
|
|
a tag that ended with ETI instead of a matching GI.
|
|
*/
|
|
int etag()
|
|
{
|
|
int tsl = ts+1; /* Temporary stack level for looping. */
|
|
|
|
/* See if end-tag is anywhere on stack, starting at current entry. */
|
|
while (--tsl) {
|
|
if (newetd!=ETDNET ? newetd==tags[tsl].tetd : tags[tsl].tflags) {
|
|
TRACEETG(&tags[ts], newetd, tsl, ts-tsl);
|
|
return(ts-tsl);
|
|
}
|
|
}
|
|
return (-1); /* End-tag didn't match any start-tag. */
|
|
}
|
|
/* DESTACK:
|
|
Call ECONTEXT to see if element can be ended at this point.
|
|
and issue message if there are required tags left.
|
|
Remove the current entry from the tag stack.
|
|
Issue an error if the destacked element was not minimizable
|
|
and its end-tag was omitted.
|
|
*/
|
|
VOID destack()
|
|
{
|
|
register int ecode = 0; /* Error code (0=o.k.). */
|
|
UNCH *eparm2 = NULL; /* Second parameter of error message. */
|
|
register int minmsgsw; /* 1=message if tag omitted; 0=no message. */
|
|
|
|
/* If element has a content model (i.e., not a keyword) and there
|
|
are required tags left, and no CONREF attribute was specified,
|
|
issue an error message.
|
|
*/
|
|
if (!GET(tags[ts].tetd->etdmod->ttype, MKEYWORD)
|
|
&& !conrefsw
|
|
&& !econtext(tags[ts].tetd->etdmod, tags[ts].tpos, &tags[ts].status)) {
|
|
if (BADPTR(nextetd))
|
|
sgmlerr(54, conpcb, tags[ts].tetd->etdgi+1, (UNCH *)0);
|
|
else
|
|
sgmlerr(30, conpcb, tags[ts].tetd->etdgi+1, nextetd->etdgi+1);
|
|
}
|
|
/* If the current tag ended with ETI, decrement the etictr.
|
|
If etictr is now zero, disable the NET delimiter.
|
|
*/
|
|
if (GET(tags[ts--].tflags, TAGNET) && --etictr==0)
|
|
lexcon[lex.d.net] = lexcnm[lex.d.net] = lex.l.nonet;
|
|
|
|
minmsgsw = BITOFF(tags[ts+1].tetd->etdmin, EMO);
|
|
if (!conrefsw && minmsgsw && (etagimsw || etagmin==MINETAG)) {
|
|
/* Minimization caused by NET delimiter. */
|
|
if (BADPTR(etagreal)) ecode = 46;
|
|
/* Minimization caused by a containing end-tag. */
|
|
else {ecode = 20; eparm2 = etagreal->etdgi+1;}
|
|
}
|
|
else if (!conrefsw && etagmin==MINSTAG && (minmsgsw || ts<=0)) {
|
|
/* Minimization caused by out-of-context start-tag. */
|
|
if (!BADPTR(stagreal)) {
|
|
ecode = ts>0 ? 39 : 89;
|
|
eparm2 = stagreal->etdgi+1;
|
|
}
|
|
/* Minimization caused by out-of-context data. */
|
|
else if (stagreal==ETDCDATA) ecode = ts>0 ? 47 : 95;
|
|
/* Minimization caused by out-of-context short start-tag. */
|
|
else ecode = ts>0 ? 48 : 96;
|
|
if (ts<=0 && ecode) eodsw = 1;
|
|
}
|
|
if (ecode) sgmlerr((UNS)ecode, conpcb, tags[ts+1].tetd->etdgi+1, eparm2);
|
|
/* TEMP: See if parser bug caused stack to go below zero. */
|
|
else if (ts<0) {sgmlerr(64, conpcb, (UNCH *)0, (UNCH *)0); ts = 0;}
|
|
TRACEDSK(&tags[ts], &tags[ts+1], ts, etictr);
|
|
}
|
|
/*
|
|
Local Variables:
|
|
c-indent-level: 5
|
|
c-continued-statement-offset: 5
|
|
c-brace-offset: -5
|
|
c-argdecl-indent: 0
|
|
c-label-offset: -5
|
|
comment-column: 30
|
|
End:
|
|
*/
|