Initial import of the CDE 2.1.30 sources from the Open Group.
This commit is contained in:
468
cde/programs/nsgmls/parseCommon.C
Normal file
468
cde/programs/nsgmls/parseCommon.C
Normal file
@@ -0,0 +1,468 @@
|
||||
/* $XConsortium: parseCommon.C /main/1 1996/07/29 17:09:12 cde-hp $ */
|
||||
// Copyright (c) 1994 James Clark
|
||||
// See the file COPYING for copying permission.
|
||||
|
||||
#include "splib.h"
|
||||
#include "Parser.h"
|
||||
#include "token.h"
|
||||
#include "MessageArg.h"
|
||||
#include "ParserMessages.h"
|
||||
#include "constant.h"
|
||||
#include "NumericCharRefOrigin.h"
|
||||
|
||||
#ifdef SP_NAMESPACE
|
||||
namespace SP_NAMESPACE {
|
||||
#endif
|
||||
|
||||
Boolean Parser::parseProcessingInstruction()
|
||||
{
|
||||
currentInput()->startToken();
|
||||
Location location(currentLocation());
|
||||
StringC buf;
|
||||
for (;;) {
|
||||
Token token = getToken(piMode);
|
||||
if (token == tokenPic)
|
||||
break;
|
||||
switch (token) {
|
||||
case tokenEe:
|
||||
message(ParserMessages::processingInstructionEntityEnd);
|
||||
return 0;
|
||||
case tokenUnrecognized:
|
||||
reportNonSgmlCharacter();
|
||||
// fall through
|
||||
case tokenChar:
|
||||
buf += *currentInput()->currentTokenStart();
|
||||
if (buf.size()/2 > syntax().pilen()) {
|
||||
message(ParserMessages::processingInstructionLength,
|
||||
NumberMessageArg(syntax().pilen()));
|
||||
message(ParserMessages::processingInstructionClose);
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (buf.size() > syntax().pilen())
|
||||
message(ParserMessages::processingInstructionLength,
|
||||
NumberMessageArg(syntax().pilen()));
|
||||
noteMarkup();
|
||||
eventHandler().pi(new (eventAllocator()) ImmediatePiEvent(buf, location));
|
||||
return 1;
|
||||
}
|
||||
|
||||
Boolean Parser::parseLiteral(Mode litMode,
|
||||
Mode liteMode,
|
||||
size_t maxLength,
|
||||
const MessageType1 &tooLongMessage,
|
||||
unsigned flags,
|
||||
Text &text)
|
||||
{
|
||||
unsigned startLevel = inputLevel();
|
||||
Mode currentMode = litMode;
|
||||
// If the literal gets to be longer than this, then we assume
|
||||
// that the closing delimiter has been omitted if we're at the end
|
||||
// of a line and at the starting input level.
|
||||
size_t reallyMaxLength = (maxLength > size_t(-1)/2
|
||||
? size_t(-1)
|
||||
: maxLength * 2);
|
||||
text.clear();
|
||||
Location startLoc(currentLocation());
|
||||
if (flags & literalDelimInfo)
|
||||
text.addStartDelim(currentLocation());
|
||||
for (;;) {
|
||||
int done = 0;
|
||||
Token token = getToken(currentMode);
|
||||
switch (token) {
|
||||
case tokenEe:
|
||||
if (inputLevel() == startLevel) {
|
||||
message(ParserMessages::literalLevel);
|
||||
return 0;
|
||||
}
|
||||
text.addEntityEnd(currentLocation());
|
||||
popInputStack();
|
||||
if (inputLevel() == startLevel)
|
||||
currentMode = litMode;
|
||||
break;
|
||||
case tokenUnrecognized:
|
||||
if (reportNonSgmlCharacter())
|
||||
break;
|
||||
message(ParserMessages::literalMinimumData,
|
||||
StringMessageArg(currentToken()));
|
||||
break;
|
||||
case tokenRs:
|
||||
text.ignoreChar(currentChar(), currentLocation());
|
||||
break;
|
||||
case tokenRe:
|
||||
if (text.size() > reallyMaxLength && inputLevel() == startLevel) {
|
||||
#if 0
|
||||
message(tooLongMessage, NumberMessageArg(maxLength));
|
||||
#endif
|
||||
// guess that the closing delimiter has been omitted
|
||||
Messenger::setNextLocation(startLoc);
|
||||
message(ParserMessages::literalClosingDelimiter);
|
||||
return 0;
|
||||
}
|
||||
// fall through
|
||||
case tokenSepchar:
|
||||
if ((flags & literalSingleSpace)
|
||||
&& (text.size() == 0 || text.lastChar() == syntax().space()))
|
||||
text.ignoreChar(currentChar(), currentLocation());
|
||||
else
|
||||
text.addChar(syntax().space(),
|
||||
Location(new ReplacementOrigin(currentLocation(),
|
||||
currentChar()),
|
||||
0));
|
||||
break;
|
||||
case tokenSpace:
|
||||
if ((flags & literalSingleSpace)
|
||||
&& (text.size() == 0 || text.lastChar() == syntax().space()))
|
||||
text.ignoreChar(currentChar(), currentLocation());
|
||||
else
|
||||
text.addChar(currentChar(), currentLocation());
|
||||
break;
|
||||
case tokenCroDigit:
|
||||
{
|
||||
Char c;
|
||||
Location loc;
|
||||
if (!parseNumericCharRef(c, loc))
|
||||
return 0;
|
||||
if (flags & literalDataTag) {
|
||||
if (!syntax().isSgmlChar(c))
|
||||
message(ParserMessages::dataTagPatternNonSgml);
|
||||
else if (syntax().charSet(Syntax::functionChar)->contains(c))
|
||||
message(ParserMessages::dataTagPatternFunction);
|
||||
}
|
||||
if ((flags & literalSingleSpace)
|
||||
&& c == syntax().space()
|
||||
&& (text.size() == 0 || text.lastChar() == syntax().space()))
|
||||
text.ignoreChar(c, loc);
|
||||
else
|
||||
text.addChar(c, loc);
|
||||
}
|
||||
break;
|
||||
case tokenCroNameStart:
|
||||
if (!parseNamedCharRef())
|
||||
return 0;
|
||||
break;
|
||||
case tokenEroGrpo:
|
||||
message(inInstance() ? ParserMessages::eroGrpoStartTag : ParserMessages::eroGrpoProlog);
|
||||
break;
|
||||
case tokenLit:
|
||||
case tokenLita:
|
||||
if (flags & literalDelimInfo)
|
||||
text.addEndDelim(currentLocation(), token == tokenLita);
|
||||
done = 1;
|
||||
break;
|
||||
case tokenEroNameStart:
|
||||
case tokenPeroNameStart:
|
||||
{
|
||||
ConstPtr<Entity> entity;
|
||||
Ptr<EntityOrigin> origin;
|
||||
if (!parseEntityReference(token == tokenPeroNameStart,
|
||||
(flags & literalNoProcess) ? 2 : 0,
|
||||
entity, origin))
|
||||
return 0;
|
||||
if (!entity.isNull())
|
||||
entity->litReference(text, *this, origin,
|
||||
(flags & literalSingleSpace) != 0);
|
||||
if (inputLevel() > startLevel)
|
||||
currentMode = liteMode;
|
||||
}
|
||||
break;
|
||||
case tokenPeroGrpo:
|
||||
message(ParserMessages::peroGrpoProlog);
|
||||
break;
|
||||
case tokenChar:
|
||||
if (text.size() > reallyMaxLength && inputLevel() == startLevel
|
||||
&& currentChar() == syntax().standardFunction(Syntax::fRE)) {
|
||||
#if 0
|
||||
message(tooLongMessage, NumberMessageArg(maxLength));
|
||||
#endif
|
||||
// guess that the closing delimiter has been omitted
|
||||
Messenger::setNextLocation(startLoc);
|
||||
message(ParserMessages::literalClosingDelimiter);
|
||||
return 0;
|
||||
}
|
||||
text.addChar(currentChar(), currentLocation());
|
||||
break;
|
||||
}
|
||||
if (done) break;
|
||||
}
|
||||
if ((flags & literalSingleSpace)
|
||||
&& text.size() > 0
|
||||
&& text.lastChar() == syntax().space())
|
||||
text.ignoreLastChar();
|
||||
if (text.size() > maxLength) {
|
||||
switch (litMode) {
|
||||
case alitMode:
|
||||
case alitaMode:
|
||||
case talitMode:
|
||||
case talitaMode:
|
||||
if (AttributeValue::handleAsUnterminated(text, *this))
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
message(tooLongMessage, NumberMessageArg(maxLength));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
Boolean Parser::parseNamedCharRef()
|
||||
{
|
||||
InputSource *in = currentInput();
|
||||
Index startIndex = currentLocation().index();
|
||||
in->discardInitial();
|
||||
extendNameToken(syntax().namelen(), ParserMessages::nameLength);
|
||||
Char c;
|
||||
Boolean valid;
|
||||
StringC name;
|
||||
getCurrentToken(syntax().generalSubstTable(), name);
|
||||
if (!syntax().lookupFunctionChar(name, &c)) {
|
||||
message(ParserMessages::functionName, StringMessageArg(name));
|
||||
valid = 0;
|
||||
}
|
||||
else {
|
||||
valid = 1;
|
||||
if (wantMarkup())
|
||||
getCurrentToken(name); // the original name
|
||||
}
|
||||
NamedCharRef::RefEndType refEndType;
|
||||
switch (getToken(refMode)) {
|
||||
case tokenRefc:
|
||||
refEndType = NamedCharRef::endRefc;
|
||||
break;
|
||||
case tokenRe:
|
||||
refEndType = NamedCharRef::endRE;
|
||||
break;
|
||||
default:
|
||||
refEndType = NamedCharRef::endOmitted;
|
||||
break;
|
||||
}
|
||||
in->startToken();
|
||||
if (valid)
|
||||
in->pushCharRef(c, NamedCharRef(startIndex, refEndType, name));
|
||||
return 1;
|
||||
}
|
||||
|
||||
Boolean Parser::parseNumericCharRef(Char &ch, Location &loc)
|
||||
{
|
||||
InputSource *in = currentInput();
|
||||
Location startLocation = currentLocation();
|
||||
in->discardInitial();
|
||||
extendNumber(syntax().namelen(), ParserMessages::numberLength);
|
||||
Boolean valid = 1;
|
||||
Char c = 0;
|
||||
const Char *lim = in->currentTokenEnd();
|
||||
for (const Char *p = in->currentTokenStart(); p < lim; p++) {
|
||||
int val = sd().digitWeight(*p);
|
||||
if (c <= charMax/10 && (c *= 10) <= charMax - val)
|
||||
c += val;
|
||||
else {
|
||||
message(ParserMessages::characterNumber, StringMessageArg(currentToken()));
|
||||
valid = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (valid && !sd().docCharsetDecl().charDeclared(c)) {
|
||||
valid = 0;
|
||||
message(ParserMessages::characterNumber, StringMessageArg(currentToken()));
|
||||
}
|
||||
Owner<Markup> markupPtr;
|
||||
if (wantMarkup()) {
|
||||
markupPtr = new Markup;
|
||||
markupPtr->addDelim(Syntax::dCRO);
|
||||
markupPtr->addNumber(in);
|
||||
switch (getToken(refMode)) {
|
||||
case tokenRefc:
|
||||
markupPtr->addDelim(Syntax::dREFC);
|
||||
break;
|
||||
case tokenRe:
|
||||
markupPtr->addRefEndRe();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
(void)getToken(refMode);
|
||||
if (valid) {
|
||||
ch = c;
|
||||
loc = Location(new NumericCharRefOrigin(startLocation,
|
||||
currentLocation().index()
|
||||
+ currentInput()->currentTokenLength()
|
||||
- startLocation.index(),
|
||||
markupPtr),
|
||||
0);
|
||||
}
|
||||
return valid;
|
||||
}
|
||||
|
||||
// ignoreLevel: 0 means don't ignore;
|
||||
// 1 means parse name group and ignore if inactive
|
||||
// 2 means ignore
|
||||
|
||||
Boolean Parser::parseEntityReference(Boolean isParameter,
|
||||
int ignoreLevel,
|
||||
ConstPtr<Entity> &entity,
|
||||
Ptr<EntityOrigin> &origin)
|
||||
{
|
||||
InputSource *in = currentInput();
|
||||
Location startLocation(in->currentLocation());
|
||||
Owner<Markup> markupPtr;
|
||||
if (wantMarkup()) {
|
||||
markupPtr = new Markup;
|
||||
markupPtr->addDelim(isParameter ? Syntax::dPERO : Syntax::dERO);
|
||||
}
|
||||
if (ignoreLevel == 1) {
|
||||
Markup savedMarkup;
|
||||
Markup *savedCurrentMarkup = currentMarkup();
|
||||
if (savedCurrentMarkup)
|
||||
savedCurrentMarkup->swap(savedMarkup);
|
||||
Location savedMarkupLocation(markupLocation());
|
||||
startMarkup(markupPtr != 0, startLocation);
|
||||
if (markupPtr) {
|
||||
markupPtr->addDelim(Syntax::dGRPO);
|
||||
markupPtr->swap(*currentMarkup());
|
||||
}
|
||||
Boolean ignore;
|
||||
if (!parseEntityReferenceNameGroup(ignore))
|
||||
return 0;
|
||||
if (markupPtr)
|
||||
currentMarkup()->swap(*markupPtr);
|
||||
startMarkup(savedCurrentMarkup != 0, savedMarkupLocation);
|
||||
if (savedCurrentMarkup)
|
||||
savedMarkup.swap(*currentMarkup());
|
||||
if (!ignore)
|
||||
ignoreLevel = 0;
|
||||
in->startToken();
|
||||
Xchar c = in->tokenChar(messenger());
|
||||
if (!syntax().isNameStartCharacter(c)) {
|
||||
message(ParserMessages::entityReferenceMissingName);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
in->discardInitial();
|
||||
if (isParameter)
|
||||
extendNameToken(syntax().penamelen(), ParserMessages::parameterEntityNameLength);
|
||||
else
|
||||
extendNameToken(syntax().namelen(), ParserMessages::nameLength);
|
||||
StringC &name = nameBuffer();
|
||||
getCurrentToken(syntax().entitySubstTable(), name);
|
||||
if (ignoreLevel)
|
||||
entity = new IgnoredEntity(name,
|
||||
isParameter
|
||||
? Entity::parameterEntity
|
||||
: Entity::generalEntity);
|
||||
else {
|
||||
entity = lookupEntity(isParameter, name, startLocation, 1);
|
||||
if (entity.isNull()) {
|
||||
if (haveApplicableDtd())
|
||||
message(isParameter
|
||||
? ParserMessages::parameterEntityUndefined
|
||||
: ParserMessages::entityUndefined,
|
||||
StringMessageArg(name));
|
||||
else
|
||||
message(ParserMessages::entityApplicableDtd);
|
||||
}
|
||||
else if (entity->defaulted() && options().warnDefaultEntityReference)
|
||||
message(ParserMessages::defaultEntityReference, StringMessageArg(name));
|
||||
}
|
||||
if (markupPtr) {
|
||||
markupPtr->addName(in);
|
||||
switch (getToken(refMode)) {
|
||||
case tokenRefc:
|
||||
markupPtr->addDelim(Syntax::dREFC);
|
||||
break;
|
||||
case tokenRe:
|
||||
markupPtr->addRefEndRe();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
(void)getToken(refMode);
|
||||
if (!entity.isNull())
|
||||
origin = new (internalAllocator())
|
||||
EntityOrigin(entity, startLocation,
|
||||
currentLocation().index()
|
||||
+ currentInput()->currentTokenLength()
|
||||
- startLocation.index(),
|
||||
markupPtr);
|
||||
else
|
||||
origin = (EntityOrigin *)0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
Boolean Parser::parseComment(Mode mode)
|
||||
{
|
||||
Location startLoc(currentLocation());
|
||||
Markup *markup = currentMarkup();
|
||||
if (markup)
|
||||
markup->addCommentStart();
|
||||
Token token;
|
||||
while ((token = getToken(mode)) != tokenCom)
|
||||
switch (token) {
|
||||
case tokenUnrecognized:
|
||||
if (!reportNonSgmlCharacter())
|
||||
message(ParserMessages::sdCommentSignificant,
|
||||
StringMessageArg(currentToken()));
|
||||
break;
|
||||
case tokenEe:
|
||||
message(ParserMessages::commentEntityEnd, startLoc);
|
||||
return 0;
|
||||
default:
|
||||
if (markup)
|
||||
markup->addCommentChar(currentChar());
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
void Parser::extendNameToken(size_t maxLength,
|
||||
const MessageType1 &tooLongMessage)
|
||||
{
|
||||
InputSource *in = currentInput();
|
||||
size_t length = in->currentTokenLength();
|
||||
const Syntax &syn = syntax();
|
||||
while (syn.isNameCharacter(in->tokenChar(messenger())))
|
||||
length++;
|
||||
if (length > maxLength)
|
||||
message(tooLongMessage, NumberMessageArg(maxLength));
|
||||
in->endToken(length);
|
||||
}
|
||||
|
||||
|
||||
void Parser::extendNumber(size_t maxLength, const MessageType1 &tooLongMessage)
|
||||
{
|
||||
InputSource *in = currentInput();
|
||||
size_t length = in->currentTokenLength();
|
||||
while (syntax().isDigit(in->tokenChar(messenger())))
|
||||
length++;
|
||||
if (length > maxLength)
|
||||
message(tooLongMessage, NumberMessageArg(maxLength));
|
||||
in->endToken(length);
|
||||
}
|
||||
|
||||
Boolean Parser::reportNonSgmlCharacter()
|
||||
{
|
||||
Char c = getChar();
|
||||
if (!syntax().isSgmlChar(c)) {
|
||||
message(ParserMessages::nonSgmlCharacter, NumberMessageArg(c));
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Parser::extendS()
|
||||
{
|
||||
InputSource *in = currentInput();
|
||||
size_t length = in->currentTokenLength();
|
||||
while (syntax().isS(in->tokenChar(messenger())))
|
||||
length++;
|
||||
in->endToken(length);
|
||||
}
|
||||
|
||||
#ifdef SP_NAMESPACE
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user