Initial import of the CDE 2.1.30 sources from the Open Group.
This commit is contained in:
224
cde/programs/dthelp/parser/pass2/util/context.c
Normal file
224
cde/programs/dthelp/parser/pass2/util/context.c
Normal file
@@ -0,0 +1,224 @@
|
||||
/* $XConsortium: context.c /main/3 1995/11/08 11:03:02 rswiston $ */
|
||||
/*
|
||||
Copyright 1986 Tandem Computers Incorporated.
|
||||
This product and information is proprietary of Tandem Computers Incorporated.
|
||||
Copyright (c) 1986, 1987, 1988, 1989 Hewlett-Packard Co.
|
||||
*/
|
||||
|
||||
/*
|
||||
Context.c contains the main procedure for program CONTEXT.
|
||||
|
||||
CONTEXT builds scanning and parsing tables for a simple
|
||||
state-transition recognizer. Input consists of two files, delimiter
|
||||
declarations in file "delim.dat" and state tables with corresponding
|
||||
actions in file "context.dat". If a parameter is present on the
|
||||
command line, a sparse matrix technique is used to encode some of the
|
||||
output arrays.
|
||||
|
||||
Data lines in "delim.dat" contain the name of the delimiter followed
|
||||
by the text of the delimiter (one or more non-white space
|
||||
characters). The delimiter name must begin in the first column.
|
||||
Lines with white-space in the first column are considered comments.
|
||||
|
||||
Data in "context.dat" consists of free-form multi-line declarations.
|
||||
Each declaration is terminated by a semi-colon. It begins with the
|
||||
name of a token followed by any number of pairs of contexts (parser
|
||||
states). The context pairs are separated by a colon and may
|
||||
optionally be followed by some C code enclosed in braces. In each
|
||||
pair, the first context is one in which the delimiter is recognized,
|
||||
the second is the state that will result from an occurrence of the
|
||||
delimiter in the first context. If code is specified, it is to be
|
||||
executed by the parser when the delimiter occurs in the first context
|
||||
of the pair. If successive pairs of contexts are separated by a
|
||||
comma, code which follows is common to all the pairs.
|
||||
|
||||
The tokens specified in "context.dat" can either be delimiters defined in
|
||||
"delim.dat" or other tokens (e.g., number, name, end_of_file) that will
|
||||
be recognized by the user-defined scanner.
|
||||
|
||||
For example, suppose "delim.dat" contains the line
|
||||
|
||||
grpo (
|
||||
|
||||
and "context.dat" includes
|
||||
|
||||
grpo element:namegroup
|
||||
content:model {openmod() ;}
|
||||
excon:openex
|
||||
pcon:type ;
|
||||
|
||||
name namegroup:namesep {eltname() ;}
|
||||
model:connector,
|
||||
model2:connector
|
||||
{storemod(nstring) ;}
|
||||
|
||||
These declarations specify that the "grpo" delimiter is "(" and is
|
||||
recognized in contexts "element", "content", "excon", and "pcon".
|
||||
Procedure openmod() is called when "grpo" occurs in "content" and,
|
||||
beyond the state change, no code is executed in other contexts when
|
||||
"grpo" occurs. The "name" token is valid in contexts "namegroup",
|
||||
"model", and "model2"; in the first, the context is set to "namesep"
|
||||
and procedure eltname() is called, in the last two, the context is
|
||||
set to "connector" and storemod(nstring) is called.
|
||||
|
||||
The program has several output files: case.c, delim.h, context.h, and
|
||||
error. Case.c contains nested switch statements to execute the code
|
||||
specified with the context transitions. Delim.h contains C
|
||||
declarations related to delimiters; context.h has declarations
|
||||
related to contexts. Programs that include delim.h should define
|
||||
M_DELIMDEF for definition form of the declarations and not define it
|
||||
for the declaration form. Programs that include context.h should
|
||||
define M_CONDEF analogously.
|
||||
|
||||
The program numbers the delimiter names and the contexts as
|
||||
encountered and outputs corresponding #define instructions with all
|
||||
letters in the names forced to uppercase to delim.h and context.h.
|
||||
In addition, an array named for each delimiter (with all letters
|
||||
forced to lowercase) is initialized to the delimiter text. The first
|
||||
declaration in the above example, for instance, would result in the
|
||||
following lines written to delim.h:
|
||||
|
||||
#define GRPO 1
|
||||
M_DELIMEXTERN char grpo[] M_DELIMINIT("(") ;
|
||||
|
||||
These lines would be written to context.h:
|
||||
|
||||
#define ELEMENT 1
|
||||
#define NAMEGROUP 2
|
||||
#define CONTENT 3
|
||||
#define MODEL 4
|
||||
#define EXCON 5
|
||||
#define OPENEX 6
|
||||
#define PCON 7
|
||||
#define TYPE 8
|
||||
#define NAMESEP 9
|
||||
#define CONNECTOR 10
|
||||
#define MODEL2 11
|
||||
|
||||
Delim.h also contains an array, m_dlmptr, which contains pointers to the
|
||||
names of all the defined delimiters. In addition, delim.h contains the
|
||||
declaration of a tree representation of the delimiters allowed in each
|
||||
context. Internally, the program represents the delimiter strings with a
|
||||
tree. The children of the root of this tree represent the set of characters
|
||||
with which delimiters can start. Children of other nodes represent
|
||||
characters that can follow the character of their parent. Leaf nodes
|
||||
are associated with the null character, indicating the end of a
|
||||
delimiter. A path from the root to a leaf thus represents a valid
|
||||
delimiter.
|
||||
|
||||
The form in which the tree is represented within this program is not
|
||||
quite the same as the output data structure. In the latter, each
|
||||
element consists of a flag called 'more' indicating whether it is the last
|
||||
child of its parent; a character, called 'symbol'; and an integer,
|
||||
called 'index'. All sons of a node are stored in adjacent elements
|
||||
ordered in increasing value of their 'symbol' fields. 'Symbol'
|
||||
contains the character associated with the node (0 if the node is a
|
||||
leaf node), with the sign bit turned on if the node is the last child
|
||||
of its parent. 'Index' contains the array index of the first son of
|
||||
the node, if the node is not a leaf. If the node is a leaf, 'index'
|
||||
is the number of the represented delimiter (i.e., 1 for "grpo" in the
|
||||
above example).
|
||||
|
||||
As the tree is constructed, it is not feasible to store brothers in adjacent
|
||||
elements, so the internal data structure is different. It contains a 'next'
|
||||
field which points to the first brother of a given node. Because the 'next'
|
||||
field can be used to detect the last child of a parent, it is not necessary
|
||||
to use the sign bit of 'symbol' to indicate this special case.
|
||||
|
||||
Two other arrays are written to context.h. Contree[i] points to the root
|
||||
of the delimiter tree for the ith context within the array tree whose
|
||||
declaration is in delim.h; nextcon[i][j] indicates the state
|
||||
resulting from an occurrence of delimiter j+1 in state i+1. Finally,
|
||||
a #define of MAXD to the length of the longest delimiter is written to
|
||||
context.h.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <malloc.h>
|
||||
#if defined(MSDOS)
|
||||
#include <process.h>
|
||||
#endif
|
||||
#include "basic.h"
|
||||
#include "trie.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "chartype.h"
|
||||
#define CONTDEF 1
|
||||
#include "cont.h"
|
||||
|
||||
/* Main procedure */
|
||||
void main(argc, argv)
|
||||
int argc ;
|
||||
char **argv ;
|
||||
{
|
||||
int n ;
|
||||
|
||||
m_openchk(&delim, "delim.h", "w") ;
|
||||
m_openchk(&context, "context.h", "w") ;
|
||||
m_openchk(&fcase, "case.c", "w") ;
|
||||
m_openchk(&cdat, "context.dat", "r") ;
|
||||
m_openchk(&ddat, "delim.dat", "r") ;
|
||||
m_openchk(&m_errfile, "error", "w") ;
|
||||
fputs(" switch(m_token) {\n", fcase) ; /* keep the "}" balanced */
|
||||
|
||||
fputs("#if defined(M_DELIMDEF)\n", delim) ;
|
||||
fputs("#define M_DELIMEXTERN\n", delim) ;
|
||||
fputs("#define M_DELIMINIT(a) = a\n", delim) ;
|
||||
fputs("#else\n", delim) ;
|
||||
fputs("#define M_DELIMEXTERN extern\n", delim) ;
|
||||
fputs("#define M_DELIMINIT(a)\n", delim) ;
|
||||
fputs("#endif\n\n", delim) ;
|
||||
|
||||
fputs("#if defined(M_CONDEF)\n", context) ;
|
||||
fputs("#define M_CONEXTERN\n", context) ;
|
||||
fputs("#else\n", context) ;
|
||||
fputs("#define M_CONEXTERN extern\n", context) ;
|
||||
fputs("#endif\n\n", context) ;
|
||||
|
||||
contree = (M_TRIE **) calloc(NUMCON, sizeof(M_TRIE *)) ;
|
||||
xtransit = (int *) calloc(NUMCON * NUMDELIM, sizeof(int)) ;
|
||||
contexts = (M_WCHAR **) calloc(NUMCON, sizeof(char *)) ;
|
||||
dlmptr = (char**) calloc(NUMDELIM, sizeof(char*)) ;
|
||||
|
||||
loaddelim() ;
|
||||
while (getdname())
|
||||
{
|
||||
casestarted = FALSE ;
|
||||
while ((n = getContext()) >= 0)
|
||||
{
|
||||
if (withdelim) enterdelim(n) ;
|
||||
getcolon() ;
|
||||
if (transit(n, curdelim))
|
||||
{
|
||||
if (! withdelim)
|
||||
{
|
||||
char *mb_dname, *mb_contexts;
|
||||
|
||||
mb_dname = MakeMByteString(dname);
|
||||
mb_contexts = MakeMByteString(contexts[n]);
|
||||
warning2(
|
||||
"Duplicate assignment to token \"%s\" in context \"%s\"",
|
||||
mb_dname,
|
||||
mb_contexts) ;
|
||||
m_free(mb_dname,"multi-byte string");
|
||||
m_free(mb_contexts,"multi-byte string");
|
||||
}
|
||||
}
|
||||
transit(n, curdelim) = getContext() + 1 ;
|
||||
getcode(n) ;
|
||||
}
|
||||
if (casestarted)
|
||||
{
|
||||
fprintf(fcase, " default:\n break ;\n") ;
|
||||
/* keep the "{" balanced */
|
||||
fprintf(fcase, " }\n break ;\n") ;
|
||||
}
|
||||
}
|
||||
/* keep the "{" balanced */
|
||||
fprintf(fcase, " default:\n break ;\n }\n") ;
|
||||
fprintf(context, "#define MAXD %d\n", maxd) ;
|
||||
dumptree((LOGICAL) (argc > 1)) ;
|
||||
fprintf(stderr, "NUMCON set to %d\n", NUMCON) ;
|
||||
fprintf(stderr, "NUMDELIM set to %d\n", NUMDELIM) ;
|
||||
exit(errexit) ;
|
||||
}
|
||||
Reference in New Issue
Block a user