doc2sdl: use POSIX regex functions.
This commit is contained in:
@@ -1,438 +0,0 @@
|
||||
/*
|
||||
* CDE - Common Desktop Environment
|
||||
*
|
||||
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
|
||||
*
|
||||
* These libraries and programs are free software; you can
|
||||
* redistribute them and/or modify them under the terms of the GNU
|
||||
* Lesser General Public License as published by the Free Software
|
||||
* Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* These libraries and programs are distributed in the hope that
|
||||
* they will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with these libraries and programs; if not, write
|
||||
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
|
||||
* Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
/*
|
||||
* Copyright 1993 Open Software Foundation, Inc., Cambridge, Massachusetts.
|
||||
* All rights reserved.
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1994
|
||||
* Open Software Foundation, Inc.
|
||||
*
|
||||
* Permission is hereby granted to use, copy, modify and freely distribute
|
||||
* the software in this file and its documentation for any purpose without
|
||||
* fee, provided that the above copyright notice appears in all copies and
|
||||
* that both the copyright notice and this permission notice appear in
|
||||
* supporting documentation. Further, provided that the name of Open
|
||||
* Software Foundation, Inc. ("OSF") not be used in advertising or
|
||||
* publicity pertaining to distribution of the software without prior
|
||||
* written permission from OSF. OSF makes no representations about the
|
||||
* suitability of this software for any purpose. It is provided "as is"
|
||||
* without express or implied warranty.
|
||||
*/
|
||||
/* ________________________________________________________________________
|
||||
*
|
||||
* Program to manipulate SGML instances.
|
||||
*
|
||||
* This module contains the initialization routines for translation module.
|
||||
* They mostly deal with reading data files (translation specs, SDATA
|
||||
* mappings, character mappings).
|
||||
*
|
||||
* Entry points:
|
||||
* ReadTransSpec(transfile) read/store translation spec from file
|
||||
* ReadSDATA(sdatafile) read/store SDATA mappings from file
|
||||
* ReadMapping(mapfile) read/store char mappings from file
|
||||
* ________________________________________________________________________
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char *RCSid =
|
||||
"$XConsortium: traninit.c /main/3 1996/06/19 17:13:22 drk $";
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <memory.h>
|
||||
#include <sys/types.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <tptregexp.h>
|
||||
#include "general.h"
|
||||
#include "translate.h"
|
||||
|
||||
/* forward references */
|
||||
void RememberTransSpec(Trans_t *, int);
|
||||
|
||||
/* ______________________________________________________________________ */
|
||||
/* Read the translation specs from the input file, storing in memory.
|
||||
* Arguments:
|
||||
* Name of translation spec file.
|
||||
*/
|
||||
|
||||
void
|
||||
ReadTransSpec(
|
||||
char *transfile
|
||||
)
|
||||
{
|
||||
FILE *fp;
|
||||
char buf[LINESIZE], *cp, *fn, *cp2;
|
||||
int lineno=0, c, i;
|
||||
Trans_t T;
|
||||
int giLen;
|
||||
char *giBuf;
|
||||
|
||||
if ((fp=OpenFile(transfile)) == NULL) {
|
||||
fprintf(stderr, "Can not open translation spec file '%s'.\n%s\n",
|
||||
transfile, strerror(errno));
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&T, 0, sizeof T); /* initialize/clear structure */
|
||||
while (fgets(buf, LINESIZE, fp)) /* read line from .ts file */
|
||||
{
|
||||
lineno++;
|
||||
/* skip comment and blank lines */
|
||||
if (buf[0] == '#' || buf[0] == NL) continue;
|
||||
|
||||
/* '-' indicates end of a spec. When we hit one, remember what we've
|
||||
* accumulated so far, and null-out the accumulating structure. */
|
||||
if (buf[0] == '-') {
|
||||
T.lineno = lineno;
|
||||
RememberTransSpec(&T, lineno);
|
||||
memset(&T, 0, sizeof T);
|
||||
continue;
|
||||
}
|
||||
|
||||
stripNL(buf);
|
||||
|
||||
/* See if next line is continued from this one -- ie. it starts with
|
||||
* whitespace. If so, append to current line. (This is similar to
|
||||
* how e-mail headers work...) */
|
||||
while (1) {
|
||||
c = getc(fp); /* 1st char of next line */
|
||||
if (IsWhite(c)) { /* space or tab? */
|
||||
/* keep getting characters until it's a non-whitespace */
|
||||
c = getc(fp);
|
||||
while (IsWhite(c)) c = getc(fp);
|
||||
ungetc(c, fp); /* put back non-whitespace */
|
||||
i = strlen(buf);
|
||||
fn = buf + i; /* point to end of string in buffer */
|
||||
fgets(fn, LINESIZE-i, fp); /* read and append to buf */
|
||||
lineno++;
|
||||
stripNL(buf);
|
||||
}
|
||||
else {
|
||||
ungetc(c, fp); /* put back non-whitespace */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Isolate field value */
|
||||
if ((cp=strchr(buf, ':'))) {
|
||||
cp++; /* point past colon */
|
||||
while (*cp && IsWhite(*cp)) cp++; /* point to content */
|
||||
}
|
||||
else {
|
||||
fprintf(stderr,
|
||||
"Trans spec error, missing colon (skipping line):\n %s\n", fn);
|
||||
continue;
|
||||
}
|
||||
fn = buf; /* fn is name of the field, cp the value. */
|
||||
|
||||
/* Check field names in order that they're likely to occur. */
|
||||
if (!strncmp("GI:", fn, 3)) {
|
||||
/* if we are folding the case of GIs, make all upper (unless
|
||||
it's an internal pseudo-GI name, which starts with '_') */
|
||||
if (fold_case && cp[0] != '_' && cp[0] != '#') {
|
||||
for (cp2=cp; *cp2; cp2++)
|
||||
if (islower(*cp2)) *cp2 = toupper(*cp2);
|
||||
}
|
||||
if (T.gi) {
|
||||
giLen = strlen(T.gi) + strlen(cp) + 2;
|
||||
Malloc(giLen, giBuf, char);
|
||||
strcpy(giBuf, T.gi);
|
||||
strcat(giBuf, " "); /* why we added "2" above */
|
||||
strcat(giBuf, cp);
|
||||
T.gi = AddElemName(giBuf);
|
||||
free(giBuf);
|
||||
} else {
|
||||
T.gi = AddElemName(cp);
|
||||
}
|
||||
}
|
||||
else if (!strncmp("StartText:", fn, 10)) T.starttext = strdup(cp);
|
||||
else if (!strncmp("EndText:", fn, 8)) T.endtext = strdup(cp);
|
||||
else if (!strncmp("Relation:", fn, 9)) {
|
||||
if (!T.relations) T.relations = NewMap(IMS_relations);
|
||||
SetMapping(T.relations, cp);
|
||||
}
|
||||
else if (!strncmp("StartCode:", fn, 10)) T.startcode = strdup(cp);
|
||||
else if (!strncmp("EndCode:", fn, 8)) T.endcode = strdup(cp);
|
||||
else if (!strncmp("Replace:", fn, 8)) T.replace = strdup(cp);
|
||||
else if (!strncmp("AttValue:", fn, 9)) {
|
||||
if (!T.nattpairs) {
|
||||
Malloc(1, T.attpair, AttPair_t);
|
||||
}
|
||||
else
|
||||
Realloc((T.nattpairs+1), T.attpair, AttPair_t);
|
||||
/* we'll split name/value pairs later */
|
||||
T.attpair[T.nattpairs].name = strdup(cp);
|
||||
T.nattpairs++;
|
||||
}
|
||||
/* If there's only one item in context, it's the parent. Treat
|
||||
* it specially, since it's easier to just check parent gi.
|
||||
*/
|
||||
else if (!strncmp("Context:", fn, 8)) T.context = strdup(cp);
|
||||
else if (!strncmp("Message:", fn, 8)) T.message = strdup(cp);
|
||||
else if (!strncmp("SpecID:", fn, 7)) T.my_id = atoi(cp);
|
||||
else if (!strncmp("Action:", fn, 7)) T.use_id = atoi(cp);
|
||||
else if (!strncmp("Content:", fn, 8)) T.content = strdup(cp);
|
||||
else if (!strncmp("PAttSet:", fn, 8)) T.pattrset = strdup(cp);
|
||||
else if (!strncmp("Ignore:", fn, 7)) {
|
||||
if (!strcmp(cp, "all")) T.ignore = IGN_ALL;
|
||||
else if (!strcmp(cp, "data")) T.ignore = IGN_DATA;
|
||||
else if (!strcmp(cp, "children")) T.ignore = IGN_CHILDREN;
|
||||
else
|
||||
fprintf(stderr, "Bad 'Ignore:' arg in transpec (line %d): %s\n",
|
||||
lineno, cp);
|
||||
}
|
||||
else if (!strncmp("VarValue:", fn, 9)) {
|
||||
char **tok;
|
||||
i = 2;
|
||||
tok = Split(cp, &i, S_STRDUP);
|
||||
T.var_name = tok[0];
|
||||
T.var_value = tok[1];
|
||||
}
|
||||
else if (!strncmp("Set:", fn, 4)) {
|
||||
if (!T.set_var) T.set_var = NewMap(IMS_setvar);
|
||||
SetMapping(T.set_var, cp);
|
||||
}
|
||||
else if (!strncmp("Increment:", fn, 10)) {
|
||||
if (!T.incr_var) T.incr_var = NewMap(IMS_incvar);
|
||||
SetMapping(T.incr_var, cp);
|
||||
}
|
||||
else if (!strncmp("NthChild:", fn, 9)) T.nth_child = atoi(cp);
|
||||
else if (!strncmp("Var:", fn, 4)) SetMapping(Variables, cp);
|
||||
else if (!strncmp("Quit:", fn, 5)) T.quit = strdup(cp);
|
||||
else
|
||||
fprintf(stderr, "Unknown translation spec (skipping it): %s\n", fn);
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
/* ______________________________________________________________________ */
|
||||
/* Store translation spec 't' in memory.
|
||||
* Arguments:
|
||||
* Pointer to translation spec to remember.
|
||||
* Line number where translation spec ends.
|
||||
*/
|
||||
void
|
||||
RememberTransSpec(
|
||||
Trans_t *t,
|
||||
int lineno
|
||||
)
|
||||
{
|
||||
char *cp;
|
||||
int i, do_regex;
|
||||
static Trans_t *last_t;
|
||||
|
||||
/* If context testing, check some details and set things up for later. */
|
||||
if (t->context) {
|
||||
/* See if the context specified is a regular expression.
|
||||
* If so, compile the reg expr. It is assumed to be a regex if
|
||||
* it contains a character other than what's allowed for GIs in the
|
||||
* OSF sgml declaration (alphas, nums, '-', and '.').
|
||||
*/
|
||||
for (do_regex=0,cp=t->context; *cp; cp++) {
|
||||
if (!isalnum(*cp) && *cp != '-' && *cp != '.' && *cp != ' ') {
|
||||
do_regex = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (do_regex) {
|
||||
t->depth = MAX_DEPTH;
|
||||
if (!(t->context_re=tpt_regcomp(t->context))) {
|
||||
fprintf(stderr, "Regex error in Context: %s\n", t->context);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* If there's only one item in context, it's the parent. Treat
|
||||
* it specially, since it's faster to just check parent gi.
|
||||
*/
|
||||
cp = t->context;
|
||||
if (!strchr(cp, ' ')) {
|
||||
t->parent = t->context;
|
||||
t->context = NULL;
|
||||
}
|
||||
else {
|
||||
/* Figure out depth of context string */
|
||||
t->depth = 0;
|
||||
while (*cp) {
|
||||
if (*cp) t->depth++;
|
||||
while (*cp && !IsWhite(*cp)) cp++; /* find end of gi */
|
||||
while (*cp && IsWhite(*cp)) cp++; /* skip space */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Compile regular expressions for each attribute */
|
||||
for (i=0; i<t->nattpairs; i++) {
|
||||
/* Initially, name points to "name value". Split them... */
|
||||
cp = t->attpair[i].name;
|
||||
while (*cp && !IsWhite(*cp)) cp++; /* point past end of name */
|
||||
if (*cp) { /* value found */
|
||||
*cp++ = EOS; /* terminate name */
|
||||
while (*cp && IsWhite(*cp)) cp++; /* point to value */
|
||||
t->attpair[i].val = cp;
|
||||
}
|
||||
else { /* value not found */
|
||||
t->attpair[i].val = ".";
|
||||
}
|
||||
if (!(t->attpair[i].rex=tpt_regcomp(t->attpair[i].val))) {
|
||||
fprintf(stderr, "Regex error in AttValue: %s %s\n",
|
||||
t->attpair[i].name, t->attpair[i].val);
|
||||
}
|
||||
}
|
||||
|
||||
/* Compile regular expression for content */
|
||||
t->content_re = 0;
|
||||
if (t->content) {
|
||||
if (!(t->content_re=tpt_regcomp(t->content)))
|
||||
fprintf(stderr, "Regex error in Content: %s\n",
|
||||
t->content);
|
||||
}
|
||||
|
||||
/* If multiple GIs, break up into a vector, then remember it. We either
|
||||
* sture the individual, or the list - not both. */
|
||||
if (t->gi && strchr(t->gi, ' ')) {
|
||||
t->gilist = Split(t->gi, 0, S_ALVEC);
|
||||
t->gi = NULL;
|
||||
}
|
||||
|
||||
/* Now, store structure in linked list. */
|
||||
if (!TrSpecs) {
|
||||
Malloc(1, TrSpecs, Trans_t);
|
||||
last_t = TrSpecs;
|
||||
}
|
||||
else {
|
||||
Malloc(1, last_t->next, Trans_t);
|
||||
last_t = last_t->next;
|
||||
}
|
||||
*last_t = *t;
|
||||
}
|
||||
|
||||
|
||||
/* ______________________________________________________________________ */
|
||||
/* Read mapping file, filling in structure slots (just name-value pairs).
|
||||
* Arguments:
|
||||
* Name of character mapping file.
|
||||
*/
|
||||
|
||||
void
|
||||
ReadCharMap(
|
||||
char *filename
|
||||
)
|
||||
{
|
||||
FILE *fp;
|
||||
char buf[LINESIZE], *name, *val;
|
||||
int lineno=0;
|
||||
int n_alloc=0; /* number of slots allocated so far */
|
||||
|
||||
if ((fp=OpenFile(filename)) == NULL) {
|
||||
fprintf(stderr, "Can not open character mapping file '%s'.\n%s\n",
|
||||
filename, strerror(errno));
|
||||
return;
|
||||
}
|
||||
|
||||
/* We allocate slots in blocks of N, so we don't have to call
|
||||
* malloc so many times. */
|
||||
n_alloc = 32;
|
||||
Calloc(n_alloc, CharMap, Mapping_t);
|
||||
|
||||
nCharMap = 0;
|
||||
while (fgets(buf, LINESIZE, fp))
|
||||
{
|
||||
lineno++;
|
||||
/* skip comment and blank lines */
|
||||
if (buf[0] == '#' || buf[0] == NL) continue;
|
||||
stripNL(buf);
|
||||
|
||||
/* Need more slots for mapping structures? */
|
||||
if (nCharMap >= n_alloc) {
|
||||
n_alloc += 32;
|
||||
Realloc(n_alloc, CharMap, Mapping_t);
|
||||
}
|
||||
name = val = buf;
|
||||
while (*val && !IsWhite(*val)) val++; /* point past end of name */
|
||||
if (*val) {
|
||||
*val++ = EOS; /* terminate name */
|
||||
while (*val && IsWhite(*val)) val++; /* point to value */
|
||||
}
|
||||
if (name) {
|
||||
CharMap[nCharMap].name = strdup(name);
|
||||
if (val) CharMap[nCharMap].sval = strdup(val);
|
||||
if (CharMap[nCharMap].name[0] == '\\') CharMap[nCharMap].name++;
|
||||
nCharMap++;
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
/* ______________________________________________________________________ */
|
||||
/* Read SDATA mapping file, remembering the mappings in memory.
|
||||
* Input file format is 2 columns, name and value, separated by one or
|
||||
* more tabs (not spaces).
|
||||
* This can be called multuple times, reading several files.
|
||||
* Arguments:
|
||||
* Name of SDATA entity mapping file.
|
||||
*/
|
||||
|
||||
void
|
||||
ReadSDATA(
|
||||
char *filename
|
||||
)
|
||||
{
|
||||
FILE *fp;
|
||||
char buf[LINESIZE], *name, *val;
|
||||
int lineno=0;
|
||||
|
||||
if ((fp=OpenFile(filename)) == NULL) {
|
||||
fprintf(stderr, "Can not open SDATA file '%s': %s", filename,
|
||||
strerror(errno));
|
||||
return;
|
||||
}
|
||||
|
||||
if (!SDATAmap) SDATAmap = NewMap(IMS_sdata);
|
||||
|
||||
while (fgets(buf, LINESIZE, fp))
|
||||
{
|
||||
lineno++;
|
||||
/* skip comment and blank lines */
|
||||
if (buf[0] == '#' || buf[0] == NL) continue;
|
||||
stripNL(buf);
|
||||
|
||||
name = val = buf;
|
||||
while (*val && *val != TAB) val++; /* point past end of name */
|
||||
if (*val) {
|
||||
*val++ = EOS; /* terminate name */
|
||||
while (*val && *val == TAB) val++; /* point to value */
|
||||
}
|
||||
|
||||
SetMappingNV(SDATAmap, name, val);
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
/* ______________________________________________________________________ */
|
||||
Reference in New Issue
Block a user