Initial import of the CDE 2.1.30 sources from the Open Group.

This commit is contained in:
Peter Howkins
2012-03-10 18:21:40 +00:00
commit 83b6996daa
18978 changed files with 3945623 additions and 0 deletions

View File

@@ -0,0 +1,82 @@
# $XConsortium: Imakefile /main/12 1996/10/31 18:52:33 cde-ibm $
#
# (c) Copyright 1996 Digital Equipment Corporation.
# (c) Copyright 1996 Hewlett-Packard Company.
# (c) Copyright 1996 International Business Machines Corp.
# (c) Copyright 1996 Sun Microsystems, Inc.
# (c) Copyright 1996 Novell, Inc.
# (c) Copyright 1996 FUJITSU LIMITED.
# (c) Copyright 1996 Hitachi.
#
# Header search path: this directory plus the DtSearch library sources.
INCLUDES = -I. -I$(DTSEARCHSRC) -I$(DTSEARCHSRC)/raima
# MAIN_PROGRAM is defined for every program compiled from this directory.
DEFINES = -DMAIN_PROGRAM
# Extra compile flags for the Alpha and USL builds; both turn on BYTE_SWAP.
#ifdef AlphaArchitecture
EXTRA_DEFINES = -DBYTE_SWAP -D_XOPEN_SOURCE -D_OSF_SOURCE
#endif
#ifdef USLArchitecture
EXTRA_DEFINES = -DBYTE_SWAP -D_XOPEN_SOURCE
#endif
# BYTE_SWAP_LIB is -lisode on AIX builds and empty everywhere else.
#ifdef AIXArchitecture
BYTE_SWAP_LIB = -lisode
#else
BYTE_SWAP_LIB =
#endif
DEP_LIBS = $(DEPXLIB)
LOCAL_LIBRARIES = $(DTSEARCHLIB)
SYS_LIBRARIES = -lc -lm
# Programs built here.  There is no PROGRAM8 in this list.
PROGRAMS = $(PROGRAM1) $(PROGRAM2) $(PROGRAM3) $(PROGRAM4) $(PROGRAM5) \
$(PROGRAM6) $(PROGRAM7) $(PROGRAM9) $(PROGRAM10)
PROGRAM1 = dtsrcreate
SRCS1 = dtsrcreate.c
OBJS1 = dtsrcreate.o
PROGRAM2 = dtsrdbrec
SRCS2 = dtsrdbrec.c
OBJS2 = dtsrdbrec.o
PROGRAM3 = dtsrhan
SRCS3 = dtsrhan.c
OBJS3 = dtsrhan.o
PROGRAM4 = dtsrload
SRCS4 = dtsrload.c
OBJS4 = dtsrload.o
PROGRAM5 = dtsrindex
SRCS5 = dtsrindex.c
OBJS5 = dtsrindex.o
# NOTE: dtsrdelete is built from tomita.c; the source name differs from the program name.
PROGRAM6 = dtsrdelete
SRCS6 = tomita.c
OBJS6 = tomita.o
PROGRAM7 = dtsrclean
SRCS7 = dtsrclean.c
OBJS7 = dtsrclean.o
PROGRAM9 = huffcode
SRCS9 = huffcode.c
OBJS9 = huffcode.o
PROGRAM10 = dtsrkdump
SRCS10 = dtsrkdump.c
OBJS10 = dtsrkdump.o
# One link rule per program.  Only dtsrload and dtsrclean add BYTE_SWAP_LIB.
ComplexProgramTarget_1($(PROGRAM1),$(LOCAL_LIBRARIES), /* */)
ComplexProgramTarget_2($(PROGRAM2),$(LOCAL_LIBRARIES), /* */)
ComplexProgramTarget_3($(PROGRAM3),$(LOCAL_LIBRARIES), /* */)
ComplexProgramTarget_4($(PROGRAM4),$(LOCAL_LIBRARIES) $(BYTE_SWAP_LIB), /* */)
ComplexProgramTarget_5($(PROGRAM5),$(LOCAL_LIBRARIES), /* */)
ComplexProgramTarget_6($(PROGRAM6),$(LOCAL_LIBRARIES), /* */)
ComplexProgramTarget_7($(PROGRAM7),$(LOCAL_LIBRARIES) $(BYTE_SWAP_LIB), /* */)
ComplexProgramTarget_9($(PROGRAM9),$(LOCAL_LIBRARIES), /* */)
ComplexProgramTarget_10($(PROGRAM10),$(LOCAL_LIBRARIES), /* */)

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,803 @@
/*
* COMPONENT_NAME: austext
*
* FUNCTIONS: change_max_wordsize
* change_min_wordsize
* confirm_ok_to_overwrite
* create_new_dbd
* main
* print_usage
* remove_d9x_file
* user_args_processor
*
* ORIGINS: 27
*
*
* (C) COPYRIGHT International Business Machines Corp. 1993,1996
* All Rights Reserved
* Licensed Materials - Property of IBM
* US Government Users Restricted Rights - Use, duplication or
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
*/
/************************* DTSRCREATE.C **************************
* $XConsortium: dtsrcreate.c /main/9 1996/09/23 21:02:04 cde-ibm $
* October 1993.
* Program formerly named initausd, in module initausd.c.
* Essentially performs the same function as vista's initdb,
* but uses only the dtsearch.dbd and renames the files during creation.
* Also creates and initializes the first slot, the 'dbrec'.
*
* $Log$
* Revision 2.8 1996/03/25 18:53:33 miker
* Changed FILENAME_MAX to _POSIX_PATH_MAX.
*
* Revision 2.7 1996/02/01 18:16:16 miker
* Changed some arg defaults depending on DTSEARCH definition.
* Deleted BETA definition.
*
* Revision 2.6 1995/12/27 16:10:03 miker
* Permit -wx before -wn on command line.
*
* Revision 2.5 1995/12/07 23:27:51 miker
* Fixed bug: minwordsz was > max when max inited to -1.
* Changed 'Engine Type' to 'Flavor' for AusBuild.
*
* Revision 2.4 1995/10/25 21:09:42 miker
* Added prolog.
*
* Revision 2.3 1995/10/20 21:28:25 miker
* Intelligently look for dtsearch.dbd in 3 places.
*
* Revision 2.2 1995/10/19 21:16:17 miker
* Internally rename database files so it doesn't have to be
* done at open time. Ask permission to overwrite preexisting
* files. Always create databases from current model dtsearch.dbd
* (eliminate SECOND CASE). Coincides with libDtvis enhancements.
*
* Revision 2.1 1995/09/22 19:32:18 miker
* Freeze DtSearch 0.1, AusText 2.1.8
*
* Revision 1.2 1995/09/19 21:56:53 miker
* Enabled Japanese language DtSrJPN.
* If DtSearch, use DtSrVERSION instead of AUSAPI_VERSION in banner.
*
* Revision 1.1 1995/08/31 20:50:28 miker
* Initial revision
*/
#include "SearchP.h"
#include <limits.h>
#include <errno.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <locale.h>
#include "vista.h"
/* Program name used as a prefix in most messages printed by this file. */
#define PROGNAME "DTSRCREATE"
/* Default minimum word size, and the "standard" maximum word size that
 * is used when no -wx option is given (see user_args_processor()).
 */
#define DEFAULT_MINWORD (MINWIDTH_TOKEN + 1)
#define STANDARD_MAXWORD (DtSrMAXWIDTH_HWORD - 1)
/* Set numbers passed as the second argument of catgets() in this file. */
#define MS_misc 1
#define MS_initausd 12
/* File name of the model database dictionary copied by create_new_dbd(). */
#define FNAME_MODEL "dtsearch.dbd"
/* The following MUST MATCH vista's dbtype.h! */
#define SIZEOF_FILE_ENTRY 252 /* sizeof(FILE_ENTRY) */
#define DBD_COMPAT_LEN 6
/* Byte offset in the .dbd file at which create_new_dbd() starts rewriting
 * file names; entry i is at START_OF_FT + i * SIZEOF_FILE_ENTRY.
 */
#define START_OF_FT (DBD_COMPAT_LEN + (8 * sizeof(INT)))
/* Values for 'flavor' global variable */
#define AUSTEXT_FLAVOR 'a'
#define DTSEARCH_FLAVOR 'd'
/*------------------ GLOBALS -------------------*/
/* Abstract size from -a; -1 means "not given", in which case
 * user_args_processor() derives a default from max_ormisc_size.
 */
static int abstrsz = -1;
/* Bare database name (no path); placeholder text until
 * user_args_processor() copies in the real 1 - 8 character name.
 */
static char dbname [12] = "<dbname>";
/* Database header record that main() fills in and writes. */
struct or_dbrec dbrec;
/* Set by the unadvertised "-russell" option; enables trace printfs. */
static int debug_mode = FALSE;
/* Not referenced anywhere else in this file. */
static char default_cant_open_msg[] =
"%s: %s: %s.\n";
/* Never assigned in this file, so it remains 0 here. */
static int fzkeysz = 0;
static int flavor = DTSEARCH_FLAVOR;
static int language = DtSrLaENG;
static int minwordsz = DEFAULT_MINWORD;
/* INT_MAX is a sentinel meaning "no -wx option seen"; it is replaced by
 * STANDARD_MAXWORD at the end of user_args_processor().
 */
static int maxwordsz = INT_MAX;
/* The next three are set in main() from sizeof() of record fields. */
static int max_ormisc_size;
static int maxwidth_lword;
static int maxwidth_sword;
static char modelpath [_POSIX_PATH_MAX];
/* path/name of model dbd file */
static char newpath [_POSIX_PATH_MAX];
/* path/name for each renamed file */
static char *newextp; /* loc where extension suffixes placed */
/* TRUE after -o or after the user answers 'y' to the overwrite prompt. */
static int ok_to_overwrite = FALSE;
/* Offset within newpath at which the bare database name begins. */
static long path_offset = 0;
static int quiet_mode = FALSE;
/* File name extensions written into the new .dbd by create_new_dbd(). */
static char *exttab[] = {
/* Must be in same order as model .dbd file tables */
".d00", ".d01", ".d21", ".d22", ".d23",
".k00", ".k01", ".k21", ".k22", ".k23",
NULL };
/* Same as MS_initausd, 213... */
static char default_unable_to_open_msg[] =
"%1$s Unable to open '%2$s':\n %3$s.\a\n";
/************************************************/
/* */
/* confirm_ok_to_overwrite */
/* */
/************************************************/
/* Called whenever we are about to write a new file.
 *
 * fname: path of the file about to be written.
 *
 * If overwriting has already been approved (-o option or an earlier
 * 'y' answer), or if the file cannot be opened for reading (treated as
 * "does not exist"), returns immediately.  Otherwise prompts once for
 * permission to overwrite all preexisting database files.  A reply
 * beginning with 'y' or 'Y' sets ok_to_overwrite so the question is
 * never asked again; any other reply, including end of input, calls
 * DtSearchExit (2).
 */
static void confirm_ok_to_overwrite (char *fname)
{
    FILE *fptr;
    int reply;

    if (ok_to_overwrite)
        return;

    /* Probe the file that was passed in (the original ignored fname and
     * always probed the global newpath; the only caller passes newpath).
     */
    if ((fptr = fopen (fname, "r")) == NULL)
        return;
    fclose (fptr);

    printf ( catgets(dtsearch_catd, MS_initausd, 12,
        "\nFile '%s' already exists.\n"
        "Is it ok to overwrite it and other database files? [y,n] ") ,
        fname);
    /* The prompt has no trailing newline: flush so it is visible
     * before we block waiting for the answer.
     */
    fflush (stdout);

    reply = tolower (getchar());
    if (reply == 'y')
        ok_to_overwrite = TRUE;
    else
        DtSearchExit (2);
    return;
} /* confirm_ok_to_overwrite() */
/************************************************/
/* */
/* change_max_wordsize */
/* */
/************************************************/
/* Helper for user_args_processor(): handles the value of a -wx option.
 *
 * new_size: text following "-wx", converted with atoi().
 *
 * Stores the requested size in maxwordsz, rejects it (returns FALSE)
 * when it is smaller than the current minwordsz, and otherwise snaps it
 * to one of the three sizes the schema supports: maxwidth_sword,
 * maxwidth_lword, or DtSrMAXWIDTH_HWORD - 1.  Returns TRUE on success.
 */
static int change_max_wordsize (char *new_size)
{
    const int requested = atoi (new_size);
    int is_schema_size;

    maxwordsz = requested;

    /* Minimum and maximum must not contradict each other. */
    if (maxwordsz < minwordsz) {
        printf (catgets (dtsearch_catd, MS_initausd, 5,
            PROGNAME" Minimum word size %d greater "
            "than maximum word size %d.\n"),
            minwordsz, maxwordsz);
        return FALSE;
    }

    /* Anything that is not already an exact schema size is replaced by
     * the first candidate (short, long, huge) it is smaller than,
     * or by the huge size when it is smaller than neither short nor long.
     */
    is_schema_size = (requested == maxwidth_sword)
        || (requested == maxwidth_lword)
        || (requested == DtSrMAXWIDTH_HWORD - 1);
    if (!is_schema_size) {
        maxwordsz = (requested < maxwidth_sword) ? maxwidth_sword
            : (requested < maxwidth_lword) ? maxwidth_lword
            : DtSrMAXWIDTH_HWORD - 1;
    }

    if (requested != maxwordsz) {
        printf (catgets (dtsearch_catd, MS_initausd, 8,
            PROGNAME " Adjusted maximum word size to %d.\n"),
            maxwordsz);
    }

    /* Final warning about large word sizes (not for German, not when quiet) */
    if (!quiet_mode && language != DtSrLaDEU && maxwordsz > STANDARD_MAXWORD) {
        printf (catgets (dtsearch_catd, MS_initausd, 10,
            PROGNAME" Specifying large maximum word sizes may "
            "significantly\n increase storage requirements.\n"));
    }
    return TRUE;
} /* change_max_wordsize() */
/************************************************/
/* */
/* change_min_wordsize */
/* */
/************************************************/
/* Subroutine of user_args_processor().
* Adjusts minwordsz per user request.
*/
static int change_min_wordsize (char *new_size)
{
int old_minwordsz = minwordsz;
if ((minwordsz = atoi (new_size)) < 0)
return FALSE;
/* error if min and max specifications incompatible */
if (minwordsz > maxwordsz) {
printf (catgets (dtsearch_catd, MS_initausd, 5,
PROGNAME " Minimum word size %d greater than "
"maximum word size %d.\n"),
minwordsz, maxwordsz);
return FALSE;
}
if (!quiet_mode) {
if (minwordsz != old_minwordsz)
printf (catgets (dtsearch_catd, MS_initausd, 6,
PROGNAME " Adjusted minimum word size to %d.\n"),
minwordsz);
/* give user a warning about short word sizes */
if (minwordsz < DEFAULT_MINWORD)
printf (catgets (dtsearch_catd, MS_initausd, 9,
PROGNAME " Specifying small minimum word sizes"
" may require extensive\n"
" editing of stopword file to prevent significantly\n"
" increased index storage requirements.\n"));
}
return TRUE;
} /* change_min_wordsize() */
/************************************************/
/* */
/* print_usage */
/* */
/************************************************/
/* Prints the command line usage summary to stdout.
 * The message is looked up in the message catalog (set MS_initausd,
 * message 3) with the English text below as the fallback.  The two
 * conversions in the text are filled with aa_argv0 (program name) and
 * DEFAULT_MINWORD.
 */
static void print_usage (void)
{
    printf (catgets (dtsearch_catd, MS_initausd,
        3,
        "\nUSAGE: %s [-options] dbname\n"
        " Creates and initializes DtSearch/AusText database files.\n"
        " -q Do not print information messages.\n"
        " -o Ok to overwrite preexisting database.\n"
        " -a<n> Set maximum abstract size to <N> (default per flavor).\n"
        " -d<dir> Dir containing "FNAME_MODEL" file if not in dbname dir.\n"
        " -wn<n> Change minimum word size to <N>. Default is %d.\n"
        " -wx<n> Change maximum word size to <N>. Default per language.\n"
        " ---------- Database Flavor ----------\n"
        " -fd DtSearch flavor. No documents, only document references\n"
        " in abstracts (default).\n"
        " -fa AusText flavor. Documents stored in central server repository.\n"
        " ------------ Supported Languages ------------\n"
        " -l<n> Set language number to <N>. Default is 0. Supported values:\n"
        " 0 English-ASCII\n"
        " 1 English-Latin1\n"
        " 2 Spanish\n"
        " 3 French\n"
        " 4 Italian\n"
        " 5 German\n"
        " 6 Japanese-autoknj\n"
        " 7 Japanese-knjlist\n"
        " <dbname> Optional path prefix, then 1 - 8 character\n"
        " database name. Do not specify 'austext' or 'dtsearch'.\n"),
        aa_argv0, DEFAULT_MINWORD);
    return;
} /* print_usage() */
/************************************************/
/* */
/* user_args_processor */
/* */
/************************************************/
/* Handles command line arguments for main().
 * Initializes global variables.
 *
 * argc, argv: as received by main().
 *
 * Parses every leading "-xxx" option, then requires one more argument:
 * the new database name with an optional path prefix.  On return:
 *   newpath     holds that argument (truncated to leave room for a
 *               file name extension),
 *   newextp     points at its terminating NUL, where extensions go,
 *   dbname      holds the trailing 1 - 8 alphanumeric characters,
 *   path_offset is the offset of dbname within newpath,
 *   abstrsz and maxwordsz have their defaults filled in.
 * Invalid input prints the usage text plus an error message and calls
 * DtSearchExit (2).  Unknown option letters are reported and ignored.
 */
static void user_args_processor (int argc, char **argv)
{
    int i;
    char *ptr;

    /* Initialize variables prior to parsing command line */
    newpath[0] = 0;
    modelpath[0] = 0;
    if (argc < 2) {
        print_usage();
        DtSearchExit (2);
    }

    /* Each pass grabs new parm of "-xxx" format */
    for (;;) {
        argc--;
        argv++;
        if (argc <= 0)
            break;
        ptr = argv[0];
        if (ptr[0] != '-')
            break;
        switch (ptr[1]) {
            case 'r': /* unadvertised debug mode */
                if (strcmp (ptr, "-russell") == 0) {
                    debug_mode = TRUE;
                    puts ("001*** debug mode.");
                }
                else {
                    /* Shared error exit for every malformed option;
                     * reached by goto from the other cases below.
                     */
BAD_ARG:
                    print_usage();
                    printf (catgets (dtsearch_catd, MS_misc, 9,
                        "%sInvalid command line argument '%s'.\a\n"),
                        "\n"PROGNAME" ", ptr);
                    DtSearchExit (2);
                }
                break;

            case 'a':
                /* zero length abstract may be explicitly specified:
                 * atoi() also yields 0 for non-numeric text, so a 0
                 * result is accepted only when the text starts with '0'.
                 */
                abstrsz = atoi (ptr + 2);
                if (abstrsz < 0 || (abstrsz == 0 && ptr[2] != '0'))
                    goto BAD_ARG;
                break;

            case 'q':
                quiet_mode = TRUE;
                break;

            case 'o':
                ok_to_overwrite = TRUE;
                break;

            case 'f':
                switch (ptr[2]) {
                    case AUSTEXT_FLAVOR:
                    case DTSEARCH_FLAVOR:
                        flavor = ptr[2];
                        break;
                    default:
                        goto BAD_ARG;
                }
                break;

            case 'w': /* change min (-wn..) or max (-wx..) word size */
                switch (ptr[2]) {
                    case 'x':
                        if (!change_max_wordsize (ptr + 3))
                            goto BAD_ARG;
                        break;
                    case 'n':
                        if (!change_min_wordsize (ptr + 3))
                            goto BAD_ARG;
                        break;
                    default:
                        goto BAD_ARG;
                }
                break;

            case 'd': /* special path name for model .dbd */
                /* Truncate the directory so that a slash and
                 * FNAME_MODEL can still be appended safely.
                 */
                strncpy (modelpath, ptr + 2, sizeof(modelpath));
                modelpath [sizeof(modelpath) - sizeof(FNAME_MODEL) - 4] = 0;
                ensure_end_slash (modelpath);
                strcat (modelpath, FNAME_MODEL);
                break;

            case 'l':
                /* Note that custom, unsupported languages
                 * greater than DtSrLaLAST are permitted.
                 */
                language = atoi (ptr + 2);
                if (language < 0)
                    goto BAD_ARG;
                if (!quiet_mode && language > DtSrLaLAST)
                    printf ( catgets(dtsearch_catd, MS_initausd, 13,
                        "%s Warning! you have specified "
                        "an unsupported, custom language.\n"
                        " You will have to provide your own "
                        "language loaders at run time\n"
                        " in user function 'load_custom_language' "
                        "to access this database.\a\n"),
                        PROGNAME"444");
                break;

            default:
                printf (catgets (dtsearch_catd, MS_misc, 10,
                    "%sIgnored unknown command line argument '%s'.\n"),
                    PROGNAME " ", ptr);
                break;
        } /* end switch */
    } /* end parse of cmd line options beginning with '-' */

    /* Only required arg is new database name,
     * including optional path prefix.
     * Load newpath and newextp, leaving room
     * for long dbnames and .xxx extensions.
     */
    if (argc <= 0) {
        print_usage();
        printf (catgets (dtsearch_catd, MS_misc, 18,
            "%sDatabase name not specified.\n\a"), "\n"PROGNAME" ");
        DtSearchExit(2);
    }
    strncpy (newpath, argv[0], sizeof (newpath));
    newpath [sizeof(newpath) - 12] = 0;
    newextp = newpath + strlen (newpath);

    /* Get just the 1 - 8 char database name: step backwards from the
     * end of newpath over the trailing run of alphanumeric characters,
     * stopping after the first non-alphanumeric character (such as a
     * ":" in the dos drive id or a slash between directories) or at
     * the beginning of the string.  The pointer never moves below
     * newpath, and the character is converted to unsigned char as
     * <ctype.h> requires.
     * Then test database name for validity.
     */
    ptr = newextp;
    while (ptr > newpath && isalnum ((unsigned char) ptr[-1]))
        ptr--;

    i = (int) strlen (ptr);
    if (i < 1 || i > 8) {
BAD_DBNAME:
        print_usage();
        printf (catgets (dtsearch_catd, MS_misc, 11,
            "%sInvalid database name '%s'.\a\n"),
            "\n"PROGNAME"346 ", ptr);
        DtSearchExit(2);
    }
    path_offset = ptr - newpath;
    strcpy (dbname, ptr); /* save it; at most 8 chars, checked above */
    if (strcmp (dbname, "austext") == 0 || strcmp (dbname, "dtsearch") == 0) {
        goto BAD_DBNAME;
    }

    /* Ensure semantic processing specified only for english language */
    if (fzkeysz != 0 && language != DtSrLaENG && language != DtSrLaENG2) {
        print_usage();
        printf ( catgets(dtsearch_catd, MS_initausd, 14,
            "\n%s semantic processing is only available "
            "for English language databases.\n\a") ,
            PROGNAME"340");
        DtSearchExit(2);
    }

    /* Unless overridden by user args,
     * initialize abstract based on flavor.
     * The abstract size defaults to the remaining
     * space in the final misc slot after the fzkey.
     * However if the user specified a specific
     * abstract size, it may be adjusted later
     * to fill up the last slot.
     */
    if (abstrsz == -1)
        abstrsz = max_ormisc_size - (fzkeysz % max_ormisc_size);

    /* Default maxword size is 'short' when no -wx option was given.
     * NOTE(review): the original comment adds "except for German", but
     * no German special case is visible here -- confirm.
     */
    if (maxwordsz == INT_MAX)
        maxwordsz = STANDARD_MAXWORD;

    if (debug_mode)
        printf ("002*** userargs: modelpath='%s' newpath='%s'\n"
            " fzkeysz=%d abstrsz=%d\n",
            modelpath, newpath, fzkeysz, abstrsz);
    return;
} /* user_args_processor() */
/************************************************/
/* */
/* remove_d9x_file */
/* */
/************************************************/
/* Deletes the database file whose name is the current newpath base
 * plus the passed extension (e.g. ".d97").
 * The extension is written at newextp, so newpath is modified.
 * A file that does not exist is not an error; any other failure
 * prints a message and calls DtSearchExit (5).
 */
static void remove_d9x_file (char *extension)
{
    strcpy (newextp, extension);
    if (debug_mode)
        printf ("094*** delete '%s'.\n", newpath);

    if (remove (newpath) == 0)
        return;

    /* 'file not found' is not an error */
    if (errno == ENOENT)
        return;

    printf (catgets (dtsearch_catd, MS_initausd, 244,
        PROGNAME "244 Unable to remove '%s': %s\n"),
        newpath, strerror (errno));
    DtSearchExit (5);
    return;
} /* remove_d9x_file() */
/************************************************/
/* */
/* create_new_dbd */
/* */
/************************************************/
/* Copies and moves binary contents in passed, preopened
* model .dbd file (f) to new dbd file in target directory.
* Rename the internal .d00, etc filenames to match dbname.
*/
static void create_new_dbd (FILE *f)
{
FILE *g; /* target dbd file */
int i;
static char *nocopy_msg =
"%s Unable to copy '%s' to '%s':\n %s\a\n";
/* (Same as dtsearch.msg: MS_initausd, 214) */
static char zeros[] =
"\0\0\0\0\0\0\0\0\0\0\0\0";
strcpy (newextp, ".dbd");
if (debug_mode)
printf (PROGNAME"507 create_new_dbd '%s'\n", newpath);
/* If new .dbd file preexists, make sure it is writable */
confirm_ok_to_overwrite (newpath);
if (chmod (newpath, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) {
if (errno != ENOENT) {
printf (catgets (dtsearch_catd, MS_initausd, 214, nocopy_msg),
PROGNAME"515", modelpath, newpath, strerror(errno));
DtSearchExit (15);
}
}
if ((g = fopen (newpath, "w+b")) == NULL) {
printf (catgets (dtsearch_catd, MS_initausd, 214, nocopy_msg),
PROGNAME"509", modelpath, newpath, strerror(errno));
DtSearchExit (4);
}
errno = 0;
while ((i = fgetc (f)) != EOF)
fputc (i, g);
if (errno) {
printf (catgets (dtsearch_catd, MS_initausd, 214, nocopy_msg),
PROGNAME"531", modelpath, newpath, strerror(errno));
DtSearchExit (13);
}
/* Now reposition the write head in the new dbd file
* to rename the filenames. Rename each internal file
* name to '<newdbname>.xxx'.
*/
for (i = 0; exttab[i] != NULL; i++) {
fseek (g, START_OF_FT + (i * SIZEOF_FILE_ENTRY), SEEK_SET);
fprintf (g, "%s%s", dbname, exttab[i]);
fwrite (zeros, sizeof(char), sizeof(zeros), g);
}
/* The new dbd file only has to be readable */
fclose (g);
chmod (newpath, S_IRUSR | S_IRGRP | S_IROTH);
return;
} /* create_new_dbd() */
/************************************************/
/* */
/* main */
/* */
/************************************************/
/* Entry point of dtsrcreate: creates and initializes the database
 * named on the command line.  Original outline of the steps:
 * 1. CREATE or find database dictionary (.dbd file).
 * 2. CREATE empty 'dtsearch' database files.
 * 3. OPEN 'dtsearch' database.
 * 4. INITIALIZE the database.
 * 5. WRITE dbrec after initializing it.
 * 6. RENAME each database file.
 * 7. UNLINK (delete) d9x files.
 * NOTE(review): no separate rename step is visible in this function;
 * the internal file names are rewritten inside create_new_dbd().
 * Returns 0 on success.  Every failure path calls DtSearchExit()
 * with a nonzero code (3 or 4 in this function).
 */
int main (int argc, char *argv[])
{
int i;
char *ptr;
FILE *f;
/* These three records are used only as sizeof() operands below. */
struct or_miscrec miscrec;
struct or_swordrec swordrec;
struct or_lwordrec lwordrec;
setlocale (LC_ALL, "");
dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
aa_argv0 = argv[0];
/* Must be set before user_args_processor(), which reads them. */
max_ormisc_size = sizeof (miscrec.or_misc);
maxwidth_sword = sizeof (swordrec.or_swordkey) - 1;
maxwidth_lword = sizeof (lwordrec.or_lwordkey) - 1;
printf (catgets (dtsearch_catd, MS_misc, 4,
"%s Version %s.\n"),
aa_argv0,
DtSrVERSION
);
/* Handle cmd line args. Init global variables. */
user_args_processor (argc, argv);
/* ------- copy model .dbd to new .dbd ------- */
/* CASE 1: If user specified -d special alternative
* directory for model .dbd, it should be there.
*/
if (modelpath[0] != 0) {
if (debug_mode)
printf (PROGNAME"628 Try opening '%s' (-d dir).\n", modelpath);
if ((f = fopen (modelpath, "rb")) != NULL) {
if (debug_mode)
puts (PROGNAME"638 Found it!");
create_new_dbd (f);
fclose (f);
goto DBD_OKAY;
}
else {
print_usage();
printf (catgets (dtsearch_catd, MS_initausd, 213,
default_unable_to_open_msg),
"\n"PROGNAME"302", modelpath, strerror(errno));
DtSearchExit (4);
}
} /* end CASE 1 */
/* CASE 2: If model .dbd is in current directory, use it.
* If error is anything other than 'cant find file', quit now.
*/
if (debug_mode)
printf (PROGNAME"649 Try opening '%s' (curr dir).\n", FNAME_MODEL);
if ((f = fopen (FNAME_MODEL, "rb")) != NULL) {
if (debug_mode)
puts (PROGNAME"660 Found it!");
create_new_dbd (f);
fclose (f);
goto DBD_OKAY;
}
else if (errno != ENOENT) {
print_usage();
printf (catgets (dtsearch_catd, MS_initausd, 213,
default_unable_to_open_msg),
"\n"PROGNAME"655", FNAME_MODEL, strerror(errno));
DtSearchExit (4);
} /* end else CASE 2 */
/* CASE 3: Last chance. Look for model .dbd in target directory.
* At this point have to quit on any error.
*/
/* Build "<target dir>dtsearch.dbd": keep the path prefix of newpath
* and overwrite the database name that starts at path_offset.
*/
strcpy (modelpath, newpath);
strcpy (modelpath + path_offset, FNAME_MODEL);
if (debug_mode)
printf (PROGNAME"672 Try opening '%s' (new dir).\n", modelpath);
if ((f = fopen (modelpath, "rb")) != NULL) {
if (debug_mode)
puts (PROGNAME"675 Found it!");
create_new_dbd (f);
fclose (f);
goto DBD_OKAY;
}
if (debug_mode)
puts (PROGNAME"682 Never found it!");
print_usage();
printf (catgets (dtsearch_catd, MS_initausd, 213,
default_unable_to_open_msg),
"\n"PROGNAME"686", FNAME_MODEL,
"Not found in either current or target directories. Use -d option\a");
DtSearchExit (4);
/* All three cases jump here once the new .dbd has been written. */
DBD_OKAY:
/* Open a new database */
*newextp = 0; /* use no extension when opening database */
if (debug_mode)
printf ("040*** d_open newpath = '%s'.\n", newpath);
d_open (newpath, "o");
if (db_status != S_OKAY) {
printf (catgets (dtsearch_catd, MS_initausd, 230,
PROGNAME "230 Could not open database '%s'.\n"), newpath);
puts (vista_msg (PROGNAME "231"));
DtSearchExit (3);
}
austext_exit_dbms = (void (*) (int)) d_close; /* emerg exit func */
/* initialize the 'dtsearch' database */
if (debug_mode)
printf ("042*** d_initialize.\n");
d_initialize (0);
if (db_status != S_OKAY) {
printf (catgets (dtsearch_catd, MS_initausd, 239,
PROGNAME "239 Could not initialize database '%s'.\n"), newpath);
puts (vista_msg (PROGNAME "240"));
DtSearchExit (3);
}
/* Create and initialize dbrec database header record in first slot.
* First fill entire record with binary zeros.
* Then set specific values as specified by flavor on command line.
* For now most values are hard-coded.
*/
if (debug_mode)
printf ("050*** create dbrec.\n");
memset (&dbrec, 0, sizeof (dbrec));
/* Init fields that are completely independent */
/* NOTE(review): the int globals are narrowed to DtSrINT16 without a
* range check -- confirm large -a values cannot reach here.
*/
dbrec.or_language = (DtSrINT16) language;
dbrec.or_maxwordsz = (DtSrINT16) maxwordsz;
dbrec.or_minwordsz = (DtSrINT16) minwordsz;
dbrec.or_fzkeysz = (DtSrINT16) fzkeysz;
dbrec.or_abstrsz = (DtSrINT16) abstrsz;
dbrec.or_dbflags = ORD_NONOTES | ORD_NOMARKDEL | ORD_XWORDS;
strncpy (dbrec.or_version, SCHEMA_VERSION, sizeof(dbrec.or_version));
dbrec.or_version [sizeof(dbrec.or_version) - 1] = 0;
/* Load dbrec's recslots fields based on correct number
* of misc recs required to hold user's abstract.
* Round abstrsz upward if there is any space left on last misc rec.
*/
dbrec.or_recslots = 1; /* start with obj rec itself */
/* One extra slot per max_ormisc_size bytes of fzkey + abstract.
* After the loop i is 0 or negative; -i is the unused space
* left in the last slot.
*/
for (i = dbrec.or_fzkeysz + dbrec.or_abstrsz; i > 0; i -= max_ormisc_size)
dbrec.or_recslots++;
if (i < 0) {
/* Add in difference to INCREASE abstrsz */
dbrec.or_abstrsz -= i;
printf (catgets (dtsearch_catd, MS_misc, 433,
"%1$sAdjusted maximum abstract size upward to %2$hd.\n"),
PROGNAME "433 ", dbrec.or_abstrsz);
}
/* Init fields that are dependent on language */
switch (language) {
case DtSrLaENG:
case DtSrLaENG2:
dbrec.or_dbflags |= ORD_XSTEMS;
break;
default:
break;
}
/* Init fields that are dependent on flavor */
if (flavor == AUSTEXT_FLAVOR) {
dbrec.or_dbaccess = ORA_BLOB;
dbrec.or_compflags = ORC_COMPBLOB;
dbrec.or_hufid = -1L; /* -1 = use huffman compression, but
* hufid not yet known. */
dbrec.or_dbotype = DtSrObjTEXT;
}
else { /* default flavor == DTSEARCH_FLAVOR */
dbrec.or_dbaccess = ORA_NOTAVAIL;
}
if (!quiet_mode) {
/******putchar ('\n');******/
print_dbrec (newpath, &dbrec);
fflush (stdout);
}
/* Presumably converts the record to the on-disk byte order before
* it is written; the record is printed above while still in host
* order -- TODO confirm against swab_dbrec().
*/
swab_dbrec (&dbrec, HTON);
if (debug_mode)
printf ("060*** fillnew dbrec.\n");
d_fillnew (OR_DBREC, &dbrec, 0);
if (db_status != S_OKAY) {
printf (catgets (dtsearch_catd, MS_initausd, 509,
PROGNAME "509 Could not initialize database header record.\n"));
puts (vista_msg (PROGNAME "510"));
DtSearchExit (3);
}
/* Close the database */
d_close ();
austext_exit_dbms = NULL; /* emerg exit no longer required */
/* Delete all nonvista (inverted index) database files (.d9x) */
remove_d9x_file (".d97");
remove_d9x_file (".d98");
remove_d9x_file (".d99");
*newextp = 0; /* no extension suffixes for next msgs */
printf (catgets (dtsearch_catd, MS_initausd, 24,
PROGNAME " Successfully initialized database '%s'.\n"), newpath);
return 0;
} /* main() */
/************************* DTSRCREATE.C **************************/

View File

@@ -0,0 +1,330 @@
/*
* COMPONENT_NAME: austext
*
* FUNCTIONS: main
* print_dbrec
*
* ORIGINS: 27
*
*
* (C) COPYRIGHT International Business Machines Corp. 1994,1995
* All Rights Reserved
* Licensed Materials - Property of IBM
* US Government Users Restricted Rights - Use, duplication or
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
*/
/************************* DTSRDBREC.C **************************
* $XConsortium: dtsrdbrec.c /main/6 1996/08/12 12:18:12 cde-ibm $
* January 1994.
* Prints out data in a dbrec system record in human readable form.
*
* $Log$
* Revision 2.4 1996/02/01 18:18:18 miker
* Added DtSrLaJPN2. Deleted BETA definition.
*
* Revision 2.3 1995/10/25 19:49:12 miker
* Renamed from dbrec.c. Added prolog.c.
*
* Log: dbrec.c,v
* Revision 2.2 1995/10/19 20:31:07 miker
* No longer necessary to rename d00 file.
* Database files may be read-only.
*
* Revision 2.1 1995/09/22 19:34:56 miker
* Freeze DtSearch 0.1, AusText 2.1.8
*
* Revision 1.12 1995/09/19 21:51:43 miker
* ifdef DTSEARCH, use DtSrVERSION instead of AUSAPI_VERSION in banner.
*
* Revision 1.11 1995/08/31 22:24:50 miker
* Added report of or_language and other minor changes for DtSearch.
* DtSearch executable (without semantic report) renamed dtsrdbrec.
*/
#include "SearchP.h"
#define PROGNAME "DTSRDBREC"
#define MS_misc 1
#define MS_dbrec 23
/**#define MAIN_PROGRAM ***/
/************************************************/
/* */
/* language_name */
/* */
/************************************************/
/* Maps a DtSrLa* language number to a fixed descriptive string.
 * Any number without a case below is reported as a user defined
 * language.  The returned string is a literal; do not modify or free it.
 */
static char *language_name (int language_number)
{
    char *label = "USER DEFINED LANGUAGE";

    switch (language_number) {
        case DtSrLaENG:
            label = "ENGLISH (ASCII)";
            break;
        case DtSrLaENG2:
            label = "ENGLISH (ISO Latin-1)";
            break;
        case DtSrLaESP:
            label = "SPANISH";
            break;
        case DtSrLaFRA:
            label = "FRENCH";
            break;
        case DtSrLaITA:
            label = "ITALIAN";
            break;
        case DtSrLaDEU:
            label = "GERMAN";
            break;
        case DtSrLaJPN:
            label = "JAPANESE (AUTO COMPOUNDS)";
            break;
        case DtSrLaJPN2:
            label = "JAPANESE (COMPOUNDS FROM LIST)";
            break;
        default:
            break;
    }
    return label;
}
/************************************************/
/* */
/* print_dbrec */
/* */
/************************************************/
/* Prints the fields of a dbrec system record to stdout in human
 * readable form.
 *
 * dbname: database name shown in the heading line.
 * dbrec:  record to print; it is only read.  Callers in this commit
 *         pass it in host byte order.
 *
 * Message texts come from the message catalog (set MS_dbrec) with the
 * English strings below as fallbacks.  Arguments of the %ld and %lx
 * conversions are cast explicitly so they match the conversion no
 * matter how the project integer typedefs are defined.
 */
void print_dbrec (char *dbname, struct or_dbrec * dbrec)
{
    int i;
    int blobs_are_possible = FALSE;

    printf (catgets (dtsearch_catd, MS_dbrec, 1,
        "---------- System Values for Database '%s' ----------\n"),
        dbname);
    printf (catgets (dtsearch_catd, MS_dbrec, 2,
        "Schema version number (version) is '%s'.\n"),
        dbrec->or_version);
    printf (catgets (dtsearch_catd, MS_dbrec, 3,
        "Maximum object key size (sizeof(objkey)) is %ld bytes.\n"),
        (long) DtSrMAX_DB_KEYSIZE);
    if (ORD_USEHUGEKEYS & dbrec->or_dbflags)
        printf (catgets (dtsearch_catd, MS_dbrec, 4,
            "Optional 'Huge' keys enabled.\n"));

    printf (catgets (dtsearch_catd, MS_dbrec, 12,
        "Maximum length of an abstract string (abstrsz) is %d.\n"),
        dbrec->or_abstrsz);
    if (dbrec->or_abstrsz == 0)
        puts (catgets (dtsearch_catd, MS_dbrec, 14,
            " (Abstracts are not used in this database)."));
    else {
        /*
         * if they CAN be compressed, say whether or not they
         * actually are
         */
        if (dbrec->or_hufid != 0L)
            printf (catgets (dtsearch_catd, MS_dbrec, 20,
                "Abstracts are %scompressed.\n"),
                (ORC_COMPABSTR & dbrec->or_compflags) ? "" : "not ");
    }

    printf (catgets (dtsearch_catd, MS_dbrec, 22,
        "Parsing language is number %d, %s.\n"),
        dbrec->or_language, language_name(dbrec->or_language));
    printf (catgets (dtsearch_catd, MS_dbrec, 24,
        "Minimum word length (minwordsz) is %d.\n"),
        dbrec->or_minwordsz);
    printf (catgets (dtsearch_catd, MS_dbrec, 26,
        "Maximum word length (maxwordsz) is %d.\n"),
        dbrec->or_maxwordsz);
    printf (catgets (dtsearch_catd, MS_dbrec, 30,
        "Number of .d00 slots per object (recslots) is %d.\n"),
        dbrec->or_recslots);
    /* A zero or negative slot count (e.g. from a damaged record) would
     * make the division below fail, so the derived line is skipped.
     */
    if (dbrec->or_recslots > 0)
        printf (catgets (dtsearch_catd, MS_dbrec, 36,
            " (Maximum number of database objects is %ld).\n"),
            0xffffffL / (long) dbrec->or_recslots);

    printf (catgets (dtsearch_catd, MS_dbrec, 40,
        "Huffman compression table id (hufid) is %ld.\n"),
        (long) dbrec->or_hufid);
    if (dbrec->or_hufid == 0L)
        puts (catgets (dtsearch_catd, MS_dbrec, 42,
            " (Compression is disabled in this database)."));
    if (dbrec->or_hufid == -1L)
        puts (catgets (dtsearch_catd, MS_dbrec, 44,
            " (Specific compression table is not yet determined)."));

    /* Describe how objects are accessed, and note whether that access
     * method means repository blobs may be present.
     */
    switch (dbrec->or_dbaccess) {
        case ORA_VARIES:
            puts (catgets (dtsearch_catd, MS_dbrec, 50,
                "Engine accessibility to data may vary from object to object."));
            blobs_are_possible = TRUE;
            break;
        case ORA_NOTAVAIL:
            puts (catgets (dtsearch_catd, MS_dbrec, 54,
                "Data objects are not directly accessible from the engine."));
            break;
        case ORA_BLOB:
            puts (catgets (dtsearch_catd, MS_dbrec, 56,
                "Data objects are stored internally as blobs."));
            blobs_are_possible = TRUE;
            break;
        case ORA_REFBLOB:
            puts (catgets (dtsearch_catd, MS_dbrec, 60,
                "Only server file references to objects are stored in the blobs."));
            break;
        case ORA_CREFBLOB:
            puts (catgets (dtsearch_catd, MS_dbrec, 64,
                "Only client file references to objects are stored in the blobs."));
            break;
        case ORA_REFKEY:
            puts (catgets (dtsearch_catd, MS_dbrec, 68,
                "Object keys are server file references to the objects."));
            break;
        case ORA_CREFKEY:
            puts (catgets (dtsearch_catd, MS_dbrec, 72,
                "Object keys are client file references to the objects."));
            break;
        case ORA_REFHUGEKEY:
            puts (catgets (dtsearch_catd, MS_dbrec, 74,
                "Server file references to objects are "
                "stored in the 'huge' keys."));
            break;
        case ORA_REFABSTR:
            puts (catgets (dtsearch_catd, MS_dbrec, 80,
                "Server file references to objects are stored in the abstracts."));
            break;
        case ORA_CREFABSTR:
            puts (catgets (dtsearch_catd, MS_dbrec, 86,
                "Client file references to objects are stored in the abstracts."));
            break;
        default:
            printf (catgets (dtsearch_catd, MS_dbrec, 90,
                "Error: meaning of or_dbaccess value (%hd) is unknown.\n"),
                dbrec->or_dbaccess);
            blobs_are_possible = TRUE;
            break;
    } /* end or_dbaccess switch */

    if (blobs_are_possible) {
        /*
         * if they CAN be compressed, say whether or not they
         * actually are
         */
        if (dbrec->or_hufid != 0L)
            printf (catgets (dtsearch_catd, MS_dbrec, 100,
                "Repository blobs are %scompressed.\n"),
                (ORC_COMPBLOB & dbrec->or_compflags) ? "" : "not ");
    }
    else
        puts (catgets (dtsearch_catd, MS_dbrec, 110,
            "Repository blobs are not used in this database."));

    /* One line per database switch bit */
    printf (catgets (dtsearch_catd, MS_dbrec, 120,
        "Database switches (dbflags) are 0x%lx:\n"),
        (unsigned long) dbrec->or_dbflags);
    printf (catgets (dtsearch_catd, MS_dbrec, 130,
        " Inverted index %s words exactly as parsed.\n"),
        (ORD_XWORDS & dbrec->or_dbflags) ?
            catgets (dtsearch_catd, MS_dbrec, 124, "INCLUDES") :
            catgets (dtsearch_catd, MS_dbrec, 125, "EXCLUDES"));
    printf (catgets (dtsearch_catd, MS_dbrec, 140,
        " Inverted index %s word stems.\n"),
        (ORD_XSTEMS & dbrec->or_dbflags) ?
            catgets (dtsearch_catd, MS_dbrec, 124, "INCLUDES") :
            catgets (dtsearch_catd, MS_dbrec, 125, "EXCLUDES"));
    printf (catgets (dtsearch_catd, MS_dbrec, 160,
        " Use of optional 'huge' keys is %s.\n"),
        (ORD_USEHUGEKEYS & dbrec->or_dbflags) ?
            catgets (dtsearch_catd, MS_dbrec, 126, "ENABLED") :
            catgets (dtsearch_catd, MS_dbrec, 127, "DISABLED"));
    /* The next two flags are "NO..." bits: set means DISABLED */
    printf (catgets (dtsearch_catd, MS_dbrec, 162,
        " Mark-for-deletion is %s.\n"),
        (ORD_NOMARKDEL & dbrec->or_dbflags) ?
            catgets (dtsearch_catd, MS_dbrec, 127, "DISABLED") :
            catgets (dtsearch_catd, MS_dbrec, 126, "ENABLED"));
    printf (catgets (dtsearch_catd, MS_dbrec, 164,
        " Appendable user notes are %s.\n"),
        (ORD_NONOTES & dbrec->or_dbflags) ?
            catgets (dtsearch_catd, MS_dbrec, 127, "DISABLED") :
            catgets (dtsearch_catd, MS_dbrec, 126, "ENABLED"));
    printf (catgets (dtsearch_catd, MS_dbrec, 170,
        " Text characters are %s wide.\n"),
        (ORD_WIDECHAR & dbrec->or_dbflags) ?
            catgets (dtsearch_catd, MS_dbrec, 172, "MULTIPLE bytes") :
            catgets (dtsearch_catd, MS_dbrec, 174, "a SINGLE byte"));

    printf (catgets (dtsearch_catd, MS_dbrec, 200,
        "Current number of database objects (reccount) is %ld.\n"),
        (long) dbrec->or_reccount);
    printf (catgets (dtsearch_catd, MS_dbrec, 210,
        "Last currently used slot number (maxdba) is %ld.\n"),
        (long) dbrec->or_maxdba);

    /* Closing rule of 58 dashes */
    for (i = 58; i > 0; i--)
        putchar ('-');
    putchar ('\n');
    return;
} /* print_dbrec() */
#ifdef MAIN_PROGRAM
#include <locale.h>
#include <fcntl.h>
#include "vista.h"
/************************************************/
/* */
/* main */
/* */
/************************************************/
/*
 * Standalone driver, compiled only when MAIN_PROGRAM is defined.
 * Prints a version/time banner, opens the database named by argv[1]
 * in read-only mode, reads its first OR_DBREC record, byte-swaps it
 * with swab_dbrec (NTOH) and hands it to print_dbrec().
 *
 * Exit status:
 *   0  record printed
 *   2  no database name on the command line
 *   3  database could not be opened
 *   4  no dbrec record found
 *   5  dbrec record could not be read
 */
int main (int argc, char *argv[])
{
    struct or_dbrec dbrec;
    char            timebuf[256];
    time_t          now;

    aa_argv0 = argv[0];
    setlocale (LC_ALL, "");
    dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
    austools_catd = catopen (FNAME_AUSCAT, 0);

    /* banner: program name, version, and time of this run */
    time (&now);
    strftime (timebuf, sizeof (timebuf),
        catgets (dtsearch_catd, MS_misc, 22, "%A, %b %d %Y, %I:%M %p"),
        localtime (&now));
    printf (catgets (dtsearch_catd, MS_misc, 23,
            "%s: Version %s. Run %s.\n"),
        aa_argv0,
        DtSrVERSION,
        timebuf);

    if (argc < 2) {
        printf (catgets (dtsearch_catd, MS_dbrec, 310,
                "USAGE: %s <dbname>\n"), aa_argv0);
        return 2;
    }

    /*
     * The original code formatted "<dbname>.d00" into the 256 byte
     * scratch buffer here with an unbounded sprintf() and then never
     * used the result.  The dead write has been removed because a long
     * argv[1] overflowed the buffer.
     */

    db_oflag = O_RDONLY;	/* db files may be read-only */
    d_open (argv[1], "o");
    if (db_status != S_OKAY) {
        printf (catgets (dtsearch_catd, MS_dbrec, 330,
                "Could not open '%s' database.\n%s\n"),
            argv[1], vista_msg(PROGNAME"293"));
        return 3;
    }

    d_recfrst (OR_DBREC, 0);
    if (db_status != S_OKAY) {
        printf (catgets (dtsearch_catd, MS_dbrec, 340,
                "No dbrec record in database '%s'.\n"),
            argv[1]);
        return 4;
    }

    d_recread (&dbrec, 0);
    if (db_status != S_OKAY) {
        printf (catgets (dtsearch_catd, MS_dbrec, 350,
                "Can't read dbrec record in database '%s'.\n%s\n"),
            argv[1], vista_msg(PROGNAME"306"));
        return 5;
    }

    swab_dbrec (&dbrec, NTOH);
    print_dbrec (argv[1], &dbrec);
    return 0;
} /* main() */
#endif
/************************* DTSRDBREC.C **************************/

2401
cde/programs/dtsr/dtsrhan.c Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,517 @@
/* $XConsortium: dtsrkdump.c /main/3 1996/09/23 21:03:37 cde-ibm $
*
* (c) Copyright 1996 Digital Equipment Corporation.
* (c) Copyright 1996 Hewlett-Packard Company.
* (c) Copyright 1996 International Business Machines Corp.
* (c) Copyright 1996 Sun Microsystems, Inc.
* (c) Copyright 1996 Novell, Inc.
* (c) Copyright 1996 FUJITSU LIMITED.
* (c) Copyright 1996 Hitachi.
*/
/*
* COMPONENT_NAME: austext
*
* FUNCTIONS: count_words
* main
*
* ORIGINS: 27
*
*
* (C) COPYRIGHT International Business Machines Corp. 1994,1996
* All Rights Reserved
* Licensed Materials - Property of IBM
* US Government Users Restricted Rights - Use, duplication or
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
*/
/*********************** DTSRKDUMP.C *************************
* $Id: dtsrkdump.c /main/3 1996/09/23 21:03:37 cde-ibm $
* April 1994.
* Dumps a DtSearch/AusText keyfile to stdout.
* Renamed from auskdump for DtSearch.
*
* $Log$
* Revision 2.3 1996/04/10 21:19:28 miker
* Program renamed from auskdump with minor cleanup.
*
*
* *** Log: auskdump.c,v ***
* Revision 2.2 1995/10/19 20:29:37 miker
* Permit accessing of read-only databases.
* Revision 2.1 1995/09/22 18:55:59 miker
* Freeze DtSearch 0.1, AusText 2.1.8
* Revision 1.11 1995/09/19 21:47:26 miker
* Added explanation of '*' in report.
* Revision 1.10 1995/09/06 14:18:33 miker
* Fixed bug: -p value incorrectly converted to double because
* atof() function prototype was not provided from stdlib.h.
* Revision 1.9 1995/09/01 23:58:57 miker
* Minor name changes for DtSearch.
* Print err msgs when databases fail to open.
* Revision 1.8 1995/05/30 18:40:12 miker
* Print progress dots and some additional dbrec info.
*/
#include "SearchP.h"
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <fcntl.h>
#include <locale.h>
#include "vista.h"
#define PROGNAME "DTSRKDUMP"
#define MIN_THRESHOLD 100L
#define KEYS_PER_DOT 1000
#define MS_dtsrkdump 25
/*----------------- GLOBALS -------------------*/
/* General scratch buffer: receives each key read by KEYREAD, and is
 * reused by main() for the banner timestamp and the " in <path>" text.
 */
char buf[2048];
/* Counter array allocated (and freed) by main(); indexed by keytype
 * byte for object keys, or by the 6 stem/word slots for word keys.
 */
static long *counters = NULL; /* allocated array */
/* TRUE when every key should be listed (-v, or implied by -t / -p) */
static int do_verbose = FALSE;
/* database address of the current record, as returned by CRGET */
static DB_ADDR dba;
/* a word is listed in verbose mode only if it has at least this
 * many addresses (set by -t, or derived from -p in main()).
 */
static long min_threshold = MIN_THRESHOLD;
/* seeded from dbrec.or_maxdba in main(), then raised while scanning
 * object keys; printed as "Largest Object DBA".
 */
static long maxdba = 0L;
/* database header record, filled in by austext_dopen() in main() */
static struct or_dbrec
dbrec;
/****************************************/
/* */
/* count_words */
/* */
/****************************************/
/*
 * Reads every key of one inverted index key field and tallies it in the
 * global 'counters' array: keys whose first byte is STEM_CH are counted
 * in counters[index], all other keys in counters[index + 1].
 *
 * index selects the key field: 0 = OR_SWORDKEY (short), 2 = OR_LWORDKEY
 * (long), 4 = OR_HWORDKEY (huge).  Any other value prints a program
 * error message and calls DtSearchExit (4).
 *
 * In verbose mode each key with at least 'min_threshold' addresses is
 * listed with its dba, offset, address count and free count; keys whose
 * address count is >= dbrec.or_reccount are flagged with '*'.
 * In non-verbose mode a progress dot is printed every KEYS_PER_DOT keys.
 */
void count_words (int index)
{
    long            vista_field;
    UCHAR          *ptr;
    /* 'nfree' was named 'free', which hid the library function */
    DtSrINT32       offset = 0, nfree = 0, addrs = 0;
    int             tabstop;
    long            keycount = 0;
    int             dotcount = 0;

    if (index == 0)
        vista_field = OR_SWORDKEY;
    else if (index == 2)
        vista_field = OR_LWORDKEY;
    else if (index == 4)
        vista_field = OR_HWORDKEY;
    else {
        printf (catgets (dtsearch_catd, MS_dtsrkdump, 1,
                "%s Program Error Abort.\a\n"),
            PROGNAME"030");
        DtSearchExit (4);
    }

    KEYFRST (PROGNAME"36", vista_field, 0);
    while (db_status == S_OKAY) {
        KEYREAD (PROGNAME"48", buf);
        if (buf[0] == STEM_CH)
            (counters[index])++;
        else
            (counters[index + 1])++;

        if (do_verbose) {
            CRGET (PROGNAME"58", &dba, 0);
            switch (index) {
                case 0:
                    CRREAD (PROGNAME"66", OR_SWOFFSET, &offset, 0);
                    CRREAD (PROGNAME"67", OR_SWFREE, &nfree, 0);
                    CRREAD (PROGNAME"68", OR_SWADDRS, &addrs, 0);
                    break;
                case 2:
                    CRREAD (PROGNAME"76", OR_LWOFFSET, &offset, 0);
                    CRREAD (PROGNAME"77", OR_LWFREE, &nfree, 0);
                    CRREAD (PROGNAME"78", OR_LWADDRS, &addrs, 0);
                    break;
                case 4:
                    CRREAD (PROGNAME"86", OR_HWOFFSET, &offset, 0);
                    CRREAD (PROGNAME"87", OR_HWFREE, &nfree, 0);
                    CRREAD (PROGNAME"88", OR_HWADDRS, &addrs, 0);
                    break;
            }
            NTOHL (offset);
            NTOHL (nfree);
            NTOHL (addrs);

            if (addrs >= min_threshold) {
                /* quoted key, control chars shown as '~', padded
                 * out to a fixed column */
                printf (" \"");
                tabstop = 0;
                for (ptr = (UCHAR *) buf; *ptr != 0; ptr++) {
                    putchar ((*ptr >= 32) ? *ptr : '~');
                    tabstop++;
                }
                printf ("\" ");
                while (tabstop++ < 22)
                    putchar (' ');
                /*
                 * Explicit casts make every argument match its
                 * conversion (%d takes int, %ld takes long) no matter
                 * how DB_ADDR and DtSrINT32 are defined.
                 */
                printf (catgets(dtsearch_catd, MS_dtsrkdump, 2,
                        "%c dba=%d:%-7ld ofs=%-9ld adr=%-6ld fre=%ld\n"),
                    (addrs >= dbrec.or_reccount) ? '*' : ' ',
                    (int) (dba >> 24), (long) (dba & 0xffffff),
                    (long) offset, (long) addrs, (long) nfree);
            }
        } /* end verbose */
        else { /* !verbose */
            if (++keycount % KEYS_PER_DOT == 0) {
                putchar ('.');
                if (++dotcount % 10 == 0)
                    putchar (' ');
                if (dotcount % 50 == 0) {
                    putchar ('\n');
                    dotcount = 0;
                }
                fflush (stdout);
            }
        } /* end !verbose dot printing */

        KEYNEXT (PROGNAME"98", vista_field, 0);
    } /* end object key read loop */

    /* finish a partially filled line of progress dots */
    if (dotcount)
        putchar ('\n');
    return;
} /* count_words() */
/****************************************/
/* */
/* main */
/* */
/****************************************/
/*
 * dtsrkdump entry point.
 *
 * Parses the command line (-o, -w, -v, -t<N>, -p<N>, dbname), opens the
 * database read-only via austext_dopen(), then:
 *   - with -o, walks all OR_OBJKEY keys, counting them by their first
 *     (keytype) byte and tracking the largest slot number seen;
 *   - with -w, calls count_words() for the short, long and huge word
 *     key fields and prints the six stem/word totals.
 *
 * Exits through DtSearchExit(): 0 on success, 2 after printing usage,
 * 3 if the database cannot be opened.
 */
int main (int argc, char *argv[])
{
    int             i;
    int             oops;
    int             dotcount;
    long            keycount;
    long            total;
    char           *ptr;
    int             do_objkeys = FALSE;
    int             do_wordkeys = FALSE;
    char            dbpath[2048];
    char            rcs_revision [8];
    char            dbname[12];
    time_t          now;
    double          percent = 0.0;
    int             listing_most_words = FALSE;
    static char    *word_labels[6] =
    {
        "Short Stems = %8ld\n", "Short Words = %8ld\n",
        "Long Stems = %8ld\n", "Long Words = %8ld\n",
        "Huge Stems = %8ld\n", "Huge Words = %8ld\n"
    };

    aa_argv0 = argv[0];
    time (&now);
    /* field width keeps the token inside the 8 byte buffer */
    sscanf ("$Revision: /main/3 $", "%*s %7s", rcs_revision);
    setlocale (LC_ALL, "");
    dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
    strftime (buf, sizeof (buf), "%m/%d/%Y, %I:%M %p",
        localtime (&now));
    printf (catgets(dtsearch_catd, MS_dtsrkdump, 3,
            "%s %s, engine %s. %s.\n"),
        aa_argv0, rcs_revision, AUSAPI_VERSION, buf);

    if (argc <= 1) {
PRINT_USAGE:
        printf (catgets(dtsearch_catd, MS_dtsrkdump, 4,
"\nUSAGE: %s -o|w|ow [-v] [-t<N> | -p<N>] dbname\n"
" Reads DtSearch key files and prints summary report.\n"
" -o Keys examined are OBJECT record keys.\n"
" -w Keys examined are inverted index WORDS.\n"
" -v VERBOSE mode, lists every key.\n"
" -t<N> Threshold. Sets w and v options, and lists only words\n"
" with >= <N> addresses. All words will be listed if <N> = 1.\n"
" -p<N> Another threshold. Same as -t except <N> is percent\n"
" of the entire database (<N> may include a decimal point).\n"
" For example -p99.9 prints out every word that occurs\n"
" in 99.9%% or more of the records--an excellent way to find\n"
" candidates for the stop list.\n"
" If w and v are set without threshold, default is -t%d.\n"
" <dbname> 1 - 8 character database name with optional path prefix.\n")
            ,aa_argv0
            ,(int) MIN_THRESHOLD	/* %d needs an int; macro is long */
            );
        DtSearchExit (2);
    }
    /* parse options */
    else { /* argc >= 2 */
        for (;;) {
            /* each pass grabs new token with "-xxx" format */
            --argc;
            ++argv;
            if (argc <= 0)
                break; /* no more tokens of any kind */
            ptr = argv[0];
            if (*ptr != '-')
                break; /* no more option tokens */

            /* examine each char in this -xxx token */
            while (*(++ptr) != 0) {
                switch (*ptr) {
                    case 'o':
                        do_objkeys = TRUE;
                        break;
                    case 'w':
                        do_wordkeys = TRUE;
                        break;
                    case 'v':
                        do_verbose = TRUE;
                        break;
                    case 'p':
                        do_verbose = TRUE;
                        do_wordkeys = TRUE;
                        percent = atof (ptr + 1);
                        if (percent <= 0.0 || percent > 100.0) {
                            fprintf (stderr,
                                catgets (dtsearch_catd, MS_dtsrkdump, 5,
                                    "%s Invalid percent value %lf.\a\n"),
                                PROGNAME"195", percent);
                            goto PRINT_USAGE;
                        }
                        ptr[1] = 0; /* terminate parse */
                        break;
                    case 't':
                        do_verbose = TRUE;
                        do_wordkeys = TRUE;
                        if ((min_threshold = atol (ptr + 1)) <= 0L) {
                            fprintf (stderr,
                                catgets (dtsearch_catd, MS_dtsrkdump, 53,
                                    "%s Invalid threshold value.\a\n"),
                                PROGNAME"198");
                            goto PRINT_USAGE;
                        }
                        ptr[1] = 0; /* terminate parse */
                        break;
                    default:
                        fprintf (stderr,
                            catgets (dtsearch_catd, MS_dtsrkdump, 55,
                                "%s Unknown command line argument '%c'.\a\n"),
                            PROGNAME"278", *ptr);
                        goto PRINT_USAGE;
                } /* end switch */
            } /* end while-loop for each char of -xxx token */
        } /* end for-loop for each -xxx token */
    } /* end of options parse altogether */

    oops = FALSE;
    if (argc <= 0) {
        printf (catgets (dtsearch_catd, MS_dtsrkdump, 56,
                "%s Missing required database name.\a\n"),
            PROGNAME"267");
        oops = TRUE;
    }
    if (!do_wordkeys && !do_objkeys) {
        printf (catgets (dtsearch_catd, MS_dtsrkdump, 57,
                "%s Either -o or -w must be specified.\a\n"),
            PROGNAME"271");
        oops = TRUE;
    }
    if (oops)
        goto PRINT_USAGE;

    /* Database name may have a long path prefix.
     * If so, we need to segregate the two.
     * Set 'ptr' to just the 8 char dictionary name by moving
     * it backwards until first non-alphanumeric character
     * (such as a ":" in the dos drive id or a slash between directories),
     * or to the beginning of string.
     */
    strncpy (dbpath, argv[0], sizeof (dbpath));
    dbpath[sizeof (dbpath) - 1] = 0;
    for (ptr = dbpath + strlen (dbpath) - 1; ptr >= dbpath; ptr--)
        /* ctype functions require an unsigned char value */
        if (!isalnum ((unsigned char) *ptr)) {
            ptr++;
            break;
        }
    if (ptr < dbpath)
        ptr = dbpath;

    /* test for valid database name */
    i = strlen (ptr);
    if (i < 1 || i > 8) {
        fprintf (stderr, catgets (dtsearch_catd, MS_dtsrkdump, 58,
                "%s Invalid database name '%s'.\a\n"),
            PROGNAME"297", ptr);
        goto PRINT_USAGE;
    }
    strcpy (dbname, ptr);	/* at most 8 chars, verified above */
    *ptr = 0; /* truncate dbname off of full path/dbname */

    /* Open database in read-only mode. */
    db_oflag = O_RDONLY;
    if (!austext_dopen (dbname, dbpath, NULL, 0, &dbrec)) {
        fprintf (stderr, "%s\n", DtSearchGetMessages());
        DtSearchExit (3);
    }
    maxdba = dbrec.or_maxdba;
    printf (catgets(dtsearch_catd, MS_dtsrkdump, 60,
            "%s: '%s' reccount=%ld maxdba=%ld recslots=%hd minw=%hd maxw=%hd\n"),
        aa_argv0, dbname, (long) dbrec.or_reccount,
        (long) dbrec.or_maxdba, dbrec.or_recslots,
        dbrec.or_minwordsz, dbrec.or_maxwordsz);

    /* Adjust threshold if necessary */
    if (percent > 0.0)
        min_threshold = (long)
            ((float) percent * (float) dbrec.or_reccount / 100.0);
    if (min_threshold > dbrec.or_reccount)
        min_threshold = dbrec.or_reccount;
    if (do_wordkeys && do_verbose) {
        if (min_threshold > 1 && min_threshold < dbrec.or_reccount) {
            printf (catgets(dtsearch_catd, MS_dtsrkdump, 70,
                    "%s Will only list words occurring "
                    "in %ld or more records.\n"),
                aa_argv0, min_threshold);
            listing_most_words =
                (float) min_threshold / (float) dbrec.or_reccount > .90;
        }
        else {
            printf (catgets(dtsearch_catd, MS_dtsrkdump, 80,
                    "%s: Listing all words in database.\n"),
                aa_argv0);
            listing_most_words = TRUE;
        }
    }

    if (do_objkeys) {
        /*
         * Allocate and initialize an array of keytype counters, one for
         * each possible ascii keytype char (256).
         */
        counters = austext_malloc (258 * sizeof(long), PROGNAME"113", NULL);
        memset (counters, 0, 258 * sizeof(long));
        dotcount = 0;
        keycount = 0;

        KEYFRST (PROGNAME"111", OR_OBJKEY, 0);
        while (db_status == S_OKAY) {
            KEYREAD (PROGNAME"288", buf);
            /*
             * Index by unsigned byte value: where plain char is signed,
             * a keytype >= 0x80 would otherwise be a negative index.
             */
            (counters[(unsigned char) buf[0]])++;
            CRGET (PROGNAME"251", &dba, 0);
            /*
             * Track the largest slot number.  Both the test and the
             * stored value use the low 24 bits; storing the unmasked
             * dba made every later comparison fail.
             */
            if (maxdba < (dba & 0xffffff))
                maxdba = dba & 0xffffff;

            if (do_verbose) {
                /* Mark control and nonascii chars with a period. */
                i = 0;
                putchar ('\"');
                for (ptr = buf; *ptr != 0; ptr++) {
                    putchar ((*ptr < 32 || *ptr >= 127) ? '.' : *ptr);
                    i++;
                }
                printf ("\" ");
                while (i++ < DtSrMAX_DB_KEYSIZE)
                    putchar (' ');
                printf (catgets(dtsearch_catd, MS_dtsrkdump, 100,
                        "dba x%08lx, %6ld\n"),
                    (unsigned long) dba, (long) dba);
            } /* end verbose */
            else { /* !verbose */
                if (++keycount % KEYS_PER_DOT == 0) {
                    putchar ('.');
                    if (++dotcount % 10 == 0)
                        putchar (' ');
                    if (dotcount % 50 == 0) {
                        putchar ('\n');
                        dotcount = 0;
                    }
                    fflush (stdout);
                }
            } /* end !verbose dot printing */

            KEYNEXT (PROGNAME"291", OR_OBJKEY, 0);
        } /* end object key read loop */

        /* Print objkey summary report */
        if (dotcount)
            putchar ('\n');
        if (dbpath[0] == 0)
            buf[0] = 0;
        else
            /* bounded: dbpath may be nearly as large as buf itself */
            snprintf (buf, sizeof (buf),
                catgets(dtsearch_catd, MS_dtsrkdump, 110,
                    " in %s"), dbpath);
        printf (catgets(dtsearch_catd, MS_dtsrkdump, 120,
                "Object Summary for '%s'%s:\n"), dbname, buf);
        puts (catgets(dtsearch_catd, MS_dtsrkdump, 130,
                "Object Count by Keytypes:"));
        total = 0L;
        for (i = 0; i < 256; i++) {
            if (counters[i] > 0L) {
                total += counters[i];
                if (i > 32 && i < 127)
                    printf (" '%c' %6ld\n", i, counters[i]);
                else
                    printf (" x%02x %6ld\n", i, counters[i]);
            }
        }
        printf (catgets(dtsearch_catd, MS_dtsrkdump, 160,
                "TOTAL Objects Count = %ld\n"), total);
        printf (catgets(dtsearch_catd, MS_dtsrkdump, 170,
                "Largest Object DBA = %ld\n"), maxdba);
        free (counters);
    } /* end do_objkeys */

    if (do_wordkeys) {
        if (listing_most_words)
            printf (catgets(dtsearch_catd, MS_dtsrkdump, 180,
                    "%s: * Words marked with asterisk occur in every record.\n"),
                aa_argv0);
        /*
         * Allocate and initialize word and stem counters. First is for
         * short stems (those beginning with STEM_CH), next is for short
         * words (everything else). Next are for long stems, long words,
         * huge stems, and huge words (6 in all).
         */
        counters = austext_malloc (8 * sizeof (long), PROGNAME"113", NULL);
        memset (counters, 0, 6 * sizeof(long));
        count_words (0); /* short */
        count_words (2); /* long */
        count_words (4); /* huge */

        /* print wordkey summary report */
        if (do_objkeys)
            putchar ('\n'); /* separate from last report */
        if (dbpath[0] == 0)
            buf[0] = 0;
        else
            snprintf (buf, sizeof (buf),
                catgets(dtsearch_catd, MS_dtsrkdump, 110,
                    " in %s"), dbpath);
        printf (catgets(dtsearch_catd, MS_dtsrkdump, 200,
                "Words Summary for '%s'%s:\n"), dbname, buf);
        total = 0L;
        for (i = 0; i < 6; i++) {
            printf (word_labels[i], counters[i]);
            total += counters[i];
        }
        printf (catgets(dtsearch_catd, MS_dtsrkdump, 210,
                "TOTAL Words Count = %ld\n"), total);
        free (counters);
    } /* end do_wordkeys */

    DtSearchExit (0);
    return 0;	/* only reached if DtSearchExit() returns */
} /* main() */
/*********************** DTSRKDUMP.C *************************/

1267
cde/programs/dtsr/dtsrload.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,784 @@
/*
* COMPONENT_NAME: austext
*
* FUNCTIONS: build_tree
* char_label
* huffman_code
* init_treebase
* main
* next_sorted_node
* print_usage
* strrev
* user_args_processor
*
* ORIGINS: 27
*
*
* (C) COPYRIGHT International Business Machines Corp. 1990,1996
* All Rights Reserved
* Licensed Materials - Property of IBM
* US Government Users Restricted Rights - Use, duplication or
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
*/
/************************** HUFFCODE.C ******************************
* $XConsortium: huffcode.c /main/9 1996/11/14 15:31:05 rcs $
* 12/90.
* Counts frequency of occurrence of every possible byte value of input text.
* Creates Huffman Code Table based on byte frequencies and writes it
* in 2 formats to 2 different output files.
* The encode table (.huf) maintains the frequency counts and explicitly
* includes the huffman code strings. Generally speaking, the .huf file
* is intended for humans to read. The decode table (.c) is an array
* of integers meant to be compiled into an object module, then linked
* into the decode program. The .c format closely resembles the original
* huffman code tree in this program.
* By keeping the tree as an obscure array of integers,
* the huffman code can double as an encryption technique,
* and the decoding method kept somewhat proprietary.
*
* For a good discussion of Huffman codes and related algorithms,
* see "Data Compression Techniques and Applications Hardware and
* Software Considerations" by Gilbert Held and Thomas R. Marshall.
* The tree itself is balanced to minimize longest bitstring length
* per Eugene Schwartz, Information and Control 7, 37-44 (1964).
*
* At beginning of each new execution, the program tries to
* open the .huf table file and continue byte frequency counting
* from the last run.
* If the .huf file doesn't exist, the table's counts are
* initialized to zeroes. The .c decode table is recomputed fresh
* each run, whether it existed before or not.
*
* If the input file is not specified then the frequencies in the table
* are not changed, and the huffman codes are recomputed with the
* existing frequencies.
*
* THIS PROGRAM DOES NOT CHECK .HUF FILE FORMAT!--it had better be correct.
*
* HUFFMAN ENCODE TABLE (.huf) FILE FORMAT:
* Each line represents each possible byte value (0 - 255),
* the huffman 'literal' character (#256), or comments.
* There are exactly 257 lines sorted by decreasing count.
* There are four fields, each separated by one or more tabs (\t).
*
* 1. CHARACTER. a number from 0 to 256.
*
* The 'character' represented by the number 256 is the literal.
* it represents all characters whose frequency is so low that
* there is no huffman code translation--this reduces the max
* length of the coded bit string when there are lots of zero
* or low frequency bytes in the input. For example,
* pure ascii text files only occasionally have byte values
* less than 32 (control chars) and rarely greater than 127
* (high order bit turned on).
*
* 2. HUFFMAN CODE. a string of binary digits (0's and 1's).
* Each string is unique to that character.
* This field will consist of a single blank, when the character
* will be coded by the huffman literal. If the code of
* the literal itself is blank, then literal coding is
* not used in this table--all characters are represented
* by complete huffman code strings.
*
* 3. COUNT. The number of times this character appeared in the text.
* The literal's count equals the sum of the counts of all the
* real characters which are represented by the literal.
* The literal's count may be 0 if all the characters it
* represents have zero frequencies.
*
* 4. COMMENTS. A label depicting the printable char or its description.
*
* HUFFMAN DECODE TABLE (.c) FILE FORMAT:
* A sequence of integers formatted as a C array of integer pairs
* and intended to be compiled and linked into the decode program.
* Each huffman tree node contains two integers.
* The root of the tree is the LAST integer pair.
* The first (left) integer contains the array index down the '0' branch,
* the right integer points down the '1' branch.
* However if an integer is negative, the decoding ceases and the
* resulting plaintext character is the negative integer + 257,
* and will always be in the range 0 - 255, or 256 for the literal code.
*
* $Log$
* Revision 2.3 1996/03/25 18:55:04 miker
* Changed FILENAME_MAX to _POSIX_PATH_MAX.
*
* Revision 2.2 1995/10/25 17:50:34 miker
* Added prolog.
*
* Revision 2.1 1995/09/22 20:46:28 miker
* Freeze DtSearch 0.1, AusText 2.1.8
*
* Revision 1.4 1995/09/19 22:04:11 miker
* Print out nonascii chars in .huf file comments.
*
* Revision 1.3 1995/09/05 18:08:00 miker
* Name changes for DtSearch.
*/
#include "SearchP.h"
#include <limits.h>
#include <ctype.h>
#include <errno.h>
#include <locale.h>
#include <sys/stat.h>
#define MS_huff 30 /* message catalog set number */
#define DELIMITERS "\t\n" /* betw fields in .huf file */
#define LAST_BIT '-'
#define MAX_BITLEN 24
#define MAX_NODES 514
/*
* 256 chars + 'literal' char = max leaves, therefore max treesize
* = 2n - 1 = 513
*/
/*----------------------- HCTREE --------------------------*/
/* tree is also a table so tree ptrs are table indexes:
* 0 - 255 = characters themselves (leaves at base of tree).
* 256 = literal code (special char/leaf).
* > 256 = higher nodes.
* Global 'last_node' = highest actual node alloc so far.
* When tree completed, last_node = root of tree.
* -1 = null links.
*/
/* One node of the Huffman code tree.  Nodes live in the hctree1[]
 * table and refer to each other by table index; -1 is the null link.
 */
typedef struct {
char bit; /* '0' or '1' (assoc with link to
* father); LAST_BIT while the node
* has no father */
long count; /* freq of occurrence of char; for
* joined nodes, sum of both sons */
int sort; /* specifies output sort order
* (bitstring length, set while
* the tree is built) */
int father; /* index points UP toward root of
* tree */
int son0; /* index of '0' (left) subnode */
int son1; /* index of '1' (right) subnode */
} HCTREE;
/*------------------------ GLOBALS ---------------------------*/
/* highest hctree1[] index allocated so far; starts at the literal
 * leaf (256) and is incremented by each join in build_tree().
 */
static int last_node = 256;
/* sum of the counts of the real characters (0 - 255) */
long total_count;
/* characters with count <= this are coded via the literal (-l<N>) */
long literal_threshold = 0L;
/* TRUE prints a trace line for every join made by build_tree() */
int debug_switch = FALSE;
/* FALSE when the user turned literal coding off with -l- */
int literal_coding_on = TRUE;
int input_file_specified; /* TRUE if user enters
* filename */
int no_huffcode_file; /* TRUE if table file not
* found */
/* the tree itself: leaves 0 - 256 followed by the joined nodes */
HCTREE hctree1[MAX_NODES];
/* optional input file whose bytes are to be counted */
char filename_input[_POSIX_PATH_MAX];
/* encode table file, read by init_treebase(), written by huffman_code() */
char filename_huf[_POSIX_PATH_MAX];
/* decode table file (C source), written by huffman_code() */
char filename_huc[_POSIX_PATH_MAX];
#ifndef TURBO_COMPILER
/****************************************/
/* */
/* strrev */
/* */
/****************************************/
/* Reverses the characters of 'string' in place and returns 'string'.
 * Empty and single character strings are returned unchanged.
 */
static char *strrev (char *string)
{
    char *head = string;
    char *tail = string + strlen (string);	/* one past last char */
    char  held;

    /* walk both ends toward the middle, swapping as we go */
    while (head < tail && head < --tail) {
        held = *head;
        *head = *tail;
        *tail = held;
        head++;
    }
    return string;
}
#endif /* !TURBO_COMPILER */
/****************************************/
/* */
/* Build Tree */
/* */
/****************************************/
/* Each call joins the two nodes with smallest count
* into a single higher level node. If there are more than 2 nodes with
* similar 'smallest' counts, then within that group the 2 nodes with the
* shortest current bitstring length are joined.
* Returns TRUE for each successful lower level join.
* Returns FALSE when final join is made at highest level (root).
*/
/* Performs one join step of Huffman tree construction.
 * Scans the 257 treebase leaves, follows each one up to its current
 * topmost ancestor, and picks the two distinct ancestors with the lowest
 * counts (ties go to the leaf with the shorter current bitstring).
 * The pair is then hung beneath a newly allocated node, ++last_node.
 * As a side effect every leaf's 'sort' field is refreshed with its
 * current bitstring length (MAX_BITLEN + 1 for leaves left out of the
 * tree).
 * Returns TRUE while the new node's count is still below total_count,
 * FALSE once the node just created is the root.
 */
static int build_tree (void)
{
    int             leaf;
    int             low0 = -1;
    int             low1 = -1;
    int             len0 = 0;
    int             len1 = 0;

    /* find 2 lowest counts */
    for (leaf = 0; leaf < 257; leaf++) {
        int             node;
        int             root;
        int             depth;
        int             excluded;

        /*
         * With literal coding on, real chars at or below the literal
         * threshold stay out of the tree; with it off, only the
         * literal itself stays out.
         */
        if (literal_coding_on)
            excluded = (leaf != 256
                && hctree1[leaf].count <= literal_threshold);
        else
            excluded = (leaf == 256);
        if (excluded) {
            hctree1[leaf].sort = MAX_BITLEN + 1;
            continue;
        }

        /*
         * Climb from the leaf to its topmost ancestor.  'depth' ends
         * as the number of nodes on that path (the bitstring length),
         * 'root' as the topmost ancestor itself.
         */
        depth = 0;
        root = leaf;
        node = leaf;
        while (node != -1) {
            depth++;
            root = node;
            node = hctree1[node].father;
        }
        hctree1[leaf].sort = depth;

        /* sanity check 1: bit strings have grown too large */
        if (depth > MAX_BITLEN) {
            fprintf (stderr, catgets(dtsearch_catd, MS_huff, 30,
                "\n183 Bit strings have grown too large. You probably "
                "have literals\n turned off with grossly unbalanced "
                "character counts.\n\7"));
            exit (2);
        }
        /* sanity check 2: the topmost ancestor already is the root */
        if (hctree1[root].count >= total_count) {
            fprintf (stderr, catgets(dtsearch_catd, MS_huff, 31,
                "\n191 Programming Error: Still trying to build\n"
                " Huffman Code Tree after root created.\n\7"));
            exit (2);
        }

        /* this ancestor has already been selected through another leaf */
        if (root == low0 || root == low1)
            continue;

        if (low0 == -1
            || hctree1[root].count < hctree1[low0].count
            || (hctree1[root].count == hctree1[low0].count
                && depth < len0)) {
            /* new overall lowest: old low0 is demoted to low1 */
            low1 = low0;
            len1 = len0;
            low0 = root;
            len0 = depth;
        }
        else if (low1 == -1
            || hctree1[root].count < hctree1[low1].count
            || (hctree1[root].count == hctree1[low1].count
                && depth < len1)) {
            /* not below low0 but below low1: replace low1 only */
            low1 = root;
            len1 = depth;
        }
        /* otherwise greater than both candidates: nothing to do */
    } /* end loop to find two lowest counts */

    /* link low0 and low1 beneath the next available new node */
    last_node++;
    hctree1[last_node].bit = LAST_BIT;
    hctree1[last_node].father = -1;
    hctree1[last_node].son0 = low0;
    hctree1[last_node].son1 = low1;
    hctree1[last_node].count = hctree1[low0].count + hctree1[low1].count;
    hctree1[low0].bit = '0';
    hctree1[low0].father = last_node;
    hctree1[low1].bit = '1';
    hctree1[low1].father = last_node;

    if (debug_switch)
        printf ("%3d: low0=%6ld\tlow1=%6ld\tsum=%6ld\t(%ld)\n",
            last_node, hctree1[low0].count, hctree1[low1].count,
            hctree1[last_node].count, total_count);

    return (hctree1[last_node].count < total_count) ? TRUE : FALSE;
} /* end of function build_tree */
/****************************************/
/* */
/* Char Label */
/* */
/****************************************/
/* Returns a printable label for treebase entry 'x', used as the comment
 * field of the .huf file: fixed names for selected control and
 * punctuation characters and for the literal code (256), "'CTRL-c'" for
 * other values below 32, a catalog message for values 128 - 255, the
 * quoted character for the remaining values below 128, and an empty
 * string for anything above 256.
 * The result is either a string constant or a static buffer that is
 * overwritten by the next call, so copy it if it must be kept.
 */
static char *char_label (int x)
{
    static char     buf[64];

    switch (x) {
        case 0:
            return "NULL";
        case 8:
            return "\\b (backspace)";
        case 9:
            return "\\t (tab)";
        case 10:
            return "\\n (linefeed)";
        case 11:
            return "\\v (vert tab)";
        case 12:
            return "\\f (form feed)";
        case 13:
            return "\\r (carr retn)";
        case 26:
            return "CTRL-Z (EOF)";
        case 27:
            return "CTRL-[ (ESC)";
        case 31:
            return "CTRL-dash";
        case 32:
            return "SPACE (blank)";
        case 45:
            return "- (dash)";
        case 95:
            return "_ (underscore)";
        case 127:
            return "DEL";
        case 256:
            return "*** LITERAL CODE ***";
        default:
            if (x > 256)
                return "";
            else if (x < 32) {
                /* control char shown as its CTRL-letter equivalent */
                sprintf (buf, "'CTRL-%c'", 0x40 | x);
                return buf;
            }
            else if (x >= 128) {
                /*
                 * The text comes from the message catalog, so its
                 * length is not known here; bound the copy instead of
                 * trusting it to fit in buf.
                 */
                snprintf (buf, sizeof (buf), "%s",
                    catgets(dtsearch_catd, MS_huff, 32,
                        "(nonascii char, high bit set)"));
                return buf;
            }
            else {
                sprintf (buf, "'%c'", x);
                return buf;
            }
    }
} /* end of function char_label */
/****************************************/
/* */
/* Next Sorted Node */
/* */
/****************************************/
/* Called repeatedly, returns the next treebase node in sorted order.
* Sort order is by length of Huffman Code String.
* Caller must pass index of last node returned (neg at first call).
* Lasti should never be larger than treebase.
*/
/* Iterator over the 257 treebase nodes in increasing 'sort' order.
 * Pass a negative value on the first call and the previously returned
 * index on each later call; that node is then retired by setting its
 * sort field to MAX_BITLEN + 2.
 * Returns the lowest index among the nodes having the smallest sort
 * value below MAX_BITLEN + 2, or -1 when no such node remains.
 */
static int next_sorted_node (int lasti)
{
    int             candidate = 0;
    int             best = -1;
    long            bestval = MAX_BITLEN + 2;

    /* permanently mark last returned node as unavailable */
    if (lasti >= 0)
        hctree1[lasti].sort = MAX_BITLEN + 2;

    /* strict '<' keeps the first node found among equal sort values */
    while (candidate < 257) {
        if (hctree1[candidate].sort < bestval) {
            bestval = hctree1[candidate].sort;
            best = candidate;
        }
        candidate++;
    }
    return best;
} /* end of function next_sorted_node */
/****************************************/
/* */
/* Initialize Treebase */
/* */
/****************************************/
/* 'Treebase' is original 257 character nodes (including literal code).
* If huffcode table file exists, initializes treebase with its values,
* else initializes treebase with zero counts.
*/
/* Initializes the 257 treebase nodes (chars 0 - 255 plus the literal,
 * #256) and the global total_count.
 * Every node first becomes an unlinked leaf with a zero count.  If the
 * .huf table file (filename_huf) can be opened, its first line (the id
 * stamp) is discarded and each following line supplies the count for
 * the character it names; the literal's count is always left at zero
 * and does not contribute to total_count.
 * Sets no_huffcode_file to TRUE when the file could not be opened,
 * FALSE otherwise.
 * A line whose character number is missing or outside 0 - 256, or which
 * lacks the count field, is reported as an invalid file format and the
 * program exits with status 2.  Beyond that the file format is not
 * validated.
 */
static void init_treebase (void)
{
    int             i;
    FILE           *instream_huf;
    char            filebuf[128];
    char           *chartok;
    char           *counttok;

    total_count = 0L;

    /*
     * Reset all leaves up front so that a character the table file
     * fails to mention still has null (-1) links and a zero count.
     */
    for (i = 0; i < 257; i++) {
        hctree1[i].bit = LAST_BIT;
        hctree1[i].count = 0L;
        hctree1[i].father = -1;
        hctree1[i].son0 = -1;
        hctree1[i].son1 = -1;
    }

    /* .huf table file does not exist--all counts stay zero */
    if ((instream_huf = fopen (filename_huf, "r")) == NULL) {
        no_huffcode_file = TRUE;
        return;
    }
    no_huffcode_file = FALSE;

    /* discard the first line (don't need id stamp) */
    if (fgets (filebuf, sizeof (filebuf) - 1, instream_huf) != NULL) {
        /*
         * We are only interested in the character itself (i) and its
         * current count.  All other fields are recreated at output
         * time.
         */
        while (fgets (filebuf, sizeof (filebuf) - 1, instream_huf)
            != NULL) {
            chartok = strtok (filebuf, DELIMITERS);	/* char */
            i = (chartok != NULL) ? atoi (chartok) : -1;
            counttok = NULL;
            if (i >= 0 && i < 256) {
                strtok (NULL, DELIMITERS);	/* skip current huff code */
                counttok = strtok (NULL, DELIMITERS);
            }
            /* strtok yields NULL for a blank or truncated line */
            if (i < 0 || i > 256 || (i != 256 && counttok == NULL)) {
                fprintf (stderr, catgets(dtsearch_catd, MS_huff, 33,
                    "366 Invalid file format for %s.\n"),
                    filename_huf);
                exit (2);
            }
            hctree1[i].count = (i == 256) ? 0L : atol (counttok);
            if (i != 256)
                total_count += hctree1[i].count;
        } /* endwhile loop that reads each table line */
    }
    fclose (instream_huf);
    return;
} /* end of function init_treebase */
/****************************************/
/* */
/* Huffman Code */
/* */
/****************************************/
/* determines correct huffman code based on current counts in tree,
* writes out all to both files overlaying previous values if they existed.
*/
/* Builds the Huffman code tree from the current treebase counts and
 * writes both output files, replacing any previous contents:
 *   - filename_huc: C source holding the tree as an array of integer
 *     pairs (one pair per joined node, indexes biased by -257);
 *   - filename_huf: the encode table, one line per treebase entry in
 *     next_sorted_node() order: char number, code string, count, label.
 * 'idstamp' is written into both files as the table id.
 * Exits with status 2 if either file cannot be opened for writing.
 */
static void huffman_code (time_t idstamp)
{
    int             i;	/* current char */
    int             lasti;
    int             j;	/* ascends tree from i to build bit_string */
    char            bit_string[MAX_BITLEN + 4];
    char           *bitptr;
    FILE           *outstream_huc;
    FILE           *outstream_huf;

    /* establish the 'literal' node (char #256) count
     * equal to sum of all chars whose counts are less than threshold.
     */
    if (literal_coding_on) {
        hctree1[256].count = 0L;
        for (i = 0; i < 256; i++)
            if (hctree1[i].count <= literal_threshold)
                hctree1[256].count += hctree1[i].count;
    }

    /* build the Huffman Code tree, and determine root (last_node) */
    while (build_tree ())
        ;

    /* now that we know the total number of tree nodes (last_node),
     * we are ready to write.
     * Open both output files and verify they are not write protected.
     */
    if ((outstream_huc = fopen (filename_huc, "w")) == NULL) {
        fprintf (stderr, catgets(dtsearch_catd, MS_huff, 34,
            "424 File '%s' failed to open for write. Is it read-only?\n"),
            filename_huc);
        exit (2);
    }
    if ((outstream_huf = fopen (filename_huf, "w")) == NULL) {
        fprintf (stderr, catgets(dtsearch_catd, MS_huff, 34,
            "439 File '%s' failed to open for write. Is it read-only?\n"),
            filename_huf);
        exit (2);
    }

    /* create the .c decode file (tree as integer array);
     * time_t is cast because its width need not equal that of long.
     */
    fprintf (outstream_huc,
        "#include <time.h>\n"
        "char *hctree_name =\t\"%s\";\n"
        "time_t hctree_id =\t%ldL;\n"
        "int hctree_root =\t%d;\n"
        "static int hctree_array[] = {\n",
        filename_huc, (long) idstamp, last_node - 257);
    for (i = 257; i <= last_node; i++) {
        fprintf (outstream_huc, "\t%4d,\t%4d%c\t/* %3d */\n",
            hctree1[i].son0 - 257, hctree1[i].son1 - 257,
            (i == last_node) ? ' ' : ',',	/* no comma after last one */
            i - 257);	/* comment contains node number */
    }
    fprintf (outstream_huc, "\t};\nint *hctree =\thctree_array;\n");
    fclose (outstream_huc);

    /* write out the tree base (0-256) in sorted order to .huf file */
    fprintf (outstream_huf, "%ld\tHCTREE_ID\n", (long) idstamp);
    for (lasti = -1; (i = next_sorted_node (lasti)) >= 0; lasti = i) {
        /*
         * Create huffman code digit string. j ascends tree from i
         * to build string in reverse order.
         */
        bitptr = bit_string;
        for (j = i; j != -1; j = hctree1[j].father)
            *bitptr++ = hctree1[j].bit;
        *bitptr = '\0';	/* terminate reversed string */
        strrev (bit_string);	/* reverse the string order */

        /* only LAST_BIT present: the code field is a single blank */
        if (bit_string[1] == 0)
            strcpy (bit_string, " ");
        /* short codes get an extra tab so the columns line up */
        if (strlen (bit_string) < 9)
            strcat (bit_string, "\t");

        /*
         * Write out the line for this char directly.  The line used
         * to be formatted into a buffer that was then passed to
         * fprintf as the format; since the label for byte 37 is '%'
         * the buffer contained a live conversion specification.
         */
        fprintf (outstream_huf, "%d\t%s\t%ld\t%s\n",
            i,
            bit_string + 1,	/* hop over LAST_BIT */
            hctree1[i].count,
            char_label (i));
    } /* end forloop printing out each tree base entry */
    fclose (outstream_huf);
    return;
} /* end of function huffman_code */
/****************************************/
/* */
/* Print Usage */
/* */
/****************************************/
/* Writes the huffcode command-line synopsis to stderr.  The two %s
 * conversions in the message receive the encode-file and decode-file
 * extensions (EXT_HUFFCODE, EXT_HDECODE).
 */
static void print_usage (void)
{
    const char *usage_fmt;

    /* Fetch the (possibly localized) usage text; the literal below is
     * the fallback catgets() returns when the catalog entry is missing.
     */
    usage_fmt = catgets(dtsearch_catd, MS_huff, 35,
"USAGE: huffcode [-lN | -l-] [-o] <huffname> [<infile>]\n"
" -l<N> specifies the 'literal' threshold count. Any character occurring\n"
" <= <N> times will be coded with the Huffman literal. Default is -l0,\n"
" literal coding only for bytes with counts of zero.\n"
" -l- turns off literal coding. Turning off literal coding in unbalanced\n"
" trees leads to EXTREMELY LONG bit string codes--don't do it unless\n"
" the input is known to be a well balanced binary file.\n"
" -o preauthorizes overwriting any currently existing decode file.\n"
" <huffname> is the filename prefix for the Huffman Code files.\n"
" If the encode file (%s) already exists, byte counts from infile will\n"
" be added to it, otherwise it will be newly created.\n"
" The decode file (%s) is always newly created each run.\n"
" <infile> is an input file containing bytes to be counted.\n"
" It may be omitted if the encode file already exists.\n");

    fprintf (stderr, usage_fmt, EXT_HUFFCODE, EXT_HDECODE);
} /* end of function print_usage */
/********************************************************/
/* */
/* USER_ARGS_PROCESSOR */
/* */
/********************************************************/
/* Handles command line arguments for 'main'.
 *
 * Leading "-x" options are consumed first:
 *   -l<N>  sets literal_threshold, -l- clears literal_coding_on;
 *   -o     preauthorizes overwriting an existing decode file;
 *   -v     sets debug_switch.
 * The next argument is the required file name prefix from which
 * filename_huf and filename_huc are built; an optional following
 * argument is copied to filename_input and sets input_file_specified.
 *
 * Exits with status 1 when no arguments are given, and with status 2
 * on an invalid option, a missing prefix, or when the user declines
 * to overwrite an existing decode file.
 */
static void user_args_processor (int argc, char **argv)
{
    char *argptr;
    int OK_to_overwrite = FALSE;
    FILE *stream;

    if (argc <= 1) {		/* user just wants to see usage msg */
	print_usage ();
	exit (1);
    }

    /* each pass grabs new parm of "-xxx" format */
    while (--argc > 0 && (*++argv)[0] == '-') {
	argptr = argv[0];
	/* <ctype.h> functions require a value representable as
	 * unsigned char; a plain (possibly negative) char is undefined.
	 */
	argptr[1] = tolower ((unsigned char) argptr[1]);
	switch (argptr[1]) {
	    case 'l':		/* literal threshold */
		if (argptr[2] == 0)
		    goto BADARG;
		else if (argptr[2] == '-')
		    literal_coding_on = FALSE;
		else
		    literal_threshold = atoi (argptr + 2);
		break;

	    case 'o':		/* OK_to_overwrite .c file if it already
				 * exists */
		OK_to_overwrite = TRUE;
		break;

	    case 'v':		/* verbose mode = debug switch */
		debug_switch = TRUE;
		break;

	BADARG:
	    default:
		fprintf (stderr, catgets(dtsearch_catd, MS_huff, 36,
		    "'%s' is invalid argument.\n"), argptr);
		print_usage ();
		exit (2);	/* ABORT program */
	}			/* endswitch */
    }				/* endwhile for cmd line '-'processing */

    /* test for required tree file name */
    if (argc <= 0) {
	fprintf (stderr, catgets(dtsearch_catd, MS_huff, 37,
	    "576 Missing Huffman Code file names prefix.\n"));
	print_usage ();
	exit (2);
    }

    /* Create 2 output file names from passed argument.  The prefix is
     * cut at _POSIX_PATH_MAX - 6 so the extension appended by strcat
     * still fits (presumably both buffers hold _POSIX_PATH_MAX bytes --
     * confirm against their declarations).
     */
    strncpy (filename_huf, argv[0], _POSIX_PATH_MAX);
    filename_huf[_POSIX_PATH_MAX - 6] = 0;
    strcat (filename_huf, EXT_HUFFCODE);
    strncpy (filename_huc, argv[0], _POSIX_PATH_MAX);
    filename_huc[_POSIX_PATH_MAX - 6] = 0;
    strcat (filename_huc, EXT_HDECODE);

    /* Since the decode file is a C source code file (.c extension),
     * we want to be sure not to erase somebody's source program.
     * So if the .c file already exists, and the user didn't specify
     * overwrite in a command line argument, ask him now if it's OK to
     * blow away the old file.
     */
    if (!OK_to_overwrite) {
	if ((stream = fopen (filename_huc, "r")) != NULL) {
	    fclose (stream);
	    printf (catgets(dtsearch_catd, MS_huff, 38,
		"Decode file '%s' already exists. "
		"Is it OK to overwrite it? [y/n] "),
		filename_huc);
	    /* The prompt has no trailing newline; make sure it is
	     * visible before blocking on input.
	     */
	    fflush (stdout);
	    if (toupper (getchar ()) != 'Y')
		exit (2);
	}
    }

    /* test for optional input file name */
    if (--argc <= 0)
	input_file_specified = FALSE;
    else {
	input_file_specified = TRUE;
	strncpy (filename_input, argv[1], _POSIX_PATH_MAX);
	filename_input[_POSIX_PATH_MAX - 1] = 0;
    }
    return;
} /* end of function user_args_processor */
/****************************************/
/* */
/* Main */
/* */
/****************************************/
/* huffcode entry point.
 *
 * Opens the message catalog, parses the command line, loads any
 * existing byte counts via init_treebase(), adds the byte counts of
 * the optional input file to hctree1[] and total_count, then calls
 * huffman_code() with the current time as the tree's id stamp.
 * Returns 0 on success; error paths exit with status 2.
 */
int main (int argc, char *argv[])
{
    FILE *instream;
    struct stat fstat_input;
    long bytes_in = 0L;
    int mychar;
    int saved_errno;
    time_t now, start_stamp;

    setlocale (LC_ALL, "");
    dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
    printf (catgets(dtsearch_catd, MS_huff, 40,
	"HUFFCODE Version %s\n"), AUSAPI_VERSION);

    /* validate user's command line arguments */
    user_args_processor (argc, argv);

    /* initialize tree table, using the table file if it exists */
    init_treebase ();
    if (total_count == 0L)
	printf (catgets(dtsearch_catd, MS_huff, 41,
	    "Huffman Code Tables will be newly created.\n"));
    else
	printf (catgets(dtsearch_catd, MS_huff, 42,
	    "Table '%s' already contains %ld Kbytes from previous runs.\n"),
	    filename_huf, total_count / 1000L);

    if (!input_file_specified && no_huffcode_file) {
	fprintf (stderr, catgets(dtsearch_catd, MS_huff, 43,
	    "645 Input file not specified and '%s' table file\n"
	    " doesn't exist--nothing to do!\n"),
	    filename_huf);
	print_usage ();
	exit (2);
    }

    /* read the input file and count its bytes */
    if (input_file_specified) {
	instream = fopen (filename_input, "rb");
	if (instream == NULL ||
	    fstat (fileno (instream), &fstat_input) == -1) {
	    /* Capture errno before catgets() gets a chance to change it. */
	    saved_errno = errno;
	    fprintf (stderr, catgets(dtsearch_catd, MS_huff, 44,
		"Could not open input file '%s' or access status: %s\n"),
		filename_input, strerror (saved_errno));
	    exit (2);
	}
	/* st_size is an off_t; cast so the argument matches %ld. */
	printf (catgets(dtsearch_catd, MS_huff, 45,
	    "Input file '%s' contains about %ld Kbytes.\n"),
	    filename_input, (long) (fstat_input.st_size / 1000L));

	time (&start_stamp);
	while ((mychar = getc (instream)) != EOF) {
	    hctree1[mychar].count++;
	    total_count++;

	    /* Echo progress to user every so often.  The arithmetic is
	     * done in the operands' own types and only the results are
	     * cast to long for %ld.  A reported size of zero (e.g. a
	     * non-regular file) would otherwise divide by zero.
	     */
	    if (!(++bytes_in % 10000L))
		printf (catgets(dtsearch_catd, MS_huff, 46,
		    "\r%ld%% done. %2ld Kbytes read. "
		    "Estimate %3ld seconds to completion. "),
		    (fstat_input.st_size > 0)
			? (long) ((bytes_in * 100L) / fstat_input.st_size)
			: 0L,
		    bytes_in / 1000L,
		    (long) ((fstat_input.st_size - bytes_in) *
			(time (NULL) - start_stamp) / bytes_in));
	}			/* end read loop for each char in input file */
	putchar ('\n');
	fclose (instream);
    }				/* endif that processes input file */

    /* build huffman code tree, write out files */
    time (&now);		/* this will be the official tree id time
				 * stamp */
    printf (catgets(dtsearch_catd, MS_huff, 47,
	"Identifying timestamp will be '%ld'.\n"
	"%s Huffman Code Tables in '%s' and '%s'..."),
	(long) now,		/* time_t need not be long */
	(no_huffcode_file) ?
	    catgets(dtsearch_catd, MS_huff, 48, "Creating") :
	    catgets(dtsearch_catd, MS_huff, 49, "Rebuilding"),
	filename_huf,
	filename_huc);
    huffman_code (now);
    putchar ('\n');
    return 0;
} /* end of function main */
/************************** HUFFCODE.C ******************************/

904
cde/programs/dtsr/tomita.c Normal file
View File

@@ -0,0 +1,904 @@
/*
* COMPONENT_NAME: austext
*
* FUNCTIONS: browser
* change_database
* deleter
* kill_delete
* load_dbatab
* main
* parse_infbuf
* print_exit_code
* retncode_abort
*
* ORIGINS: 27
*
* (C) COPYRIGHT International Business Machines Corp. 1992,1995
* All Rights Reserved
* US Government Users Restricted Rights - Use, duplication or
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
*/
/******************* TOMITA.C *******************
* $TOG: tomita.c /main/9 1998/04/17 11:23:38 mgreess $
* May 1992.
* Replaces original tomita but removes curses dependencies
* and uses Opera Engine for deletes rather than hard
* coding vista calls.
*
* In effect, tomita is two programs.
* Program #1 browses an input list of opera record ids and
* prompts user to confirm that they should be deleted from opera.
* The confirmed list is written to an output file that is identical in format.
* Program 1 can be run anytime because it only reads the database.
*
* Program #2 deletes records from an input list, presumably
* the output of program #1. Program #2 writes to vista and changes it.
* It MUST be run offline when no users are logged into opera,
* in order to prevent database corruption.
* Currently password is passed on command line.
* The password is maintained in an encrypted flat file.
* It can be changed from an undocumented Opera Engine function
* available in tuiopera.
*
* RECORD ID FILE FORMAT (shdscrd.lst format):
* One record to be deleted per line.
* 3 words per line separated by whitespace, everything thereafter is comments.
* (These programs only use the first 2 words).
* All words may be optionally enclosed in double quotes
* to capture embedded blanks ("xxx xxx").
* ...
* databasename recordid userid comments...\n
* ...
*
* $Log$
* Revision 2.2 1995/10/25 15:21:36 miker
* Added prolog.
*
* Revision 2.1 1995/09/22 22:17:11 miker
* Freeze DtSearch 0.1, AusText 2.1.8
*
* Revision 1.11 1995/09/05 19:14:39 miker
* Removed password requirement. DtSearch header file and function
* name changes. Made usrblk universal global.
*/
#include "SearchE.h"
#include <string.h>
#include <ctype.h>
#include <signal.h>
#include <sys/stat.h>
#include <locale.h>
/* Prints the engine's pending messages with puts(), then frees them.
 * Expands to a brace-enclosed block, which is why this file writes
 * PRINT_MESSAGES without a trailing semicolon.
 */
#define PRINT_MESSAGES \
{ puts (DtSearchGetMessages()); DtSearchFreeMessages(); }
/* NOTE(review): TOKEN_DELIM does not appear to be referenced in this file. */
#define TOKEN_DELIM " \t\n"
/* Number of newlines browser() prints between "push ENTER" pauses. */
#define PAUSE_ROWS 15
/* Default deletion batch size; initial value of max_dbacount. */
#define DBACOUNT 2000
/* Prefix concatenated with a location number to tag messages. */
#define PROGNAME "TOMITA"
/* Message catalog set numbers passed to catgets(). */
#define MS_tomita 29
#define MS_misc 1
/*------------------ GLOBALS ------------------*/
/* Set by the -d argument; load_dbatab() uses it to restore USRDBG_DELETE. */
static int debug_mode = FALSE;
static int prog = 'B'; /* 'D' = deleting, 'B' = browsing */
/* Set by kill_delete(); deleter() checks it between batches. */
static int shutdown_now = FALSE;
/* Set by the -y argument; skips deleter()'s confirmation prompt. */
static int yesarg = FALSE;
/* Set to 1 by change_database() and parse_infbuf() on errors.
 * NOTE(review): not read anywhere in this file.
 */
static int retncode = 0;
/* Maximum entries load_dbatab() puts in usrblk.dbatab; set by -n. */
static int max_dbacount = DBACOUNT;
/* Input list of record ids and (browse mode) output list of confirmed
 * deletions.  inf == NULL means the input file is not open.
 */
static FILE *inf, *outf;
static char *infname;
static char *outfname;
/* Run time limit in seconds from -t; 0 means no limit. */
static long maxtime;
/* Count of input lines read by load_dbatab(). */
static long records_read;
/* Results of the most recent parse_infbuf() call: database name,
 * the matching database block, and the record id.
 */
static char parsed_dbname [24];
static DBLK *parsed_dblk;
char parsed_recid [2048];
/************************************************/
/* */
/* print_exit_code */
/* */
/************************************************/
/* Exit hook: main() stores this function in austext_exit_last so that
 * DtSearchExit() reports the program name and exit code on stdout.
 */
static void print_exit_code (int exit_code)
{
    const char *report_fmt =
	catgets(dtsearch_catd, MS_tomita, 3, "%s: Exit Code = %d.\n");

    printf (report_fmt, aa_argv0, exit_code);
} /* print_exit_code() */
/************************************************/
/* */
/* kill_delete */
/* */
/************************************************/
/* Signal handler that deleter() installs for its termination signals.
 * It only raises the shutdown_now flag, so the delete loop can stop
 * cleanly between batches, and tells the user which signal arrived.
 *
 * NOTE(review): printf() and catgets() are not async-signal-safe;
 * calling them from a handler is technically unsafe.
 */
static void kill_delete (int sig)
{
    const char *notice_fmt;

    shutdown_now = TRUE;

    notice_fmt = catgets(dtsearch_catd, MS_tomita, 1,
	"\n%s Received interrupt %d.\n"
	" Program will stop after current batch of deletions.\n");
    printf (notice_fmt, PROGNAME"069", sig);
} /* kill_delete() */
/************************************************/
/* */
/* retncode_abort */
/* */
/************************************************/
/* Reports a failed engine request and terminates via DtSearchExit(3).
 * Any pending engine messages are written to aa_stderr first, followed
 * by a line holding 'location' (the caller's message number) and the
 * current usrblk.retncode.
 */
static void retncode_abort (int location)
{
    fputc ('\n', aa_stderr);

    if (DtSearchHasMessages ()) {
	fprintf (aa_stderr, "%s\n", DtSearchGetMessages ());
    }

    fprintf (aa_stderr,
	PROGNAME "%d Program abort. usrblk.retncode = %d. Exit code = 3.\n",
	location,
	usrblk.retncode);

    DtSearchExit (3);
} /* retncode_abort() */
/****************************************/
/* */
/* change_database */
/* */
/****************************************/
/* Makes usrblk.dblk point at the database in usrblk.dblist whose name
 * equals 'newname'.  Returns TRUE on success.  If no database matches,
 * sets retncode to 1, reports the bad name on aa_stderr (and, when
 * browsing, lists the valid names), and returns FALSE.
 */
static int change_database (char *newname)
{
    DBLK *candidate = usrblk.dblist;

    /* Search the engine's database list for an exact name match */
    while (candidate != NULL) {
	if (strcmp (candidate->name, newname) == 0) {
	    usrblk.dblk = candidate;
	    return TRUE;
	}
	candidate = candidate->link;
    }

    /* No match.  Always say which database name was invalid. */
    retncode = 1;
    fprintf (aa_stderr, catgets(dtsearch_catd, MS_tomita, 4,
	"%s Database '%s' not found.\n") ,
	PROGNAME"114", newname);

    /* Only an interactive (browsing) user is shown the alternatives */
    if (prog != 'D') {
	fprintf (aa_stderr, catgets(dtsearch_catd, MS_tomita, 5,
	    "Available choices are:") );
	candidate = usrblk.dblist;
	while (candidate != NULL) {
	    fprintf (aa_stderr, " '%s'", candidate->name);
	    candidate = candidate->link;
	}
	fputc ('\n', aa_stderr);
    }
    return FALSE;
} /* change_database() */
/****************************************/
/* */
/* parse_infbuf */
/* */
/****************************************/
/* Parses a line from a standard formatted discard file.
 *
 * The first token (database name) is copied to parsed_dbname; if it
 * differs from usrblk.dblk->name the current database is switched with
 * change_database(), and parsed_dblk is set to the resulting
 * usrblk.dblk.  The second token (record id) is copied to
 * parsed_recid.  Tokens are separated by blanks and/or tabs, except
 * that a second token starting with a double quote runs to the next
 * double quote and so may contain embedded blanks.
 *
 * 'infbuf' itself is not modified; parsing happens in a local copy
 * (lines longer than the copy are truncated).
 *
 * Returns TRUE on success.  Returns FALSE, with retncode set to 1,
 * if either token is missing or the database could not be changed.
 */
static int parse_infbuf (char *infbuf)
{
    char *ptr;
    char *line_end;
    char mybuf[1024];

    /* Do all parsing in my own buf so infbuf not peppered with \0's */
    strncpy (mybuf, infbuf, sizeof (mybuf));
    mybuf[sizeof (mybuf) - 1] = 0;
    /* Remember where the copy really ends, before strtok edits it */
    line_end = mybuf + strlen (mybuf);

    /* Parse first token (database name) */
    if ((ptr = strtok (mybuf, " \t")) == NULL) {
	/* Msg #8 is used in two places */
	fprintf (aa_stderr, catgets(dtsearch_catd, MS_tomita, 8,
	    "%s Invalid input format: %.30s...\n") ,
	    PROGNAME"152", infbuf);
	retncode = 1;
	return FALSE;
    }

    /* Change database if necessary */
    if (strcmp (ptr, usrblk.dblk->name) != 0) {
	if (!change_database (ptr)) {
	    retncode = 1;
	    return FALSE;
	}
    }
    /* Bounded copy: the token comes from the input file and must not
     * be allowed to overrun parsed_dbname.
     */
    strncpy (parsed_dbname, ptr, sizeof (parsed_dbname));
    parsed_dbname[sizeof (parsed_dbname) - 1] = 0;
    parsed_dblk = usrblk.dblk;

    /* Hop over to beginning of 2nd token.  If the first token was
     * followed by a delimiter, strtok replaced it with \0 and the rest
     * of the line starts one char later.  If the token ran to the end
     * of the line there is nothing after it, and stepping past the
     * terminator would leave the buffer.
     */
    ptr += strlen (ptr);
    if (ptr < line_end)
	ptr++;
    while (*ptr == ' ' || *ptr == '\t')
	ptr++;

    /* Get 2nd token (record id). Token delimiters depend
     * on whether token begins with a double quote.
     */
    ptr = strtok (ptr, (*ptr == '\"') ? "\"" : " \t");
    if (ptr == NULL) {
	/* Msg #8 is used in two places */
	fprintf (aa_stderr, catgets(dtsearch_catd, MS_tomita, 8,
	    "%s Invalid input format: %.30s...\n") ,
	    PROGNAME"176", infbuf);
	retncode = 1;
	return FALSE;
    }
    strncpy (parsed_recid, ptr, sizeof (parsed_recid));
    parsed_recid[sizeof (parsed_recid) - 1] = 0;
    return TRUE;
} /* parse_infbuf() */
/****************************************/
/* */
/* browser */
/* */
/****************************************/
/* Prints the "push ENTER" prompt used while paging through a record
 * and discards the line the user types in response.
 */
static void browser_pause (void)
{
    char answer[1024];

    /* Msg 21 */
    printf ( catgets(dtsearch_catd, MS_tomita, 21,
	"\n...push ENTER to continue... ") );
    if (fgets (answer, sizeof (answer), stdin) == NULL)
	answer[0] = '\0';	/* EOF or error: nothing to discard */
    putchar ('\n');
}

/* Program 1: interactive browser.
 *
 * Repeatedly shows a menu and reads a one-line command from stdin.
 * Depending on the command it fetches a record through Opera_Engine()
 * (next line of the input file, next/prior sequential record, or a
 * record id typed by the user), displays its key, abstract, cleartext
 * and user notes, and on 'x' appends the current record id to the
 * output file 'outf' in DISCARD_FORMAT.
 *
 * The loop ends on 'q' or when stdin reaches EOF.  Engine failures
 * terminate the program through retncode_abort().  Always returns 0.
 */
static int browser (void)
{
    int done = FALSE;
    int pausing = FALSE;
    int redisplay_rec = FALSE;
    int pause_counter;
    size_t len;
    time_t stamp;
    LLIST *llptr;
    char *ptr;
    char datestr[32];		/* "1946/04/17 13:03" */
    char userbuf[1024];
    char infbuf[1024];

    /* All writes to output file will have same date string in comment */
    time (&stamp);
    strftime (datestr, sizeof (datestr), "%Y/%m/%d %H:%M", localtime (&stamp));

    /* Main menu loop */
    while (!done) {
	if (DtSearchHasMessages ()) {
	    putchar ('\n');
	    PRINT_MESSAGES
	}

	/* Write main menu prompt */
	printf ( catgets(dtsearch_catd, MS_tomita, 10,
	    "\n---------- SHOW NEXT RECORD ----------- Database = '%s'\n"
	    "q QUIT. Current Record Count = %ld\n"
	    "p Toggle PAUSE from %s.\n"
	    "n NEXT INPUT file record.\n"
	    "+ NEXT SEQUENTIAL database record.\n"
	    "- PRIOR SEQUENTIAL database record.\n"
	    "r REDISPLAY current record '%s'.\n"
	    "x CONFIRM DELETION of current record.\n"
	    "dxxx Change DATABASE to xxx.\n"
	    "\"xxx GET record id xxx (embedded blanks are ok).\n"
	    "> ") ,
	    usrblk.dblk->name,
	    usrblk.dblk->dbrec.or_reccount,
	    (pausing) ? "on to OFF" : "off to ON",
	    usrblk.objrec.or_objkey
	    );

	/* Read user's response. Remove user's \n. */
	*userbuf = '\0';
	if ((fgets (userbuf, sizeof (userbuf), stdin)) == NULL)
	    break;
	len = strlen (userbuf);
	if (len && userbuf[len - 1] == '\n')
	    userbuf[len - 1] = '\0';
	putchar ('\n');

	/* depending on response, get database address into usrblk */
	redisplay_rec = FALSE;
	/* <ctype.h> functions need an unsigned char value */
	switch (tolower ((unsigned char) *userbuf)) {
	    case 'q':
		done = TRUE;
		break;

	    case 'd':
		change_database (userbuf + 1);
		continue;

	    case 'p':
		pausing = !pausing;
		continue;

	    case 'r':
		if (usrblk.objrec.or_objkey[0] == 0) {
		    fprintf (aa_stderr,
			catgets(dtsearch_catd, MS_tomita, 11,
			"%s Record buffer empty.\n"),
			PROGNAME"267");
		    continue;
		}
		/* The record is already in usrblk: skip the engine
		 * calls below and just display it again.
		 */
		redisplay_rec = TRUE;
		break;

	    case '+':
	    case '-':
		usrblk.request = (*userbuf == '+') ? OE_NEXT_DBA : OE_PREV_DBA;
		Opera_Engine ();
		break;

	    case 'n':
		if (inf == NULL) {
		    fprintf (aa_stderr,
			catgets(dtsearch_catd, MS_tomita, 12,
			"%s Input file unavailable.\n"),
			PROGNAME"282");
		    continue;
		}
		*infbuf = '\0';
		if ((fgets (infbuf, sizeof (infbuf), inf)) == NULL) {
		    fprintf (aa_stderr,
			catgets(dtsearch_catd, MS_tomita, 13,
			"%s No more records in input file.\n"),
			PROGNAME"288");
		    fclose (inf);
		    inf = NULL;
		    continue;
		}
		len = strlen (infbuf);
		if (len && infbuf[len - 1] == '\n')
		    infbuf[len - 1] = '\0';

		if (!parse_infbuf (infbuf))
		    continue;
		usrblk.request = OE_RECKEY2DBA;
		usrblk.query = parsed_recid;
		Opera_Engine ();
		break;

	    case '\"':
		ptr = strtok (userbuf, "\"");
		if (ptr == NULL || *ptr == 0) {
		    fprintf (aa_stderr,
			catgets(dtsearch_catd, MS_tomita, 14,
			"%s Invalid Record ID.\n"),
			PROGNAME"303");
		    continue;
		}
		usrblk.request = OE_RECKEY2DBA;
		usrblk.query = ptr;
		Opera_Engine ();
		break;

	    case 'x':
		/*
		 * Write record id to output file. Format:
		 * dbasename "recid" userid comments(date)...
		 */
		fprintf (outf, DISCARD_FORMAT, usrblk.dblk->name,
		    usrblk.objrec.or_objkey, usrblk.userid, datestr);
		printf ( catgets(dtsearch_catd, MS_tomita, 15,
		    "%s '%s' appended to file of confirmed deletions.\n") ,
		    PROGNAME"317", usrblk.objrec.or_objkey);
		continue;

	    default:
		printf (catgets(dtsearch_catd, MS_tomita, 16, "...what?\n"));
		continue;
	}			/* end switch */

	if (done)
	    break;

	/* If user requested redisplay, skip the following OE code */
	if (!redisplay_rec) {
	    /*
	     * check return code from attempt to get opera database
	     * address
	     */
	    if (usrblk.retncode == OE_WRAPPED)
		fprintf (aa_stderr, catgets(dtsearch_catd, MS_tomita, 17,
		    "%s %s Engine wrapped to next record.\n") ,
		    PROGNAME"333", OE_prodname);
	    else if (usrblk.retncode != OE_OK)
		retncode_abort (334);

	    /* retrieve the record and uncompress it */
	    usrblk.request = OE_GETREC;
	    Opera_Engine ();
	    if (usrblk.retncode != OE_OK)
		retncode_abort (339);
	}

	/* display the record's cleartext, character by character */
	printf ( catgets(dtsearch_catd, MS_tomita, 18,
	    "\n\n"
	    "Record: '%s'\n"
	    "Abstract: '%s'\n"
	    "--------------------------------------\n") ,
	    usrblk.objrec.or_objkey,
	    (usrblk.abstrbufsz > 0) ? usrblk.abstrbuf :
		catgets (dtsearch_catd, MS_misc, 1, "<null>"));

	pause_counter = 0;
	for (ptr = usrblk.cleartext; *ptr != 0; ptr++) {
	    putchar (*ptr);
	    /*
	     * pause every so many lines so user can browse the
	     * output
	     */
	    if (pausing && *ptr == '\n') {
		if (++pause_counter >= PAUSE_ROWS) {
		    browser_pause ();
		    pause_counter = 0;
		}
	    }
	}			/* end of cleartext printing */

	/* display the user notes if any, character by character */
	if (usrblk.notes != NULL) {
	    printf ( catgets(dtsearch_catd, MS_tomita, 20,
		"--------------------------------------\n"
		"End of Text Blob for '%s':\n\n"
		"User Notes:\n"
		"--------------------------------------\n") ,
		usrblk.objrec.or_objkey);
	    pause_counter += 5;	/* the heading above took some rows */
	    for (llptr = usrblk.notes; llptr != NULL; llptr = llptr->link) {
		for (ptr = llptr->data; *ptr != '\0'; ptr++) {
		    putchar (*ptr);
		    if (pausing && *ptr == '\n') {
			if (++pause_counter >= PAUSE_ROWS) {
			    browser_pause ();
			    pause_counter = 0;
			}
		    }
		}
	    }
	}			/* end of user notes printing */

	printf ("--------------------------------------\n"
	    "End of Record '%s'.\n", usrblk.objrec.or_objkey);
    }				/* end of main menu loop */
    return 0;
} /* browser() */
/****************************************/
/* */
/* load_dbatab */
/* */
/****************************************/
/* Subroutine of deleter(). Reads lines from the discard file 'inf'
 * containing record ids to be deleted, converts each to a database
 * address with an OE_RECKEY2DBA engine request, and stores the
 * addresses in usrblk.dbatab.
 *
 * A batch ends at end of file, when max_dbacount addresses have been
 * stored, or when a line names a different database than the batch
 * being built.  In the last case the already-parsed line is kept in
 * parsed_dbname/parsed_dblk/parsed_recid and consumed by the next call.
 * At end of file 'inf' is closed and set to NULL.
 *
 * Returns number of dba's added to table (usrblk.dbacount).
 * Returns 0 when file is empty after last batch.
 * Aborts through retncode_abort() on an engine error.
 */
static int load_dbatab (void)
{
/* FALSE when the previous call stopped on a database name change, so
 * the record parsed then must be used before reading another line.
 */
static int read_next_rec = TRUE;
/* Name and block of the database the current batch belongs to.
 * last_dbname stays empty until the first line parses successfully.
 */
static char last_dbname[24] = "";
static DBLK *last_dblk;
DB_ADDR *next_dba;
char buf[1024];
/* Used to print one blank line before the first "not found" message */
int first_err = TRUE;
if (inf == NULL)
return 0;
usrblk.dbacount = 0;
next_dba = usrblk.dbatab;
KEEP_READING:
/* MAIN LOOP - break it at EOF, max count, or dbname change */
/* NOTE(review): if max_dbacount is <= 0 this loop body never runs,
 * and the goto after the loop then repeats forever while 'inf' is
 * still open.
 */
while (usrblk.dbacount < max_dbacount) {
/*
 * Skip the read of the first record if the reason we left
 * main loop the last time was because of a database name
 * change, and the data from the last read is still in
 * parsed_dbname, _dblk, and _recid. Update usrblk.dblk
 * because it's based on the last table's database.
 */
if (!read_next_rec) {
read_next_rec = TRUE;
usrblk.dblk = parsed_dblk;
}
else {
*buf = '\0';
if (fgets (buf, sizeof (buf), inf) == NULL)
{
/* End of input: close the file so later calls return 0 */
fclose (inf);
inf = NULL;
break;
}
records_read++;
buf[sizeof (buf) - 1] = 0; /* guarantee termination */
/* Strip the trailing newline left by fgets */
if (strlen(buf) && buf[strlen(buf)-1] == '\n')
buf[strlen(buf)-1] = '\0';
/*
 * Parse line into dbname and recid. Skip line if
 * error.
 */
if (!parse_infbuf (buf))
continue;
/* on very first read, save the database name */
if (last_dbname[0] == 0) {
strcpy (last_dbname, parsed_dbname);
last_dblk = parsed_dblk;
}
} /* finished reading next rec in input file */
/*
 * Test for change of database name. Restore usrblk.dblk
 * to reflect all the records on the dba table so far. Then
 * save the new dblk for when we are again called.
 */
if (strcmp (last_dbname, parsed_dbname) != 0) {
read_next_rec = FALSE;
strcpy (last_dbname, parsed_dbname);
usrblk.dblk = last_dblk;
last_dblk = parsed_dblk;
break;
}
/*
 * Call OE to get record's db address. Turn off debug
 * temporarily so won't flood output with messages.
 */
usrblk.query = parsed_recid;
usrblk.debug &= ~USRDBG_DELETE;
usrblk.request = OE_RECKEY2DBA;
Opera_Engine ();
if (debug_mode) /* restore */
usrblk.debug |= USRDBG_DELETE;
if (DtSearchHasMessages ()) {
putchar ('\n');
PRINT_MESSAGES
}
/* OE_WRAPPED is reported as "record not found"; the line is
 * skipped and nothing is added to the table.
 */
if (usrblk.retncode == OE_WRAPPED) {
if (first_err) {
first_err = FALSE;
fputc ('\n', aa_stderr);
}
fprintf (aa_stderr, catgets(dtsearch_catd, MS_tomita, 24,
"%s Database %s, '%s' not found.\n") ,
PROGNAME"482", parsed_dbname, parsed_recid);
continue;
}
else if (usrblk.retncode != OE_OK)
retncode_abort (486);
/* add db address to growing table */
*next_dba = usrblk.dba;
next_dba++;
usrblk.dbacount++;
} /* end of main record read loop */
/* It is possible to exit the main loop, because database changed
 * or whatever, but no records were added to usrblk.dbatab.
 * If there are still records to be read from the input file,
 * go back and try another pass.
 */
if (inf != NULL && usrblk.dbacount == 0)
goto KEEP_READING;
return usrblk.dbacount;
} /* load_dbatab() */
/****************************************/
/* */
/* deleter */
/* */
/****************************************/
/* Program 2: deletes records specified in input file.
 * Must be run offline when all online users have logged off.
 *
 * Unless the -y argument was given (yesarg), the user must first
 * confirm by typing a line starting with 'y'; anything else, or end of
 * input, returns without deleting.  Batches built by load_dbatab() are
 * then deleted with OE_DELETE_BATCH engine requests until the input is
 * exhausted, a termination signal has set shutdown_now, or 'maxtime'
 * seconds have elapsed.  Progress and a final summary go to stdout.
 * An engine failure aborts through retncode_abort().
 *
 * 'infname' is only used in the confirmation prompt; the records are
 * read from the already opened global stream 'inf'.
 */
static void deleter (char *infname)
{
    long records_deleted;
    long elapsed, hours, minutes, seconds;	/* long, to match %ld */
    time_t start_time;
    char buf[128];

    if (!yesarg) {
	/* OE_prodname is passed although the default text has only
	 * one %s; surplus printf arguments are ignored.
	 */
	printf ( catgets(dtsearch_catd, MS_tomita, 25,
	    "\nDO NOT CONTINUE under any of the following circumstances:\n"
	    "-> If the input file which lists record ids to be deleted is not\n"
	    " named '%s'.\n"
	    "-> If any users are still accessing the affected database(s).\n"
	    "-> If any database files have not been backed up.\n\n"
	    "If you are sure you are ready to start deleting, enter 'y' now... ") ,
	    infname, OE_prodname);
	/* No answer at all (EOF/error) must never count as consent,
	 * and buf would be uninitialized in that case.
	 */
	if (fgets (buf, sizeof (buf), stdin) == NULL)
	    return;
	if (tolower ((unsigned char) *buf) != 'y')
	    return;
    }

    /* Make sure engine doesn't abort because of
     * recurring changes to d99 files.
     */
    OE_sitecnfg_mtime = 0L;

    /* Init table of db addrs */
    usrblk.dbatab = austext_malloc
	(sizeof (DB_ADDR) * (max_dbacount + 2), PROGNAME "531", NULL);
    usrblk.dbacount = 0;	/* number of recs currently in table */

    /* Init status msg stuff */
    records_read = 0L;
    records_deleted = 0L;
    time (&start_time);

    signal (SIGINT, kill_delete);
    signal (SIGQUIT, kill_delete);
    signal (SIGTRAP, kill_delete);
    signal (SIGTERM, kill_delete);
#ifdef SIGPWR			/* not defined on every platform */
    signal (SIGPWR, kill_delete);
#endif
#ifdef _AIX
    signal (SIGXCPU, kill_delete);	/* cpu time limit exceeded */
    signal (SIGDANGER, kill_delete);	/* imminent paging space
					 * crash */
#endif

    /* MAIN LOOP */
    while (load_dbatab ()) {
	/*
	 * Stop now if we have exceeded user specified time limit
	 * or if user sent termination or interrupt signal.
	 */
	if (shutdown_now)
	    break;
	elapsed = (long) (time (NULL) - start_time);
	if (maxtime > 0L && elapsed >= maxtime)
	    break;

	/* echo status for humans who might be watching */
	hours = elapsed / 3600L;
	seconds = elapsed - (3600L * hours);	/* remaining after hours */
	minutes = seconds / 60L;
	seconds = seconds - (60L * minutes);
	printf ( catgets(dtsearch_catd, MS_tomita, 26,
	    "%s %ld read, %ld deleted, %ldh %2ldm %2lds elapsed.\n"
	    " Database '%s': Current record count = %ld, Batch size = %d.\n") ,
	    aa_argv0, records_read, records_deleted,
	    hours, minutes, seconds,
	    usrblk.dblk->name, usrblk.dblk->dbrec.or_reccount, usrblk.dbacount);
	/*****fflush (stdout);*****/

	/* call OE to delete batch of records */
	usrblk.request = OE_DELETE_BATCH;
	Opera_Engine ();
	if (DtSearchHasMessages ()) {
	    putchar ('\n');
	    PRINT_MESSAGES
	}
	if (usrblk.retncode != OE_OK)
	    retncode_abort (572);
	records_deleted += usrblk.dbacount;
    }				/* end main loop */

    /* Print final status messages */
    elapsed = (long) (time (NULL) - start_time);	/* total elapsed time */
    hours = elapsed / 3600L;
    seconds = elapsed - (3600L * hours);	/* remaining after hours */
    minutes = seconds / 60L;
    seconds = seconds - (60L * minutes);	/* remaining after hours
						 * & mins */
    printf ( catgets(dtsearch_catd, MS_tomita, 27,
	"%s %ld records read from input file. %ld were deleted and\n"
	" %ld were not found in %ld hours, %ld minutes, %ld seconds,\n") ,
	aa_argv0, records_read, records_deleted,
	records_read - records_deleted,
	hours, minutes, seconds);

    /* Figure average time for a deletion */
    elapsed = (records_deleted) ? elapsed / records_deleted : 0L;
    minutes = elapsed / 60L;
    seconds = elapsed - (60L * minutes);
    printf ( catgets(dtsearch_catd, MS_tomita, 28,
	" or an average of %ld minutes, %ld seconds per record deleted.\n"),
	minutes, seconds);
    return;
} /* deleter() */
/****************************************/
/* */
/* main */
/* */
/****************************************/
int main (int argc, char *argv[])
{
char *arg;
time_t mytime;
char timebuf[80];
aa_argv0 = argv[0];
setlocale (LC_ALL, "");
dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
time (&mytime);
strftime (timebuf, sizeof (timebuf),
catgets(dtsearch_catd, MS_misc, 22, "%A, %b %d %Y, %I:%M %p"),
localtime (&mytime));
printf (catgets(dtsearch_catd, MS_tomita, 29,
"%s. Run %s.\n") ,
aa_argv0, timebuf);
austext_exit_last = print_exit_code;
signal (SIGINT, DtSearchExit);
signal (SIGTERM, DtSearchExit);
/****memset (&usrblk, 0, sizeof(USRBLK));****/
/* Validate program number argument */
if (argc < 2) {
BAD_ARGS:
fprintf (aa_stderr, catgets(dtsearch_catd, MS_tomita, 30,
"\nUSAGE: %s [options]\n"
" -i Input file name. If not specified, defaults to %s.\n"
" -d[v] Print debug statements.\n"
" -dv turns on verbose (record-by-record) debugging.\n"
" -t<N> Max desired number of seconds of run time.\n"
" Ctrl-C/Break will also stop deletion at next record.\n"
" -n<N> Change number of records in a batch from %d to <N>.\n"
" -y Automatically answers 'yes' to Delete mode confirm prompt.\n"
" -d trace deletion operations.\n") ,
aa_argv0, FNAME_DISCARD_DATA,
FNAME_CONFIRM_LIST, FNAME_CONFIRM_LIST, DBACOUNT);
DtSearchExit (2);
}
prog = toupper (argv[1][0]);
if (prog != 'B' && prog != 'D')
goto BAD_ARGS;
/* Initialize defaults depending on program mode */
if (prog == 'B') {
infname = FNAME_DISCARD_DATA;
outfname = FNAME_CONFIRM_LIST;
}
else {
infname = FNAME_CONFIRM_LIST;
outfname = PROGNAME "654";
}
maxtime = 0L;
/* Save rest of command line arguments */
for (argc -= 2, argv += 2; argc > 0; argc--, argv++) {
arg = *argv;
switch (tolower (arg[1])) {
case 'i':
infname = arg + 2;
break;
case 'o':
outfname = arg + 2;
break;
case 'd':
debug_mode = TRUE;
usrblk.debug |= USRDBG_DELETE;
if (arg[2] == 'v')
usrblk.debug |= USRDBG_VERBOSE;
break;
case 'y':
yesarg = TRUE;
break;
case 't':
maxtime = atol (arg + 2);
break;
case 'n':
max_dbacount = atol (arg + 2);
break;
default:
fprintf (aa_stderr, catgets(dtsearch_catd, MS_tomita, 31,
"\n%s Unknown argument '%s'.\n") ,
PROGNAME"689", arg);
goto BAD_ARGS;
} /* end switch */
} /* end arg parsing */
/* Open input file to test for its existence.
* For the Browse program, file ptr 'inf' == NULL
* means the file is not open.
*/
if ((inf = fopen (infname, "r")) == NULL) {
if (prog == 'D') {
fprintf (aa_stderr, catgets(dtsearch_catd, MS_tomita, 32,
"%s Unable to open input file '%s'.\n") ,
PROGNAME"710", infname);
goto BAD_ARGS;
}
}
/* If browsing, get output file name and
* open it to test for write permission.
*/
if (prog == 'B') {
if ((outf = fopen (outfname, "a ")) == NULL)
/* the blank in "a " works around old aix bug */
{
fprintf (aa_stderr, catgets(dtsearch_catd, MS_tomita, 33,
"\n%s Unable to open output file '%s'.\n") ,
PROGNAME"721", outfname);
goto BAD_ARGS;
}
}
/* Initialize the opera engine, i.e. open the database */
printf ( catgets(dtsearch_catd, MS_tomita, 34,
"Initializing %s engine...\n"),
OE_prodname);
strcpy (usrblk.userid, "ToMiTa");
usrblk.request = OE_INITIALIZE;
usrblk.query = AUSAPI_VERSION;
Opera_Engine ();
if (usrblk.retncode != OE_OK)
retncode_abort (733);
PRINT_MESSAGES
if (prog == 'B')
browser ();
else
deleter (infname);
usrblk.request = OE_SHUTDOWN;
Opera_Engine ();
printf ( catgets(dtsearch_catd, MS_tomita, 36,
"Normal engine shutdown.\n") );
DtSearchExit (0);
} /* main() */
/******************* TOMITA.C *******************/