/* * CDE - Common Desktop Environment * * Copyright (c) 1993-2012, The Open Group. All rights reserved. * * These libraries and programs are free software; you can * redistribute them and/or modify them under the terms of the GNU * Lesser General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) * any later version. * * These libraries and programs are distributed in the hope that * they will be useful, but WITHOUT ANY WARRANTY; without even the * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public * License along with these libraries and programs; if not, write * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth * Floor, Boston, MA 02110-1301 USA */ /* * COMPONENT_NAME: austext * * FUNCTIONS: TERMINATE_LINE * copy_new_d99 * copy_old_d2x_to_new * end_of_job * main * open_all_files * print_progress * print_usage * read_d2x * signal_shutdown * user_args_processor * validation_error * write_d2x * * ORIGINS: 27 * * IBM CONFIDENTIAL -- (IBM Confidential Restricted when * combined with the aggregated modules for this product) * OBJECT CODE ONLY SOURCE MATERIALS * * (C) COPYRIGHT International Business Machines Corp. 1995 * All Rights Reserved * US Government Users Restricted Rights - Use, duplication or * disclosure restricted by GSA ADP Schedule Contract with IBM Corp. */ /*************************** MRCLEAN.C **************************** * $TOG: mrclean.c /main/7 1998/04/17 11:25:42 mgreess $ * Does garbage collection (ie compression) of .d99 file. * Optionally verifies all database addresses in d99. * Modification of clndtbs.c and checkd99.c. * Does NOT use austext engine so this must be modified if schema changes. * * INPUT FORMAT: * All command input is on command line. Reads existing d2x and d99 files. * * OUTPUT FORMAT: * New .d2x and .d99 files are placed into the directory specified by user. * * EXIT CODE STANDARDS: * 0 = normal. * 1 = warnings, but output should be ok. * 2 = failure in cmd line parse or other initialization; job never started. * 3 - 49 = fatal error, but output may be acceptable. * 50 - 99 = fatal error and output files are probably unusable. * (In this program, even input may be corrupted). * 100+ = aborting due to asynchronous interrupt signal. * Output files may or may not be unusable. * * $Log$ * Revision 1.11 1995/09/05 18:16:46 miker * Name, msg, and other minor changes for DtSearch.. * Print messages if austext_dopen() fails. * * Revision 1.10 1995/06/02 15:52:42 miker * Cleaned up -m and bit vector overflow msgs. * * Revision 1.9 1995/05/30 19:15:58 miker * Print beta char in startup banner msg. * Remove -m option and max_totrecs; select bit vector * size from maxdba, not reccount. */ #include "SearchP.h" #include #include #include #include #include #include #include "vista.h" #define XOS_USE_NO_LOCKING #define X_INCLUDE_TIME_H #include #define MS_misc 1 /* msg catalog set number */ #define DISCARD_FORMAT "%s\t\"%s\"\t%s\t%s\n" /* copied from oe.h */ #define RECS_PER_DOT 1000L #define DOTS_PER_MSG 50L #define DISK_BLKSIZE 512 #define MAX_CORRUPTION 100L #define MAX_REC_READ (DISK_BLKSIZE / sizeof(DB_ADDR)) /* Max number of addresses to be read from * database addresses file, ie the size * of one block read from hard disk. */ #define PROGNAME "MRCLEAN" #define READBUFSIZE (1024 + 32) #define SHOW_NOTHING 0 /* bit arguments for end_of_job() */ #define SHOW_USAGE 1 #define SHOW_EXITCODE 2 #define SHOW_PROGRESS 4 #define TERMINATE_LINE() if(need_linefeed){fputc('\n',aa_stderr);need_linefeed=FALSE;} /*-------------------------- GLOBALS ----------------------------*/ static char *arg_dbname = NULL; static char *arg_newpath = NULL; long batch_size = 0; /* (to fileman.c) */ int beta = #ifdef BETA BETA; #else 0; #endif char betabuf[8]; unsigned char *bit_vector = NULL; static size_t bytes_in = 0L; static size_t corruption_count = 0L; static struct or_swordrec d21new, d21old; static struct or_lwordrec d22new, d22old; static struct or_hwordrec d23new, d23old; static char datestr [32] = ""; /* "1946/04/17 13:03" */ static int debug_mode = FALSE; static size_t dot_count = 0L; char fname_d99_new [1024]; char fname_d99_old [1024]; FILE *fp_d99_new = NULL; FILE *fp_d99_old = NULL; static FILE *frecids = NULL; static int is_valid_dba; static size_t max_corruption = MAX_CORRUPTION; /***static long max_totrecs = 0L;****/ static int normal_exitcode = 0; static int need_linefeed = FALSE; static int overlay_no = FALSE; static int overlay_yes = FALSE; static size_t reccount = 0L; static short recslots; static int dba_offset; static size_t recs_per_dot = RECS_PER_DOT; static int rewrite_reccount = FALSE; static int shutdown_now = 0; /* = FALSE */ static size_t size_d21_old = 0L; static size_t size_d22_old = 0L; static size_t size_d23_old = 0L; static size_t size_d99_old = 0L; static time_t timestart = 0L; static long total_num_addrs = 0L; static int validation_mode = FALSE; /***************************************************************************** structure words: from opera.h file (just FYI...) char word[DtSrMAXWIDTH_WORD] - unique word. long word_offset - offset in a database addresses file for a given word. the first address starts at this position. int num_free_slots - number of free slots in a database addresses file for a given word. long number_of_addrs - total number of addresses for a given word. *****************************************************************************/ /********************************************************/ /* */ /* signal_shutdown */ /* */ /********************************************************/ /* interrupt handler for SIGINT */ static void signal_shutdown (int sig) { shutdown_now = 100 + sig; return; } /* signal_shutdown() */ /************************************************/ /* */ /* print_usage */ /* */ /************************************************/ /* Prints usage statement to stderr. */ static void print_usage (void) { fprintf (aa_stderr, "\nUSAGE: %s [options] dbname newpath\n" " Compresses unused d99 space and validates d00-d99 links.\n" " dbname: 1 - 8 char database name = the old d99/d2x files to be updated.\n" " Files found in local directory or DBFPATH environment variable.\n" " newpath: Specifies where the new d99/d2x files will be placed.\n" " If first char is not slash, path is relative to local directory.\n" "OPTIONS:\n" " -pN: Progress dots printed every N records (default %lu).\n" " Complete progress message printed every %d dots.\n" " -oy: Authorizes overlaying preexisting d99/d2x files in newpath.\n" " -on: Forces exit if preexisting d99/d2x files in newpath.\n" " -v: Validates d99 and d00 links, uncorrupts d99 file, and ensures\n" " accurate record count. Also use -c0 to uncorrupt entire database.\n" " -v: Same as -v but also writes all d00 recs unreferenced by d99\n" " to fname in format suitable for albeniz to extract into fzk format.\n" /******** " -mN: Changes max # database documents from curr count in database to N.\n" *********/ " -cN: Exits if more than N corrupted/incomplete links (default %d).\n" " Corruption limit turned off by -c0.\n" "EXIT CODES:\n" " 0: Complete success. 1: Warning. 2: Job never started.\n" " 3-49: Job ended prematurely, old files ok, new files unusable.\n" " 50-99: Fatal Error, even old database may be corrupted.\n" " 100+: Ctrl-C, kill, and all other signal interrupts cause premature\n" " end, new files may be unusable. Signal = exit code - 100.\n" , aa_argv0, RECS_PER_DOT, DOTS_PER_MSG, MAX_CORRUPTION); return; } /* print_usage() */ /************************************************/ /* */ /* print_progress */ /* */ /************************************************/ /* Prints progress msg after dots or at end of job. * Label is "Final" or "Progress". */ static void print_progress (char *label) { long seconds; int compression; seconds = time(NULL) - timestart; /* total seconds elapsed */ if (seconds < 0L) seconds = 0L; if ((float) bytes_in / (float) size_d99_old >= 99.5) compression = 100; else { compression = (int) (100. * (float) bytes_in / (float) size_d99_old); if (compression < 0 || compression > 100) compression = 0; } TERMINATE_LINE(); fprintf (aa_stderr, "%s: %s Compression %d%% (about %lu KB) in %ld:%02ld min:sec.\n", aa_argv0, label, compression, bytes_in / 1000L, seconds / 60UL, seconds % 60UL); if (*label == 'F') fprintf (aa_stderr, "%s: Counted %ld WORDS in %s.d99.\n", aa_argv0, reccount, arg_dbname); return; } /* print_progress() */ /************************************************/ /* */ /* end_of_job */ /* */ /************************************************/ /* Exits program. Prints status messages before going down. * Should be called on even record boundaries whenever possible, * ie after record writes complete and shutdown_now > 0 (TRUE). */ static void end_of_job (int exitcode, int show_flags) { TERMINATE_LINE(); if (exitcode >= 100) { fprintf (aa_stderr, PROGNAME"66 Aborting after interrupt signal %d.\n", exitcode - 100); } if (validation_mode && corruption_count == 0L) fprintf (aa_stderr, "%s: No corrupted links detected.\n", aa_argv0); if (corruption_count > 0L) { if (max_corruption > 0L && corruption_count >= max_corruption) fprintf (aa_stderr, PROGNAME"193 Aborting at %ld corrupted links.\n", corruption_count); else fprintf (aa_stderr, PROGNAME"194 " "Detected%s %ld corrupted/incomplete link(s).\n", (validation_mode)? " and corrected" : "", corruption_count); } if (show_flags & SHOW_PROGRESS) { print_progress ("Final"); } if (show_flags & SHOW_USAGE) print_usage(); if (show_flags & SHOW_EXITCODE) fprintf (aa_stderr, "%s: Exit code = %d.\n", aa_argv0, exitcode); DtSearchExit (exitcode); } /* end_of_job() */ /************************************************/ /* */ /* user_args_processor() */ /* */ /************************************************/ /* Reads and verifies users command line arguments and * converts them into internal switches and variables. * Some attempt is made to read as many errors as possible * before ending job for bad arguments. */ static void user_args_processor (int argc, char **argv) { char *argptr; int oops = FALSE; int i; time_t stamp; size_t tempsize; if (argc < 3) end_of_job (2, SHOW_USAGE); /* parse all args that begin with a dash (-) */ while (--argc > 0) { argv++; argptr = argv[0]; if (argptr[0] != '-') break; switch (tolower(argptr[1])) { case 'r': if (strcmp(argptr, "-russell") == 0) /* backdoor debug */ debug_mode = TRUE; else goto UNKNOWN_ARG; case 'm': fprintf (aa_stderr, PROGNAME"301 The -m argument is no longer necessary.\n"); break; case 'o': i = tolower (argptr[2]); if (i == 'n') overlay_no = TRUE; else if (i == 'y') overlay_yes = TRUE; else { INVALID_ARG: fprintf (aa_stderr, PROGNAME"177 Invalid %.2s argument.\n", argptr); oops = TRUE; } break; case 'v': validation_mode = TRUE; if (argptr[2] != '\0') { _Xltimeparams localtime_buf; struct tm *time_ptr; if ((frecids = fopen (argptr+2, "w")) == NULL) { fprintf (aa_stderr, PROGNAME"802 Unable to open '%s' " "to output unreferenced d00 records: %s\n", argptr, strerror(errno)); oops = TRUE; } time (&stamp); time_ptr = _XLocaltime(&stamp, localtime_buf); strftime (datestr, sizeof(datestr), "%Y/%m/%d %H:%M", time_ptr); } break; case 'p': recs_per_dot = atol (argptr + 2); if (recs_per_dot <= 0L) goto INVALID_ARG; break; case 'c': tempsize = atol (argptr + 2); if (tempsize < 0L) goto INVALID_ARG; max_corruption = tempsize; break; UNKNOWN_ARG: default: fprintf (aa_stderr, PROGNAME"159 Unknown argument: '%s'.\n", argptr); oops = TRUE; break; } /* end switch */ } /* end parse of cmd line args */ /* Test how we broke loop. * There should still be 2 args past the ones * beginning with a dash: dbname and newpath. */ if (argc != 2) { if (argc <= 0) fputs (PROGNAME"210 Missing required dbname argument.\n", aa_stderr); if (argc <= 1) fputs (PROGNAME"211 Missing required newpath argument.\n", aa_stderr); if (argc > 2) fputs (PROGNAME"212 Too many arguments.\n", aa_stderr); oops = TRUE; } if (oops) end_of_job (2, SHOW_USAGE); /* DBNAME */ arg_dbname = argv[0]; if (strlen(arg_dbname) > 8) { fprintf (aa_stderr, PROGNAME"229 Invalid database name '%s'.\n", arg_dbname); end_of_job (2, SHOW_USAGE); } /* NEWPATH: * Oldpath and newpath are validated when the files * are copied and the database is opened. */ arg_newpath = argv[1]; return; } /* user_args_processor() */ /************************************************/ /* */ /* validation_error() */ /* */ /************************************************/ /* Subroutine of validation_mode in main(). * Prints d2x and d99 data at location of error. * Adjusts d2x counts for number of good addrs and free slots. */ static void validation_error (DB_ADDR dbaorig) { DB_ADDR slot; is_valid_dba = FALSE; slot = dbaorig >> 8; /* now retranslate back to real dba */ if (dbaorig != -1) slot = ((slot + 1) * recslots - dba_offset) | (OR_D00 << 24); fprintf (aa_stderr, " DBA = %d:%ld (x%02x:%06lx), orig addr val = x%08lx\n" " Word='%c%s' offset=%ld addrs=%ld free=%d\n", OR_D00, slot, OR_D00, slot, dbaorig, (!isgraph(d23old.or_hwordkey[0]))? '^' : d23old.or_hwordkey[0], d23old.or_hwordkey+1, d23old.or_hwoffset, d23old.or_hwaddrs, d23old.or_hwfree); if (--d23new.or_hwaddrs < 0L) d23new.or_hwaddrs = 0L; /* (should never occur) */ d23new.or_hwfree++; return; } /* validation_error() */ /************************************************/ /* */ /* open_all_files */ /* */ /************************************************/ static void open_all_files (FILE **fp, char *fname, char *mode, size_t *size, int *oops) { struct stat fstatbuf; if ((*fp = fopen (fname, mode)) == NULL) { fprintf (aa_stderr, PROGNAME"439 Can't open %s: %s\n", fname, strerror(errno)); *oops = TRUE; return; } if (fstat (fileno(*fp), &fstatbuf) == -1) { fprintf (aa_stderr, PROGNAME"440 Can't access status of %s: %s\n", fname, strerror(errno)); *oops = TRUE; return; } if (size) if ((*size = fstatbuf.st_size) <= 0L) { fprintf (aa_stderr, PROGNAME"499 %s is empty.\n", fname); *oops = TRUE; } return; } /* open_all_files() */ /************************************************/ /* */ /* copy_old_d2x_to_new */ /* */ /************************************************/ static void copy_old_d2x_to_new (char *fname_old, char *fname_new, FILE *fp_old, FILE *fp_new) { char readbuf [READBUFSIZE]; int i, j; fprintf (aa_stderr, "%s: Copying from old d2x files to %s...\n", aa_argv0, fname_new ); for (;;) /* loop ends when eof set on input stream */ { errno = 0; i = fread (readbuf, 1, sizeof(readbuf), fp_old); if (errno) { fprintf (aa_stderr, PROGNAME"517 Read error on %s: %s.\n", fname_old, strerror(errno)); end_of_job (3, SHOW_EXITCODE); } j = fwrite (readbuf, 1, i, fp_new); if (i != j) { fprintf (aa_stderr, PROGNAME"489 Write error on %s: %s.\n", fname_new, strerror(errno)); end_of_job (3, SHOW_EXITCODE); } if (shutdown_now) end_of_job (shutdown_now, SHOW_EXITCODE); if (feof(fp_old)) break; } TERMINATE_LINE(); fclose (fp_old); fclose (fp_new); return; } /* copy_old_d2x_to_new() */ /********************************/ /* */ /* read_d2x */ /* */ /********************************/ /* Performs vista RECREAD on curr word record. * CALLER SHOULD CHECK DB_STATUS. */ void read_d2x (struct or_hwordrec *glob_word, long field) { if (field == OR_SWORDKEY) { RECREAD (PROGNAME"61", &d21old, 0); if (db_status != S_OKAY) return; strncpy (glob_word->or_hwordkey, d21old.or_swordkey, DtSrMAXWIDTH_HWORD); glob_word->or_hwordkey [DtSrMAXWIDTH_HWORD-1] = 0; glob_word->or_hwoffset = d21old.or_swoffset; glob_word->or_hwfree = d21old.or_swfree; glob_word->or_hwaddrs = d21old.or_swaddrs; } else if (field == OR_LWORDKEY) { RECREAD (PROGNAME"69", &d22old, 0); if (db_status != S_OKAY) return; strncpy (glob_word->or_hwordkey, d22old.or_lwordkey, DtSrMAXWIDTH_HWORD); glob_word->or_hwordkey [DtSrMAXWIDTH_HWORD-1] = 0; glob_word->or_hwoffset = d22old.or_lwoffset; glob_word->or_hwfree = d22old.or_lwfree; glob_word->or_hwaddrs = d22old.or_lwaddrs; } else { RECREAD (PROGNAME"78", glob_word, 0); glob_word->or_hwordkey [DtSrMAXWIDTH_HWORD-1] = 0; } return; } /* read_d2x() */ /********************************/ /* */ /* write_d2x */ /* */ /********************************/ /* performs vista RECWRITE on curr word record. * CALLER MUST CHECK DB_STATUS. */ static void write_d2x (struct or_hwordrec *glob_word, long field) { if (field == OR_SWORDKEY) { strcpy (d21new.or_swordkey, glob_word->or_hwordkey); d21new.or_swoffset = glob_word->or_hwoffset; d21new.or_swfree = glob_word->or_hwfree; d21new.or_swaddrs = glob_word->or_hwaddrs; RECWRITE (PROGNAME"102", &d21new, 0); } else if (field == OR_LWORDKEY) { strcpy (d22new.or_lwordkey, glob_word->or_hwordkey); d22new.or_lwoffset = glob_word->or_hwoffset; d22new.or_lwfree = glob_word->or_hwfree; d22new.or_lwaddrs = glob_word->or_hwaddrs; RECWRITE (PROGNAME"111", &d22new, 0); } else { RECWRITE (PROGNAME"115", glob_word, 0); } return; } /* write_d2x() */ /************************************************/ /* */ /* copy_new_d99() */ /* */ /************************************************/ /* The garbage collection/compression process itself. * For very large databases, there will be appx 3 million word records, * so the loop should be coded for ***EFFICIENCY***. */ /* fool the read_wordstr() function by using a phony word * whose length tells the function which read to do. * This code really sucks but I don't have time to fix it. */ static void copy_new_d99 (long keyfield) { int is_odd_nibble; long num_holes; long slots_left; long good_addrs_this_block; unsigned char *bvptr; int a; DB_ADDR dba, dbaorig; long x; int done; long good_addrs_left; int num_reads, num_writes; DB_ADDR word_addrs [MAX_REC_READ + 64]; /* d99 read buf */ DB_ADDR word_addrs_out [MAX_REC_READ + 64]; /* d99 write buf */ KEYFRST (PROGNAME"179", keyfield, 0); while (db_status == S_OKAY) { /********RECREAD (PROGNAME"182", &d02new, 0);********/ read_d2x (&d23new, keyfield); if (validation_mode) /* save for validation err msgs */ memcpy (&d23old, &d23new, sizeof(d23old)); /**************** @@@@@ START HERE NEXT @@@@@ USE OR_D2x instead of keyfiled OR_SWROD etc so we can @@@@@ correctly call validation_error() ****************/ /* Read old d99 file at specified offset to get total num "holes". * In the first portion of record holes are filled with * representations of valid database addresses + statistical weights. * In the second portion the holes are "free slots" for future * expansion which are conventionally initialized with a -1. */ /* force number of free slots to 0(ZERO) */ d23new.or_hwfree = 0L; fseek (fp_d99_old, d23new.or_hwoffset, SEEK_SET); num_holes = d23new.or_hwaddrs + (long )d23new.or_hwfree; good_addrs_left = d23new.or_hwaddrs; bytes_in += sizeof(DB_ADDR) * num_holes; /* Update the offset in the d2x record buffer */ d23new.or_hwoffset = ftell (fp_d99_new); /* Copy the array of holes in each disk block, * reading the old and writing to the new. Loop ends * when the number of holes left will fit into one last block. */ done = FALSE; while (!done) /* loop on each block in this word */ { if (num_holes > MAX_REC_READ) { num_reads = MAX_REC_READ; num_holes -= MAX_REC_READ; } else { done = TRUE; num_reads = num_holes; } errno = 0; fread (word_addrs, sizeof(DB_ADDR), num_reads, fp_d99_old); if (errno) { TERMINATE_LINE(); fprintf (aa_stderr, PROGNAME"657 Read error on %s: %s.\n", fname_d99_old, strerror(errno)); end_of_job (4, SHOW_PROGRESS + SHOW_EXITCODE); } /* Addrs on d99 are now 'record numbers' not dbas. * A rec# is what the dba/slot# would be if records * took up just one slot and there were no dbrec at * start of file. D99 rec#s start at #1, not #0. */ /* If user requested validation_mode, validate each 'good' rec# * (not free slots) in word_addrs buffer. If any d99 links * are corrupt, skip them when copying to the new d99 file. * Rewrite -1's to all free slots. * ----> NOTE UNUSUAL FORMAT OF DBA HOLES IN D99! <---- * Record number is shifted to the high order 3 bytes. * The statistical weight is in the low order byte. * The vista file number is known from the #define constant OR_D00, * and the vista dba/slot# is mapped from rec# by mult/div * number of slots per rec, plus/minus dbrec offset. */ if (validation_mode) { /* set x to number of good addrs in this block */ if (good_addrs_left > num_reads) { x = num_reads; good_addrs_left -= num_reads; } else { x = good_addrs_left; good_addrs_left = 0; } /* Validate the rec#'s in this block. Note that the loop * is skipped if the entire block is free slots. */ good_addrs_this_block = 0; for (a=0; a> 8; /* 0,rec#,rec#,rec# */ is_valid_dba = TRUE; /* default */ /* If original rec# == -1 we've overrun the good * rec#'s into the expansion area, which is filled * with -1's. This is real bad news because if * the counts in d02 are bad, the online programs * will quickly crash, and we can't continue this program. * Advance to next rec# because we can't mark the bit vector. */ if (dbaorig == -1L) { TERMINATE_LINE(); fprintf (aa_stderr, "*** "PROGNAME "111 DBA in d99 = -1. " "Probable overrun into expansion area\n" " due to incorrect count values in d2x file.\n"); validation_error (dbaorig); corruption_count++; if (max_corruption > 0L && corruption_count >= max_corruption) end_of_job (91, SHOW_PROGRESS + SHOW_EXITCODE); continue; /* skip the bit vector check */ } /* If slot number > max totrecs, we have a * corrupted d99-d00 link because we've * already validated the d00 file and we know that * it has no slots > max. Also we have to advance * to next slot because we can't mark the bit vector. */ /****** if (dba >= max_totrecs)*******/ if (dba >= total_num_addrs) { TERMINATE_LINE(); fprintf (aa_stderr, "*** "PROGNAME"222 " "DBA in d99 not in d00, slot > max num docs.\n"); validation_error (dbaorig); corruption_count++; if (max_corruption > 0L && corruption_count >= max_corruption) end_of_job (92, SHOW_PROGRESS + SHOW_EXITCODE); continue; /* skip the bit vector check */ } /* Verify that dba exists in d00 file (test bit #1). * If not, mark bit #3 (3rd lowest) in nibble and * print error msg unless bit #3 previously marked. */ bvptr = bit_vector + (dba >> 1); is_odd_nibble = (dba & 1L); if (!(*bvptr & ((is_odd_nibble)? 0x01 : 0x10))) /* bit #1 */ { if (!(*bvptr & ((is_odd_nibble)? 0x04 : 0x40)))/* bit #3 */ { *bvptr |= (is_odd_nibble)? 0x04 : 0x40; TERMINATE_LINE(); fprintf (aa_stderr, "*** "PROGNAME"333 " "DBA in d99 does not exist in d00.\n"); validation_error (dbaorig); corruption_count++; if (max_corruption > 0L && corruption_count >= max_corruption) end_of_job (93, SHOW_PROGRESS + SHOW_EXITCODE); } /* endif where corrupt link detected */ } /* Mark bit #2 in bit vector indicating a d99 reference. */ *bvptr |= (is_odd_nibble)? 0x02 : 0x20; /* bit #2 */ /* move good dba to curr output block, incr counter */ if (is_valid_dba) word_addrs_out [good_addrs_this_block++] = dbaorig; } /* end validation loop for each good dba in the block */ /* Write out only validated addrs in current block. * If this was the last block, fill out all the free slots, * if any, with -1 values, and exit the dba loop for this word. */ if (good_addrs_this_block > 0L) { num_writes = fwrite (word_addrs_out, sizeof(DB_ADDR), good_addrs_this_block, fp_d99_new); if (num_writes != good_addrs_this_block) goto WRITE_ERROR; } if (good_addrs_left <= 0L) { /* Write blocks of -1s until new d2x free slot count * is exhausted. The last block may be < MAX_REC_READ. */ slots_left = d23new.or_hwfree; while (slots_left > 0L) { /* set x to number of -1's to write for this block */ if (slots_left > MAX_REC_READ) { x = MAX_REC_READ; slots_left -= MAX_REC_READ; } else { x = slots_left; slots_left = 0L; } for (a=0; a DOTS_PER_MSG) { dot_count = 0; print_progress ("Progress"); } else { fputc ('.', aa_stderr); need_linefeed = TRUE; if (!(dot_count % 10L)) fputc (' ', aa_stderr); } fflush (aa_stderr); } /* end of print-a-dot */ if (shutdown_now) end_of_job (shutdown_now, SHOW_PROGRESS + SHOW_EXITCODE); KEYNEXT (PROGNAME"196", keyfield, 0); } /* end of main loop on each word in database */ return; } /* copy_new_d99() */ /************************************************/ /* */ /* main() */ /* */ /************************************************/ int main (int argc, char *argv[]) { FILE_HEADER fl_hdr; int a, i, j; unsigned char *bvptr; DB_ADDR dba, dba1, dbaorig; char dbfpath [1024]; char fname_d21_new [1024]; char fname_d21_old [1024]; char fname_d22_new [1024]; char fname_d22_old [1024]; char fname_d23_new [1024]; char fname_d23_old [1024]; FILE *fp_d21_new = NULL; FILE *fp_d21_old = NULL; FILE *fp_d22_new = NULL; FILE *fp_d22_old = NULL; FILE *fp_d23_new = NULL; FILE *fp_d23_old = NULL; char full_dbname_old [1024]; char full_dbname_new [1024]; long max_bitvec = 0L; int oops; char *ptr; char readbuf [READBUFSIZE]; unsigned long reads_per_dot; char recidbuf [DtSrMAX_DB_KEYSIZE + 4]; time_t starttime; long x; struct or_dbrec dbrec; struct tm *time_ptr; _Xltimeparams localtime_buf; if (beta > 1) sprintf (betabuf, "%c", beta); else betabuf[0] = 0; aa_argv0 = argv[0]; time (&starttime); time_ptr = _XLocaltime(&starttime, localtime_buf); strftime (dbfpath, sizeof (dbfpath), /* just use any ol' buffer */ CATGETS(dtsearch_catd, MS_misc, 22, "%A, %b %d %Y, %I:%M %p"), time_ptr); printf (CATGETS(dtsearch_catd, MS_misc, 23, "%s: Version %s%s. Run %s.\n"), aa_argv0, AUSAPI_VERSION, betabuf, dbfpath); signal (SIGHUP, signal_shutdown); signal (SIGINT, signal_shutdown); signal (SIGQUIT, signal_shutdown); signal (SIGTRAP, signal_shutdown); signal (SIGABRT, signal_shutdown); /* ordinarily this causes core dump */ signal (SIGKILL, signal_shutdown); /* this cannot be trapped */ signal (SIGALRM, signal_shutdown); signal (SIGTERM, signal_shutdown); signal (SIGPWR, signal_shutdown); signal (SIGUSR1, signal_shutdown); /* ordinarily this "pings" OE */ #ifdef _AIX signal (SIGXCPU, signal_shutdown); signal (SIGDANGER, signal_shutdown); #endif user_args_processor (argc, argv); /* In order to find old files, we have to check if * DBFPATH environment variable has been set. * Load the fully constructed DBFPATH-dbname into its own buffer. */ full_dbname_old[0] = '\0'; dbfpath[0] = 0; if ((ptr = getenv ("DBFPATH")) != NULL) { if (*ptr == 0) fprintf (aa_stderr, "%s: Ignoring empty DBFPATH environment variable.\n", aa_argv0); else { fprintf (aa_stderr, "%s: Using DBFPATH = '%s'.\n", aa_argv0, ptr); strcpy (full_dbname_old, ptr); /* Ensure that DBFPATH ends in a slash. */ ptr = strchr (full_dbname_old, '\0'); if (*(ptr-1) != LOCAL_SLASH) { *ptr++ = LOCAL_SLASH; *ptr = '\0'; } strcpy (dbfpath, full_dbname_old); } } /* Currently full_dbname_old contains just the path. * Similarly, build just path name for the 2 new files * using full_dbname_new as a buffer. * Verify they don't both refer to the same directory. */ strcpy (full_dbname_new, arg_newpath); ptr = strchr (full_dbname_new, '\0'); if (*(ptr-1) != LOCAL_SLASH) { *ptr++ = LOCAL_SLASH; *ptr = '\0'; } if (strcmp (full_dbname_old, full_dbname_new) == 0) { fprintf (aa_stderr, PROGNAME"393 Old and new directories are identical: '%s'\n.", full_dbname_old); end_of_job (2, SHOW_USAGE); } /* Complete full_dbname_old by appending dbname to the path prefix. * Then build full path/file names for all 4 files. */ strcat (full_dbname_old, arg_dbname); strcat (full_dbname_new, arg_dbname); fprintf (aa_stderr, "%s: Old files: '%s.d2x, .d99'.\n", aa_argv0, full_dbname_old); fprintf (aa_stderr, "%s: New files: '%s.d2x, .d99'.\n", aa_argv0, full_dbname_new); strcat (fname_d99_old, full_dbname_old); strcat (fname_d99_old, ".d99"); strcpy (fname_d21_old, full_dbname_old); strcat (fname_d21_old, ".d21"); strcpy (fname_d22_old, full_dbname_old); strcat (fname_d22_old, ".d22"); strcpy (fname_d23_old, full_dbname_old); strcat (fname_d23_old, ".d23"); strcat (fname_d99_new, full_dbname_new); strcat (fname_d99_new, ".d99"); strcpy (fname_d21_new, full_dbname_new); strcat (fname_d21_new, ".d21"); strcpy (fname_d22_new, full_dbname_new); strcat (fname_d22_new, ".d22"); strcpy (fname_d23_new, full_dbname_new); strcat (fname_d23_new, ".d23"); /* If the user hasn't already authorized overwriting preexisting files, * check new directory and if new files already exist, * ask permission to overwrite. */ if (!overlay_yes) { oops = FALSE; /* TRUE forces a user prompt */ if ((fp_d99_new = fopen(fname_d99_new, "r")) != NULL) { fclose (fp_d99_new); oops = TRUE; } if ((fp_d21_new = fopen(fname_d21_new, "r")) != NULL) { fclose (fp_d21_new); oops = TRUE; } if ((fp_d22_new = fopen(fname_d22_new, "r")) != NULL) { fclose (fp_d22_new); oops = TRUE; } if ((fp_d23_new = fopen(fname_d23_new, "r")) != NULL) { fclose (fp_d23_new); oops = TRUE; } if (oops) { fprintf (aa_stderr, "%s: One or more new files already exist.\n", aa_argv0); if (overlay_no) { fputs (PROGNAME"463 " "Command line argument disallows file overlay.\n", aa_stderr); end_of_job (2, SHOW_EXITCODE); } fputs (" Is it ok to overlay files in new directory? [y/n] ", aa_stderr); *readbuf = '\0'; fgets (readbuf, sizeof(readbuf), stdin); if (strlen(readbuf) && readbuf[strlen(readbuf)-1] == '\n') readbuf[strlen(readbuf)-1] = '\0'; if (tolower (*readbuf) != 'y') end_of_job (2, SHOW_NOTHING); } } /* end of check for overlaying new files */ /* Open all files. The d2x's are opened so that the old ones * can be copied into the new directory before starting * the garbage collection process proper. * The d99's are opened now just to verify permissions. */ oops = FALSE; /* TRUE ends job, but only after trying all 4 files */ open_all_files (&fp_d21_old, fname_d21_old, "rb", &size_d21_old, &oops); open_all_files (&fp_d22_old, fname_d22_old, "rb", &size_d22_old, &oops); open_all_files (&fp_d23_old, fname_d23_old, "rb", &size_d23_old, &oops); open_all_files (&fp_d99_old, fname_d99_old, "rb", &size_d99_old, &oops); open_all_files (&fp_d21_new, fname_d21_new, "wb", NULL, &oops); open_all_files (&fp_d22_new, fname_d22_new, "wb", NULL, &oops); open_all_files (&fp_d23_new, fname_d23_new, "wb", NULL, &oops); open_all_files (&fp_d99_new, fname_d99_new, "wb", NULL, &oops); if (shutdown_now) end_of_job (shutdown_now, SHOW_EXITCODE); if (oops) end_of_job (2, SHOW_EXITCODE); /* Copy old d2x files to new directory. * Database will open using new files so only they will be changed. */ copy_old_d2x_to_new (fname_d21_old, fname_d21_new, fp_d21_old, fp_d21_new); copy_old_d2x_to_new (fname_d22_old, fname_d22_new, fp_d22_old, fp_d22_new); copy_old_d2x_to_new (fname_d23_old, fname_d23_new, fp_d23_old, fp_d23_new); /* Open database, but use new d02 file for updates. */ if (!austext_dopen (arg_dbname, (dbfpath[0] == 0)? NULL : dbfpath, arg_newpath, 0, &dbrec)) { puts (DtSearchGetMessages()); end_of_job (3, SHOW_EXITCODE); } /* Use the number of records in the dbrec, unless * user specified a larger number on command line. * Get other constants from dbrec. */ /******************** if (max_totrecs == 0) { max_totrecs = dbrec.or_reccount + 24; fprintf (aa_stderr, "%s: %ld records in database. Using this as default for -m.\n", aa_argv0, dbrec.or_reccount); } **********************/ /* this is where real dba was changed to record number (still called dba) */ RECFRST (PROGNAME"1067", OR_OBJREC, 0); CRGET (PROGNAME"1068", &dba, 0); /* dba of first real obj record */ recslots = dbrec.or_recslots; /* vista slots per obj record */ dba_offset = recslots - (dba & 0xffffff); /* accounts for dbrec */ /* total_num_addrs = what reccount would be if * all holes were filled with good records. */ total_num_addrs = (dbrec.or_maxdba - (dba & 0xffffff) + 1) / recslots + 1; fprintf (aa_stderr, "%s: curr reccnt=%ld, mxdba=%ld, sl/rec=%ld, tot#adr=%ld.\n", aa_argv0, dbrec.or_reccount, dbrec.or_maxdba, dbrec.or_recslots, total_num_addrs); /* Initialize validation_mode (checkd99) */ if (validation_mode) { /* Allocate and initialize a bit vector: * 4 bits for every possible d00 database address. */ max_bitvec = (total_num_addrs >> 1) + 2; if ((bit_vector = malloc (max_bitvec + 64L)) == NULL) { fprintf (aa_stderr, PROGNAME"465 WARNING: " "Can't allocate memory for bit vector. " "'Validate' mode switched off.\n"); validation_mode = FALSE; normal_exitcode = 1; /* warning */ goto EXIT_INIT_VALIDATION; } memset (bit_vector, 0, max_bitvec); /* Read every d00 rec sequentially. 1 in bit #1 (lowest order) * in bit vector means record (dba) exists in d00 file. * While we're at it, count the total number of records. */ x = dbrec.or_reccount / 50L + 1; /* x = recs per dot */ fprintf (aa_stderr, "%s: Reading d00 file. Each dot appx %ld database documents...\n", aa_argv0, x); reccount = 0L; dot_count = 0L; RECFRST (PROGNAME"534", OR_OBJREC, 0); while (db_status == S_OKAY) { CRREAD (PROGNAME"617", OR_OBJKEY, recidbuf, 0); /* print periodic progress dots */ if (!(++reccount % x)) { fputc ('.', aa_stderr); need_linefeed = TRUE; if (!(++dot_count % 10L)) fputc (' ', aa_stderr); fflush (aa_stderr); } /* Get dba and record number and confirm * it will not overflow bit vector. */ CRGET (PROGNAME"537", &dba, 0); dba &= 0x00ffffff; /* mask out file number in high order byte */ dba1 = (dba + dba_offset) / recslots; /* = "rec number", base 1 */ /***** if (dba1 >= max_totrecs)*****/ if (dba1 >= total_num_addrs) { TERMINATE_LINE(); fprintf (aa_stderr, PROGNAME"561 " "DBA '%d:%ld' (rec #%ld) in d00 exceeds total num addrs %ld;\n" " Bit vector overflow because maxdba %ld" " in dbrec is incorrect.\n", /****** OR_D00, dba, dba1, max_totrecs);******/ OR_D00, dba, dba1, total_num_addrs, dbrec.or_maxdba); end_of_job (7, SHOW_EXITCODE); } if (shutdown_now) end_of_job (shutdown_now, SHOW_EXITCODE); /* Set bit #1 of even or odd nibble to indicate that * this record *number* actually exists in d00 file. */ bit_vector[dba1>>1] |= (dba1 & 1L)? 0x01 : 0x10; RECNEXT (PROGNAME"541", 0); } /* end of sequential read thru d00 file */ TERMINATE_LINE(); /* end the dots... */ /* confirm that RECCOUNT record holds the correct number */ if (dbrec.or_reccount == reccount) { fprintf (aa_stderr, "%s: Confirmed %ld DOCUMENTS in %s.d00.\n", aa_argv0, dbrec.or_reccount, arg_dbname); } else { fprintf (aa_stderr, "%s: %ld DOCUMENTS actually in %s.d00 not =" " %ld count stored there.\n" " Count will be corrected in new d00 file.\n", aa_argv0, reccount, arg_dbname, dbrec.or_reccount); dbrec.or_reccount = reccount; rewrite_reccount = TRUE; } EXIT_INIT_VALIDATION: ; } /* end of validation_mode initialization */ /* initialize main loop */ time (×tart); reccount = 0L; bytes_in = 0L; dot_count = DOTS_PER_MSG; /* force initial msg after first blk of recs */ TERMINATE_LINE(); fprintf (aa_stderr, "%s: Compressing into %s. Each dot appx %lu words...\n", aa_argv0, arg_newpath, recs_per_dot); /* write New Header Information to a new d99 file */ init_header(fp_d99_new, &fl_hdr); /* Sequentially read each word key file in big loop. * For each word, read the d99. * In validation mode check the dbas. * If not validating, just blindly rewrite the old d99 to the new one. * If validating only write good dba's and mark the bit vector. */ copy_new_d99 (OR_SWORDKEY); copy_new_d99 (OR_LWORDKEY); copy_new_d99 (OR_HWORDKEY); if (reccount == 0L) end_of_job (50, SHOW_PROGRESS + SHOW_EXITCODE); else print_progress ("Final"); /* If validation_mode requested, traverse bit vector and print out * table of each d00 record which cannot be accessed from any d99 word. * If a validation file name was provided, write out a line for each * bad reecord in alebeniz-compatible format. */ if (validation_mode) { for (x=0L, bvptr = bit_vector; x 0L && corruption_count >= max_corruption) end_of_job (94, SHOW_EXITCODE); } /* endif where d00 is not referenced by d99 */ } /* end forloop: every 2 bits in a bitvector byte */ } /* end forloop: every byte in bitvector */ } /* Normal_exitcode currently will contain either a 0 or a 1. * If we were uncorrupting the d99 and found any corrupt links, * make sure it's 1 (warning). If there were corrupt links and * we weren't trying to uncorrupt it, change it to a hard error. */ /***@@@@ by the way, corruption_count can be > 0 only if in validation_mode */ if (corruption_count > 0L) { if (validation_mode) normal_exitcode = 1; else normal_exitcode = 90; } end_of_job (normal_exitcode, SHOW_EXITCODE); } /* main() */ /*************************** MRCLEAN.C ****************************/