Initial import of the CDE 2.1.30 sources from the Open Group.
This commit is contained in:
229
cde/lib/DtSearch/bmstrstr.c
Normal file
229
cde/lib/DtSearch/bmstrstr.c
Normal file
@@ -0,0 +1,229 @@
|
||||
/*
|
||||
* COMPONENT_NAME: austext
|
||||
*
|
||||
* FUNCTIONS: bmhcore
|
||||
* bmhtable_build
|
||||
* bmstrstr
|
||||
* main
|
||||
*
|
||||
* ORIGINS: 27
|
||||
*
|
||||
*
|
||||
* (C) COPYRIGHT International Business Machines Corp. 1992,1995
|
||||
* All Rights Reserved
|
||||
* Licensed Materials - Property of IBM
|
||||
* US Government Users Restricted Rights - Use, duplication or
|
||||
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
|
||||
*/
|
||||
/************************* BMSTRSTR.C ****************************
|
||||
* $TOG: bmstrstr.c /main/6 1998/04/17 11:25:23 mgreess $
|
||||
* Original module named fastsearch.c
|
||||
* and included colocation string search functions.
|
||||
* Modification of Boyer-Moore-Horspool algorithm,
|
||||
* Sec 10.5.2 of Information Retrieval, Frakes and Baeza-Yates, editors.
|
||||
* Provides a generalized boyer-moore
|
||||
* strstr() function. The table used in the BMH algorithm is
|
||||
* generated in a separate function to improve efficiency when
|
||||
* looking for the same substring pattern in multiple text strings.
|
||||
* The 'length' arguments can be passed if known, or passed as
|
||||
* strlen(xxx) if not known. HOWEVER the string arrays MUST be at
|
||||
* least 1 char larger then strlen() says ('cause we insert a \0).
|
||||
* This whole thing has been coded for SPEED!
|
||||
*
|
||||
* $Log$
|
||||
* Revision 2.2 1995/10/26 15:37:42 miker
|
||||
* Added prolog.
|
||||
*
|
||||
* Revision 2.1 1995/09/22 19:10:39 miker
|
||||
* Freeze DtSearch 0.1, AusText 2.1.8
|
||||
*
|
||||
* Revision 1.2 1995/08/31 22:12:05 miker
|
||||
* Minor changes for DtSearch.
|
||||
*/
|
||||
#include "SearchP.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/param.h>
|
||||
|
||||
#ifndef _AIX
|
||||
#define __strcmp strcmp
|
||||
#endif
|
||||
|
||||
/*********#define TEST_BMSTRSTR**************/
|
||||
|
||||
/****************************************/
|
||||
/* */
|
||||
/* bmhtable_build */
|
||||
/* */
|
||||
/****************************************/
|
||||
/* Builds the table for search substring 'pattern'
|
||||
* used by the BMH core search algorithm.
|
||||
* 'patlen' is the string length of 'pattern'.
|
||||
* The caller defines and passes 'bmhtable',
|
||||
* an array of long integers (size_t) of size MAX_BMHTAB.
|
||||
* This function initializes bmhtable for later search call.
|
||||
*/
|
||||
void bmhtable_build (
|
||||
unsigned char *pattern,
|
||||
size_t patlen,
|
||||
size_t * bmhtable)
|
||||
{
|
||||
int k;
|
||||
|
||||
for (k = 0; k < MAX_BMHTAB; k++)
|
||||
bmhtable[k] = patlen;
|
||||
patlen--;
|
||||
for (k = 0; k < patlen; k++)
|
||||
bmhtable[pattern[k]] = (patlen - k);
|
||||
return;
|
||||
} /* bmhtable_build() */
|
||||
|
||||
|
||||
/****************************************/
|
||||
/* */
|
||||
/* bmhcore */
|
||||
/* */
|
||||
/****************************************/
|
||||
/* Performs 'core' BMH search after bmhtable is built.
|
||||
* Returns ptr to first occurrence of pattern in text or NULL.
|
||||
* WARNING! IF EITHER txtlen OR patlen <= 0, THIS FUNCTION WILL CRASH!!!
|
||||
* Pattern and patlen MUST BE identical with those used to build
|
||||
* bmhtable.
|
||||
*/
|
||||
|
||||
char *bmhcore (
|
||||
unsigned char *text,
|
||||
size_t txtlen,
|
||||
unsigned char *pattern,
|
||||
size_t patlen,
|
||||
size_t *bmhtable)
|
||||
{
|
||||
register unsigned char
|
||||
lastchar = pattern[patlen - 1];
|
||||
register unsigned char
|
||||
textchar;
|
||||
register unsigned char
|
||||
*cp;
|
||||
register unsigned char
|
||||
*last;
|
||||
int savechar;
|
||||
int savechar2;
|
||||
unsigned char
|
||||
*result = NULL;
|
||||
|
||||
/* Terminate pattern with a char we KNOW is not in text.
|
||||
* Note that this requires string to have room for \0 at end.
|
||||
*/
|
||||
savechar = pattern[patlen];
|
||||
pattern[patlen] = '\0';
|
||||
|
||||
last = text + txtlen;
|
||||
for (cp = text + patlen - 1; cp < last; cp += bmhtable[textchar]) {
|
||||
/*
|
||||
* Check if last character matches. If it doesn't, no need
|
||||
* to check any further.
|
||||
*/
|
||||
if ((textchar = *cp) != lastchar)
|
||||
continue;
|
||||
savechar2 = cp[1];
|
||||
cp[1] = '\0';
|
||||
if (!__strcmp ((char *) (cp + 1 - patlen), (char *) pattern))
|
||||
result = cp + 1 - patlen;
|
||||
cp[1] = savechar2;
|
||||
if (result)
|
||||
break;
|
||||
}
|
||||
pattern[patlen] = savechar; /* restore last char */
|
||||
return (char *) result;
|
||||
} /* bmhcore() */
|
||||
|
||||
|
||||
/****************************************/
|
||||
/* */
|
||||
/* bmstrstr */
|
||||
/* */
|
||||
/****************************************/
|
||||
/* Search in text [1..txtlen] for pattern [1..patlen].
|
||||
* Returns ptr to first occurrence of pattern, or NULL.
|
||||
*/
|
||||
char *bmstrstr (
|
||||
unsigned char *text,
|
||||
size_t txtlen,
|
||||
unsigned char *pattern,
|
||||
size_t patlen)
|
||||
{
|
||||
size_t bmhtable[MAX_BMHTAB];
|
||||
|
||||
bmhtable_build (pattern, patlen, bmhtable);
|
||||
return bmhcore (text, txtlen, pattern, patlen, bmhtable);
|
||||
} /* bmstrstr() */
|
||||
|
||||
|
||||
#ifdef TEST_BMSTRSTR /* for test only */
|
||||
#include <sys/stat.h>
|
||||
/****************************************/
|
||||
/* */
|
||||
/* main */
|
||||
/* */
|
||||
/****************************************/
|
||||
/* tests bmstrstr() against standard strstr() on a specified file */
|
||||
main ()
|
||||
{
|
||||
FILE *f;
|
||||
struct stat statbuf;
|
||||
size_t fsize = 0L;
|
||||
char fname[BUFSIZ+1];
|
||||
char pattern[MAXPATHLEN+1];
|
||||
char *readbuf = NULL;
|
||||
char *ptr;
|
||||
|
||||
MAIN_LOOP:
|
||||
printf ("\nEnter a filename (Ctrl-C quits) > ");
|
||||
|
||||
*fname = '\0';
|
||||
fgets (fname, sizeof(fname), stdin);
|
||||
if (strlen(fname) && fname[strlen(fname)-1] == '\n')
|
||||
fname[strlen(fname)-1] = '\0';
|
||||
|
||||
if ((f = fopen (fname, "r")) == NULL) {
|
||||
printf ("Can't open '%s': %s\n", fname, strerror (errno));
|
||||
goto MAIN_LOOP;
|
||||
}
|
||||
|
||||
fstat (fileno (f), &statbuf);
|
||||
if (fsize > statbuf.st_size) {
|
||||
free (readbuf);
|
||||
readbuf = NULL;
|
||||
}
|
||||
fsize = statbuf.st_size;
|
||||
if (readbuf == NULL)
|
||||
readbuf = malloc (fsize + 64L);
|
||||
|
||||
fread (readbuf, fsize, 1L, f);
|
||||
fclose (f);
|
||||
|
||||
printf ("Enter a search pattern > ");
|
||||
|
||||
*pattern = '\0';
|
||||
fgets (pattern, sizeof(pattern), stdin);
|
||||
if (strlen(pattern) && pattern[strlen(pattern)-1] == '\n')
|
||||
pattern[strlen(pattern)-1] = '\0';
|
||||
|
||||
ptr = bmstrstr (readbuf, fsize, pattern, strlen (pattern));
|
||||
if (ptr == NULL)
|
||||
puts ("bmstrstr: Pattern not found.");
|
||||
else
|
||||
printf ("bmstrstr: Pattern found at offset %ld.\n", ptr - readbuf);
|
||||
|
||||
ptr = strstr (readbuf, pattern);
|
||||
if (ptr == NULL)
|
||||
puts ("strstr: Pattern not found.");
|
||||
else
|
||||
printf ("strstr: Pattern found at offset %ld.\n", ptr - readbuf);
|
||||
|
||||
goto MAIN_LOOP;
|
||||
} /* main() test program */
|
||||
|
||||
#endif
|
||||
|
||||
/************************* BMSTRSTR.C ****************************/
|
||||
Reference in New Issue
Block a user