/* * CDE - Common Desktop Environment * * Copyright (c) 1993-2012, The Open Group. All rights reserved. * * These libraries and programs are free software; you can * redistribute them and/or modify them under the terms of the GNU * Lesser General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) * any later version. * * These libraries and programs are distributed in the hope that * they will be useful, but WITHOUT ANY WARRANTY; without even the * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public * License along with these libraries and programs; if not, write * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth * Floor, Boston, MA 02110-1301 USA */ /* * COMPONENT_NAME: austext * * FUNCTIONS: bmhcore * bmhtable_build * bmstrstr * main * * ORIGINS: 27 * * * (C) COPYRIGHT International Business Machines Corp. 1992,1995 * All Rights Reserved * Licensed Materials - Property of IBM * US Government Users Restricted Rights - Use, duplication or * disclosure restricted by GSA ADP Schedule Contract with IBM Corp. */ /************************* BMSTRSTR.C **************************** * $TOG: bmstrstr.c /main/6 1998/04/17 11:25:23 mgreess $ * Original module named fastsearch.c * and included colocation string search functions. * Modification of Boyer-Moore-Horspool algorithm, * Sec 10.5.2 of Information Retrieval, Frakes and Baeza-Yates, editors. * Provides a generalized boyer-moore * strstr() function. The table used in the BMH algorithm is * generated in a separate function to improve efficiency when * looking for the same substring pattern in multiple text strings. * The 'length' arguments can be passed if known, or passed as * strlen(xxx) if not known. HOWEVER the string arrays MUST be at * least 1 char larger then strlen() says ('cause we insert a \0). * This whole thing has been coded for SPEED! * * $Log$ * Revision 2.2 1995/10/26 15:37:42 miker * Added prolog. * * Revision 2.1 1995/09/22 19:10:39 miker * Freeze DtSearch 0.1, AusText 2.1.8 * * Revision 1.2 1995/08/31 22:12:05 miker * Minor changes for DtSearch. */ #include "SearchP.h" #include #include #include #ifndef _AIX #define __strcmp strcmp #endif /*********#define TEST_BMSTRSTR**************/ /****************************************/ /* */ /* bmhtable_build */ /* */ /****************************************/ /* Builds the table for search substring 'pattern' * used by the BMH core search algorithm. * 'patlen' is the string length of 'pattern'. * The caller defines and passes 'bmhtable', * an array of long integers (size_t) of size MAX_BMHTAB. * This function initializes bmhtable for later search call. */ void bmhtable_build ( unsigned char *pattern, size_t patlen, size_t * bmhtable) { int k; for (k = 0; k < MAX_BMHTAB; k++) bmhtable[k] = patlen; patlen--; for (k = 0; k < patlen; k++) bmhtable[pattern[k]] = (patlen - k); return; } /* bmhtable_build() */ /****************************************/ /* */ /* bmhcore */ /* */ /****************************************/ /* Performs 'core' BMH search after bmhtable is built. * Returns ptr to first occurrence of pattern in text or NULL. * WARNING! IF EITHER txtlen OR patlen <= 0, THIS FUNCTION WILL CRASH!!! * Pattern and patlen MUST BE identical with those used to build * bmhtable. */ char *bmhcore ( unsigned char *text, size_t txtlen, unsigned char *pattern, size_t patlen, size_t *bmhtable) { unsigned char lastchar = pattern[patlen - 1]; unsigned char textchar; unsigned char *cp; unsigned char *last; int savechar; int savechar2; unsigned char *result = NULL; /* Terminate pattern with a char we KNOW is not in text. * Note that this requires string to have room for \0 at end. */ savechar = pattern[patlen]; pattern[patlen] = '\0'; last = text + txtlen; for (cp = text + patlen - 1; cp < last; cp += bmhtable[textchar]) { /* * Check if last character matches. If it doesn't, no need * to check any further. */ if ((textchar = *cp) != lastchar) continue; savechar2 = cp[1]; cp[1] = '\0'; if (!__strcmp ((char *) (cp + 1 - patlen), (char *) pattern)) result = cp + 1 - patlen; cp[1] = savechar2; if (result) break; } pattern[patlen] = savechar; /* restore last char */ return (char *) result; } /* bmhcore() */ /****************************************/ /* */ /* bmstrstr */ /* */ /****************************************/ /* Search in text [1..txtlen] for pattern [1..patlen]. * Returns ptr to first occurrence of pattern, or NULL. */ char *bmstrstr ( unsigned char *text, size_t txtlen, unsigned char *pattern, size_t patlen) { size_t bmhtable[MAX_BMHTAB]; bmhtable_build (pattern, patlen, bmhtable); return bmhcore (text, txtlen, pattern, patlen, bmhtable); } /* bmstrstr() */ #ifdef TEST_BMSTRSTR /* for test only */ #include /****************************************/ /* */ /* main */ /* */ /****************************************/ /* tests bmstrstr() against standard strstr() on a specified file */ main () { FILE *f; struct stat statbuf; size_t fsize = 0L; char fname[BUFSIZ+1]; char pattern[MAXPATHLEN+1]; char *readbuf = NULL; char *ptr; MAIN_LOOP: printf ("\nEnter a filename (Ctrl-C quits) > "); *fname = '\0'; fgets (fname, sizeof(fname), stdin); if (strlen(fname) && fname[strlen(fname)-1] == '\n') fname[strlen(fname)-1] = '\0'; if ((f = fopen (fname, "r")) == NULL) { printf ("Can't open '%s': %s\n", fname, strerror (errno)); goto MAIN_LOOP; } fstat (fileno (f), &statbuf); if (fsize > statbuf.st_size) { free (readbuf); readbuf = NULL; } fsize = statbuf.st_size; if (readbuf == NULL) readbuf = malloc (fsize + 64L); fread (readbuf, fsize, 1L, f); fclose (f); printf ("Enter a search pattern > "); *pattern = '\0'; fgets (pattern, sizeof(pattern), stdin); if (strlen(pattern) && pattern[strlen(pattern)-1] == '\n') pattern[strlen(pattern)-1] = '\0'; ptr = bmstrstr (readbuf, fsize, pattern, strlen (pattern)); if (ptr == NULL) puts ("bmstrstr: Pattern not found."); else printf ("bmstrstr: Pattern found at offset %ld.\n", ptr - readbuf); ptr = strstr (readbuf, pattern); if (ptr == NULL) puts ("strstr: Pattern not found."); else printf ("strstr: Pattern found at offset %ld.\n", ptr - readbuf); goto MAIN_LOOP; } /* main() test program */ #endif /************************* BMSTRSTR.C ****************************/