/* * CDE - Common Desktop Environment * * Copyright (c) 1993-2012, The Open Group. All rights reserved. * * These libraries and programs are free software; you can * redistribute them and/or modify them under the terms of the GNU * Lesser General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) * any later version. * * These libraries and programs are distributed in the hope that * they will be useful, but WITHOUT ANY WARRANTY; without even the * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public * License along with these libraries and programs; if not, write * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth * Floor, Boston, MA 02110-1301 USA */ /* $XConsortium: AusTextStorage.cc /main/5 1996/07/23 18:08:29 cde-hal $ * * (c) Copyright 1996 Digital Equipment Corporation. * (c) Copyright 1996 Hewlett-Packard Company. * (c) Copyright 1996 International Business Machines Corp. * (c) Copyright 1996 Sun Microsystems, Inc. * (c) Copyright 1996 Novell, Inc. * (c) Copyright 1996 FUJITSU LIMITED. * (c) Copyright 1996 Hitachi. */ #include #include #include #include #include #include #include using namespace std; /* imported interfaces */ #include #include "FlexBuffer.h" #include "Task.h" #include "DataBase.h" #include "DataRepository.h" #include "api/utility.h" /* exported interfaces */ #include "AusTextStorage.h" #ifdef DTSR_USE_CNTR_L const char CNTR_L = '\014'; /* This is for ascii system only */ #endif // If NodeParser ever gets setup to run on all bookcases at one time, we // will need a reset() function for this member. 
// Count of records written so far.  It is a static member, so the value is
// shared by every AusTextStore instance.
unsigned long AusTextStore::f_recordcount = 0;

#ifdef DTSR_LIKES_FGETS

const int LINE_SIZE = 80; /* this is the line size allowed for data in
                           * the *.fzk file */

/* Most of the non-alphanumeric characters in the ascii code set */
const char *DELIMITER_SET = "\t\n !@#$%^&*()_-=+\\|~[]{};:,.<>/?";

/* Code set of an EUC character, as classified by JpEucCodeSet() below. */
enum EucCodeSet {
  CodeSetInv = -1,
  CodeSet0 = 0,
  CodeSet1 = 1,
  CodeSet2 = 2,
  CodeSet3 = 3
};

/*
 * charset determines if ch is found in the NUL-terminated string set.
 * Returns 1 if so, 0 otherwise.  The terminating NUL itself is never
 * treated as a member, so charset('\0', set) is always 0.
 */
/*
 * @@ charset is expensive, alternative approach is to use a
 * static array
 *   static char char_tab[] = { 0, 0, 0, 1,...... }
 * where 1 indicates the character is in the delimiter character set.
 * However, this might not be portable for character sets other than
 * ascii, so this has to be done carefully.
 * If the format of the fzk is changed, all this will no longer be
 * required.  So, I am not going to do anything at this point.
 */
//-----------------------------------------------------------------
static int
charset ( const char ch, const char *set)
{
  const char *cursor = set;
  while ( *cursor != '\0' ) {
    if ( *cursor == ch ) {
      return 1;
    }
    cursor++;
  }
  return 0;
}

/*
 * DefaultGetLine returns the no. of bytes that should be written as the
 * next line.  Normally that is line_size, but if a token would span two
 * lines the length is shortened so that the line ends on the last
 * delimiter found inside the window.
 *
 * start_ptr is the first byte still to be written and end_ptr is the LAST
 * valid byte (inclusive).  Nothing is read or copied; only the length of
 * the next line is computed.
 *
 * Returns 0 when start_ptr is past end_ptr or line_size is not positive.
 */
//--------------------------------------------------------------------------
static unsigned int
DefaultGetLine ( const char *start_ptr, const char *end_ptr, int line_size )
{
  // A non-positive line size would make the code below inspect
  // start_ptr[-1]; treat it as "nothing to emit".
  if ( line_size <= 0 || start_ptr > end_ptr ) {
    return 0;
  }

  // Compare lengths instead of forming start_ptr + line_size - 1 first, so
  // that no pointer beyond the buffer is ever computed.
  const long remaining = (end_ptr - start_ptr) + 1;
  if ( remaining <= line_size ) {
    // last chunk: everything that is left fits on one line
    return static_cast<unsigned int>(remaining);
  }

  // Last byte of a full-width line.  It is strictly before end_ptr here,
  // so looking at the byte after it is safe.
  const char *last = start_ptr + line_size - 1;
  if ( charset( *(last + 1), DELIMITER_SET ) ||
       charset( *last, DELIMITER_SET ) ) {
    // The line boundary does not cut through a token.
    return static_cast<unsigned int>(line_size);
  }

  /* A token spans the boundary: walk back to the nearest delimiter. */
  const char *brk = last;
  while ( brk > start_ptr && !charset( *brk, DELIMITER_SET ) ) {
    brk--;
  }

  if ( brk == start_ptr && !charset( *brk, DELIMITER_SET ) ) {
    // The whole window is one token with no delimiter at all.  Ending the
    // line after a single byte here would emit the token one byte per
    // line, so break it at the full line width instead.
    return static_cast<unsigned int>(line_size);
  }

  return static_cast<unsigned int>( (brk - start_ptr) + 1 );
}

/*
 * JpEucCodeSet classifies the character starting at text by its first
 * byte:
 *   NULL pointer       -> CodeSetInv
 *   byte < 0x80        -> CodeSet0
 *   byte == 0x8E       -> CodeSet2
 *   byte == 0x8F       -> CodeSet3
 *   anything else      -> CodeSet1 (asserted to lie in 0xA1..0xFE)
 */
inline EucCodeSet
JpEucCodeSet(const unsigned char* text)
{
  if (text == NULL) {
    return CodeSetInv;
  }

  const unsigned char lead = *text;
  if (lead < 0x80) {
    return CodeSet0;
  }
  if (lead == 0x8E) {
    return CodeSet2;
  }
  if (lead == 0x8F) {
    return CodeSet3;
  }

  assert( lead > 0xA0 && lead < 0xFF );
  return CodeSet1;
}

/*
 * JpGetLine returns the no. of bytes that should be written as the next
 * line.  Same calling convention as DefaultGetLine (end_ptr is inclusive).
 *
 * A line is a run of characters that all belong to the code set of the
 * first character.  The run stops at the first character of a different
 * code set, at a character that would extend past end_ptr, or at a
 * character that would not fit entirely inside line_size bytes.  If
 * everything that is left fits in line_size bytes it is returned as one
 * line without scanning.
 */
static unsigned int
JpGetLine ( const char *start_ptr, const char *end_ptr, int line_size )
{
  if (start_ptr > end_ptr) {
    return 0;
  }

  if (end_ptr - start_ptr + 1 <= line_size) {
    return static_cast<unsigned int>(end_ptr - start_ptr + 1);
  }

  // reference limit: one past the last byte a full line may use
  const char* limit = start_ptr + line_size;

  const EucCodeSet codeset = JpEucCodeSet((const unsigned char*)start_ptr);

  // Bytes per character.  Every character on the line shares the code set
  // of the first one, so this is fixed for the whole scan.
  int len;
  switch (codeset) {
    case CodeSet0:
      len = 1;
      break;
    case CodeSet1:
    case CodeSet2:
      len = 2;
      break;
    case CodeSet3:
      len = 3;
      break;
    default:
      len = 0;
      break;
  }

  const char* p = start_ptr;
  while (p < limit) {
    if (JpEucCodeSet((const unsigned char*)p) != codeset) {
      break;
    }
    if ((len == 0) || (p + len - 1 > end_ptr)) {
      break;
    }
    // Do not let a multi-byte character straddle the limit, which would
    // make the line longer than line_size.  The first character is always
    // accepted so the caller keeps making progress.
    if ((p + len > limit) && (p != start_ptr)) {
      break;
    }
    p += len;
  }

  return static_cast<unsigned int>(p - start_ptr);
}

#endif // DTSR_LIKES_FGETS
//----------------------------------------------------------------------- static int isdir(const char* filename) { int ret = 0; struct stat sb; if(stat(filename, &sb) == 0){ if(S_ISDIR(sb.st_mode)){ ret = 1; } } return ret; } //----------------------------------------------------------------------- static void makedir(const char *path) /* throw(PosixError) */ { if(mkdir((char*)path, 0775) != 0){ throw(PosixError(errno, path)); } } //----------------------------------------------------------------------- AusTextStore::AusTextStore( const char *path, const char *name ) { if ( !isdir(path) ) { makedir(path); } int textlen = strlen(path) + 1 + strlen("dtsearch") + 1; austext_path = new char [ textlen ]; /* * throw(ResourceExhausted) * */ assert ( austext_path != NULL ); snprintf( austext_path, textlen, "%s/dtsearch", path ); if ( !isdir(austext_path) ) { makedir(austext_path); } char *fzk = form("%s/%s.fzk", austext_path, name ); /* Use append instead because this fzk file is going to be appended * all the time */ afp = fopen ( fzk, "a" ); if ( !afp ) { throw(PosixError(errno, form("unable to open fzk file %s\n", fzk) ) ); } } //----------------------------------------------------------------------- void AusTextStore::insert( const char *BookShortTitle, const char *BookID, const char *SectionID, const char *SectionTitle, DataRepository *store ) { /* write the abstract and record stuff in the fzk file */ if ( afp ) { f_recordcount++; /* Record type ie for all the zone content */ FlexBuffer **table = store->tabbuf(); for ( int pos=store->Default; pos < store->Total; pos++ ) { if ( table[pos] ) { if ( table[pos]->GetSize() > 0 ) { fprintf(afp, " 0,2\n"); /* abstract includes SectionID\tBookShortTitle\tSectionTitle */ fprintf(afp, "ABSTRACT: %s\t%s\t%s\n", SectionID, BookShortTitle, SectionTitle ); // first the record type // The following was unique, but there is a limit to the size of // the key, so let's just use a simple counter. 
// fprintf(afp, "%s%s%s\n", store->get_zone_name(pos), BookID, SectionID); fprintf(afp, "%s%d\n", store->get_zone_name(pos), (int)f_recordcount); fprintf(afp, "0/0/0~0:0\n"); // null date // Now the actual buffer const char *start_ptr = table[pos]->GetBuffer(); const char *end_ptr = start_ptr + table[pos]->GetSize() - 1; #ifdef DTSR_LIKES_FGETS unsigned int (*getline)(const char *, const char *, int); const char* lang = getenv("LANG"); if (lang && !strncmp(lang, "ja", strlen("ja"))) getline = JpGetLine; else getline = DefaultGetLine; int num_byte; while ( num_byte = getline(start_ptr, end_ptr, LINE_SIZE) ) { if ( !fwrite(start_ptr, num_byte, 1, afp ) ) { throw(PosixError(errno, "unable to write to fzk file\n" ) ); } fputc('\n', afp ); start_ptr += num_byte; } // for current section and book level scopes, place the book and // section ids into the indexed data. fprintf(afp, "\n%s\n%s\n", BookID, SectionID); #else char *ptr = (char*)start_ptr; for (; ptr <= end_ptr; ptr++) { if (*ptr == '\n') *ptr = ' '; } if (fwrite(start_ptr, table[pos]->GetSize(), 1, afp) == 0) throw(PosixError(errno, "unable to write to fzk file\n")); // for current section and book level scopes, place the book and // section ids into the indexed data. fprintf(afp, "\t%s\t%s", BookID, SectionID); #endif #ifdef DTSR_USE_CNTR_L // Then the ^L character at the end fprintf(afp, "\n%c\n", CNTR_L ); #else fprintf(afp, "\n"); #endif } } } } } //----------------------------------------------------------------------- AusTextStore::~AusTextStore() { if ( afp ) { fclose(afp); } if ( austext_path ) { delete [] austext_path; } }