dtdocbook: merge dtinfogen into dtdocbook.
This commit is contained in:
368
cde/programs/dtdocbook/infolib/AusTextStorage.C
Normal file
368
cde/programs/dtdocbook/infolib/AusTextStorage.C
Normal file
@@ -0,0 +1,368 @@
|
||||
/*
|
||||
* CDE - Common Desktop Environment
|
||||
*
|
||||
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
|
||||
*
|
||||
* These libraries and programs are free software; you can
|
||||
* redistribute them and/or modify them under the terms of the GNU
|
||||
* Lesser General Public License as published by the Free Software
|
||||
* Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* These libraries and programs are distributed in the hope that
|
||||
* they will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with these libraries and programs; if not, write
|
||||
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
|
||||
* Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
/* $XConsortium: AusTextStorage.cc /main/5 1996/07/23 18:08:29 cde-hal $
|
||||
*
|
||||
* (c) Copyright 1996 Digital Equipment Corporation.
|
||||
* (c) Copyright 1996 Hewlett-Packard Company.
|
||||
* (c) Copyright 1996 International Business Machines Corp.
|
||||
* (c) Copyright 1996 Sun Microsystems, Inc.
|
||||
* (c) Copyright 1996 Novell, Inc.
|
||||
* (c) Copyright 1996 FUJITSU LIMITED.
|
||||
* (c) Copyright 1996 Hitachi.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <errno.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <assert.h>
|
||||
#include <sstream>
|
||||
using namespace std;
|
||||
|
||||
/* imported interfaces */
|
||||
#include <misc/unique_id.h>
|
||||
#include "FlexBuffer.h"
|
||||
#include "Task.h"
|
||||
#include "DataBase.h"
|
||||
#include "DataRepository.h"
|
||||
#include "api/utility.h"
|
||||
|
||||
/* exported interfaces */
|
||||
#include "AusTextStorage.h"
|
||||
|
||||
#ifdef DTSR_USE_CNTR_L
/*
 * Control-L character, written on a line of its own after each record
 * by AusTextStore::insert. The numeric value is for ASCII systems only.
 */
const char CNTR_L = '\x0C';
#endif
|
||||
|
||||
// If NodeParser ever gets setup to run on all bookcases at one time, we
|
||||
// will need a reset() function for this member.
|
||||
unsigned long AusTextStore::f_recordcount = 0;
|
||||
|
||||
#ifdef DTSR_LIKES_FGETS
|
||||
/* Line size allowed for data in the *.fzk file */
const int LINE_SIZE = 80;

/* Most of the non-alphanumeric characters in the ASCII code set */
const char *DELIMITER_SET =
  "\t\n "
  "!@#$%^&*()_-=+"
  "\\|~[]{};:,.<>/?";
|
||||
|
||||
/*
 * Code-set classification returned by JpEucCodeSet for the lead byte
 * of a character. Values after CodeSetInv count up from 0.
 */
enum EucCodeSet {
  CodeSetInv = -1,  /* no text supplied (NULL pointer) */
  CodeSet0,         /* lead byte below 0x80 */
  CodeSet1,         /* any other lead byte */
  CodeSet2,         /* lead byte 0x8E */
  CodeSet3          /* lead byte 0x8F */
};
|
||||
|
||||
/*
|
||||
 * charset determines whether ch is found in the set;
 * it returns 1 if so, 0 otherwise
|
||||
*/
|
||||
/*
|
||||
* @@ charset is expensive, alternative approach is to use a
|
||||
* static array
|
||||
* static char char_tab[] = { 0, 0, 0, 1,...... }
|
||||
* where 1 indicates the character is in the delimiter character set
|
||||
* however, this might not be portable for character set other than
|
||||
* ascii , so this has to be done carefully
|
||||
* If the format of the fzk is changed, all this will no longer be
|
||||
* required. So, I am not going to do anything at this point
|
||||
*/
|
||||
|
||||
//-----------------------------------------------------------------
|
||||
/*
 * Linear membership test: returns 1 when ch equals one of the
 * characters of the NUL-terminated string set, 0 otherwise.
 * The terminating NUL itself is never matched, so ch == '\0'
 * always yields 0.
 */
static int charset ( const char ch, const char *set)
{
  const char *cursor = set;

  while ( *cursor != '\0' ) {
    if ( *cursor == ch ) { return 1; }
    cursor++;
  }

  return 0;
}
|
||||
|
||||
/*
|
||||
 * getline returns the number of bytes that should be read as a line.
 * Normally that is line_size, but if a token would span 2 lines,
 * getline needs to determine the line size such that, at the end of
 * the line, no token spans into the next line.
|
||||
*/
|
||||
/*
|
||||
* start_ptr is the start of the buffer and end_ptr is the end of the buffer
|
||||
* it is similar to fread except that end_ptr is supplied as the bounding
|
||||
* condition as opposed to the EOF in fread. Besides, no actual character
|
||||
* is read , only the number of characters that should be read as a line.
|
||||
*/
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
static unsigned int DefaultGetLine ( const char *start_ptr,
|
||||
const char *end_ptr,
|
||||
int line_size )
|
||||
{
|
||||
if ( start_ptr > end_ptr ) { return 0; }
|
||||
|
||||
if ( start_ptr + line_size - 1 <= end_ptr ) { // not @ the end yet
|
||||
/*
|
||||
* FIrst see if there is a token that spans multiple lines
|
||||
*/
|
||||
const char *ptr = start_ptr + line_size - 1;
|
||||
if ( ptr == end_ptr ) { return line_size; }
|
||||
|
||||
if ( charset( *(ptr+1), DELIMITER_SET ) || charset ( *ptr, DELIMITER_SET ) ) {
|
||||
return ( line_size );
|
||||
}
|
||||
|
||||
/* That means found a token that spans 2 lines */
|
||||
/* So now loop back until *ptr is not in DELIMITER_SET */
|
||||
const char *new_end_ptr;
|
||||
for ( new_end_ptr = ptr;
|
||||
new_end_ptr > start_ptr && !charset( *new_end_ptr , DELIMITER_SET );
|
||||
new_end_ptr-- );
|
||||
|
||||
return( new_end_ptr - start_ptr + 1 );
|
||||
|
||||
}
|
||||
else {
|
||||
// last chunk of line
|
||||
return ( end_ptr - start_ptr + 1 );
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Classify the character starting at text by its first byte:
 *   NULL pointer -> CodeSetInv
 *   byte < 0x80  -> CodeSet0
 *   byte == 0x8E -> CodeSet2
 *   byte == 0x8F -> CodeSet3
 *   anything else is asserted to lie in 0xA1..0xFE -> CodeSet1
 */
inline EucCodeSet JpEucCodeSet(const unsigned char* text)
{
  if (text == NULL)
    return CodeSetInv;

  if (*text < 0x80)
    return CodeSet0;

  switch (*text) {
    case 0x8E:
      return CodeSet2;
    case 0x8F:
      return CodeSet3;
    default:
      break;
  }

  assert( *text > 0xA0 && *text < 0xFF);
  return CodeSet1;
}
|
||||
|
||||
static unsigned int JpGetLine ( const char *start_ptr,
|
||||
const char *end_ptr,
|
||||
int line_size )
|
||||
{
|
||||
if (start_ptr > end_ptr)
|
||||
return 0;
|
||||
|
||||
if (end_ptr - start_ptr + 1 <= line_size)
|
||||
return (end_ptr - start_ptr + 1);
|
||||
|
||||
// reference limit
|
||||
const char* limit = start_ptr + line_size;
|
||||
|
||||
EucCodeSet codeset = JpEucCodeSet((const unsigned char*)start_ptr);
|
||||
|
||||
int len;
|
||||
const char* p;
|
||||
|
||||
for (p = start_ptr; p < limit; p += len) {
|
||||
|
||||
if (JpEucCodeSet((const unsigned char*)p) != codeset)
|
||||
break;
|
||||
|
||||
if (codeset == CodeSet0)
|
||||
len = 1;
|
||||
else if ((codeset == CodeSet1) || (codeset == CodeSet2))
|
||||
len = 2;
|
||||
else if (codeset == CodeSet3)
|
||||
len = 3;
|
||||
else
|
||||
len = 0;
|
||||
|
||||
if ((len == 0) || (p + len - 1 > end_ptr))
|
||||
break;
|
||||
}
|
||||
|
||||
return (p - start_ptr);
|
||||
}
|
||||
#endif // DTSR_LIKES_FGETS
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
/*
 * Returns 1 when filename can be stat()ed and refers to a directory,
 * 0 in every other case (including stat() failure).
 */
static int isdir(const char* filename)
{
  struct stat info;

  if (stat(filename, &info) != 0)
    return 0;

  return S_ISDIR(info.st_mode) ? 1 : 0;
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
static void makedir(const char *path) /* throw(PosixError) */
|
||||
{
|
||||
if(mkdir((char*)path, 0775) != 0){
|
||||
throw(PosixError(errno, path));
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
/*
 * Prepare the fzk output file "<path>/dtsearch/<name>.fzk".
 *
 * Creates path and path/dtsearch when they are not already directories,
 * stores the dtsearch directory name in austext_path and opens the fzk
 * file in append mode on afp.
 *
 * Throws PosixError when a directory cannot be created (via makedir) or
 * when the fzk file cannot be opened.
 */
AusTextStore::AusTextStore( const char *path, const char *name )
{
  if ( !isdir(path) ) {
    makedir(path);
  }

  /* path + '/' + "dtsearch" + terminating NUL */
  int textlen = strlen(path) + 1 + strlen("dtsearch") + 1;
  austext_path = new char [ textlen ];
  /*
   * throw(ResourceExhausted)
   *
   */
  assert ( austext_path != NULL );

  snprintf( austext_path, textlen, "%s/dtsearch", path );

  // NOTE(review): if makedir() throws here, austext_path is not released;
  // it cannot be freed first because it is the path handed to the error.
  if ( !isdir(austext_path) ) {
    makedir(austext_path);
  }

  char *fzk = form("%s/%s.fzk", austext_path, name );

  /* Use append instead because this fzk file is going to be appended
   * all the time
   */
  afp = fopen ( fzk, "a" );
  if ( !afp ) {
    /* capture errno before any other call can overwrite it */
    const int open_errno = errno;

    /* the destructor does not run for a constructor that throws, so
     * release the path buffer here */
    delete [] austext_path;
    austext_path = NULL;

    throw(PosixError(open_errno, form("unable to open fzk file %s\n", fzk) ) );
  }
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
void
|
||||
AusTextStore::insert( const char *BookShortTitle,
|
||||
const char *BookID,
|
||||
const char *SectionID,
|
||||
const char *SectionTitle,
|
||||
DataRepository *store
|
||||
)
|
||||
{
|
||||
|
||||
/* write the abstract and record stuff in the fzk file */
|
||||
if ( afp ) {
|
||||
|
||||
f_recordcount++;
|
||||
/* Record type ie for all the zone content */
|
||||
FlexBuffer **table = store->tabbuf();
|
||||
for ( int pos=store->Default;
|
||||
pos < store->Total;
|
||||
pos++ ) {
|
||||
|
||||
if ( table[pos] ) {
|
||||
|
||||
if ( table[pos]->GetSize() > 0 ) {
|
||||
|
||||
fprintf(afp, " 0,2\n");
|
||||
|
||||
/* abstract includes SectionID\tBookShortTitle\tSectionTitle */
|
||||
fprintf(afp, "ABSTRACT: %s\t%s\t%s\n", SectionID,
|
||||
BookShortTitle,
|
||||
SectionTitle );
|
||||
|
||||
// first the record type
|
||||
// The following was unique, but there is a limit to the size of
|
||||
// the key, so let's just use a simple counter.
|
||||
// fprintf(afp, "%s%s%s\n", store->get_zone_name(pos), BookID, SectionID);
|
||||
fprintf(afp, "%s%d\n", store->get_zone_name(pos), (int)f_recordcount);
|
||||
|
||||
fprintf(afp, "0/0/0~0:0\n"); // null date
|
||||
|
||||
// Now the actual buffer
|
||||
const char *start_ptr = table[pos]->GetBuffer();
|
||||
const char *end_ptr = start_ptr + table[pos]->GetSize() - 1;
|
||||
|
||||
#ifdef DTSR_LIKES_FGETS
|
||||
unsigned int (*getline)(const char *, const char *, int);
|
||||
|
||||
const char* lang = getenv("LANG");
|
||||
if (lang && !strncmp(lang, "ja", strlen("ja")))
|
||||
getline = JpGetLine;
|
||||
else
|
||||
getline = DefaultGetLine;
|
||||
|
||||
int num_byte;
|
||||
while ( num_byte = getline(start_ptr, end_ptr, LINE_SIZE) ) {
|
||||
if ( !fwrite(start_ptr, num_byte, 1, afp ) )
|
||||
{
|
||||
throw(PosixError(errno, "unable to write to fzk file\n" ) );
|
||||
}
|
||||
fputc('\n', afp );
|
||||
|
||||
start_ptr += num_byte;
|
||||
}
|
||||
|
||||
// for current section and book level scopes, place the book and
|
||||
// section ids into the indexed data.
|
||||
fprintf(afp, "\n%s\n%s\n", BookID, SectionID);
|
||||
#else
|
||||
char *ptr = (char*)start_ptr;
|
||||
for (; ptr <= end_ptr; ptr++) {
|
||||
if (*ptr == '\n')
|
||||
*ptr = ' ';
|
||||
}
|
||||
|
||||
if (fwrite(start_ptr, table[pos]->GetSize(), 1, afp) == 0)
|
||||
throw(PosixError(errno, "unable to write to fzk file\n"));
|
||||
|
||||
// for current section and book level scopes, place the book and
|
||||
// section ids into the indexed data.
|
||||
fprintf(afp, "\t%s\t%s", BookID, SectionID);
|
||||
#endif
|
||||
|
||||
#ifdef DTSR_USE_CNTR_L
|
||||
// Then the ^L character at the end
|
||||
fprintf(afp, "\n%c\n", CNTR_L );
|
||||
#else
|
||||
fprintf(afp, "\n");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
/*
 * Close the fzk stream when it is open and release the path buffer.
 */
AusTextStore::~AusTextStore()
{
  if ( afp != NULL ) {
    fclose(afp);
  }

  /* delete [] on a null pointer is a no-op, so no guard is needed */
  delete [] austext_path;
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user