dtdocbook: merge dtinfogen into dtdocbook.

2022-01-12 04:04:33 +08:00
parent f9e02f163b
commit c6661c92c9
161 changed files with 1884 additions and 2390 deletions
--- a/cde/programs/dtdocbook/infolib/AusTextStorage.C
+++ b/cde/programs/dtdocbook/infolib/AusTextStorage.C
@@ -0,0 +1,368 @@
+/*
+ * CDE - Common Desktop Environment
+ *
+ * Copyright (c) 1993-2012, The Open Group. All rights reserved.
+ *
+ * These libraries and programs are free software; you can
+ * redistribute them and/or modify them under the terms of the GNU
+ * Lesser General Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * These libraries and programs are distributed in the hope that
+ * they will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with these libraries and programs; if not, write
+ * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
+ * Floor, Boston, MA 02110-1301 USA
+ */
+/* $XConsortium: AusTextStorage.cc /main/5 1996/07/23 18:08:29 cde-hal $
+ *
+ * (c) Copyright 1996 Digital Equipment Corporation.
+ * (c) Copyright 1996 Hewlett-Packard Company.
+ * (c) Copyright 1996 International Business Machines Corp.
+ * (c) Copyright 1996 Sun Microsystems, Inc.
+ * (c) Copyright 1996 Novell, Inc. 
+ * (c) Copyright 1996 FUJITSU LIMITED.
+ * (c) Copyright 1996 Hitachi.
+ */
+
+#include <stdio.h>
+#include <stddef.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <assert.h>
+#include <sstream>
+using namespace std;
+
+/* imported interfaces */
+#include <misc/unique_id.h>
+#include "FlexBuffer.h"
+#include "Task.h"
+#include "DataBase.h"
+#include "DataRepository.h"
+#include "api/utility.h"
+
+/* exported interfaces */
+#include "AusTextStorage.h"
+
+#ifdef DTSR_USE_CNTR_L
+const char CNTR_L = '\014';  /* Ctrl-L (^L) written after each record; this value is for ASCII systems only */
+#endif
+
+// If NodeParser is ever set up to run on all bookcases at one time, we
+// will need a reset() function for this member.
+unsigned long AusTextStore::f_recordcount = 0;  // bumped once per insert() call and appended to each record key
+
+#ifdef DTSR_LIKES_FGETS
+const int LINE_SIZE = 80;    /* the line size allowed for data written to
+			      * the *.fzk file
+			      */
+
+/* Most of the non-alphanumeric characters in the ASCII code set */
+const char *DELIMITER_SET = "\t\n !@#$%^&*()_-=+\\|~[]{};:,.<>/?";
+
+enum EucCodeSet {
+	CodeSetInv = -1,  /* JpEucCodeSet() was handed a NULL pointer */
+	CodeSet0   =  0,  /* lead byte below 0x80; JpGetLine() steps 1 byte */
+	CodeSet1   =  1,  /* any other lead byte (asserted 0xA1..0xFE); JpGetLine() steps 2 bytes */
+	CodeSet2   =  2,  /* lead byte 0x8E; JpGetLine() steps 2 bytes */
+	CodeSet3   =  3   /* lead byte 0x8F; JpGetLine() steps 3 bytes */
+};
+
+/*
+ * charset reports whether ch occurs in the NUL-terminated string set.
+ * Returns 1 if it does, 0 otherwise.  The terminating NUL of set is
+ * never compared, so charset('\0', set) is always 0.
+ */
+/*
+ * @@ charset is expensive.  An alternative approach is a static lookup
+ * table
+ *   static char char_tab[] = { 0, 0, 0, 1, ... }
+ * where 1 indicates that the character is in the delimiter set.
+ * However, that might not be portable to character sets other than
+ * ASCII, so it would have to be done carefully.
+ * If the format of the fzk file is changed, none of this will be
+ * required any longer, so nothing is being done about it at this point.
+ */
+
+//-----------------------------------------------------------------
+static int charset ( const char ch, const char *set)
+{
+  const char *cursor = set;
+
+  // Walk the set up to, but not including, its terminator.
+  while ( *cursor != '\0' ) {
+    if ( *cursor == ch ) { return 1; }
+    cursor++;
+  }
+
+  return 0;
+}
+
+/*
+ * DefaultGetLine returns the number of bytes, counted from start_ptr,
+ * that should be written as the next line.  Nothing is read or copied;
+ * only the length is computed.
+ *
+ * start_ptr is the first unread byte of the buffer and end_ptr is the
+ * last valid byte (inclusive).  It is similar to fread except that
+ * end_ptr, rather than EOF, is the bounding condition.
+ *
+ * Result:
+ *   - 0 when start_ptr is past end_ptr (or line_size is not positive);
+ *   - the number of remaining bytes when they fit within line_size;
+ *   - line_size when no token straddles the line boundary;
+ *   - otherwise a shorter length that ends on the nearest preceding
+ *     delimiter, so that the straddling token starts the next line;
+ *   - line_size when the whole line holds no delimiter at all, i.e. the
+ *     token is longer than a line and has to be split somewhere.
+ */
+//--------------------------------------------------------------------------
+
+static unsigned int DefaultGetLine ( const char *start_ptr,
+			      const char *end_ptr,
+			      int line_size )
+{
+  if ( start_ptr > end_ptr || line_size <= 0 ) { return 0; }
+
+  // Compare lengths instead of forming start_ptr + line_size - 1 first:
+  // that pointer may lie beyond the buffer for the last chunk.
+  if ( end_ptr - start_ptr + 1 <= line_size ) {
+    // last chunk of line (possibly exactly line_size bytes)
+    return (unsigned int)( end_ptr - start_ptr + 1 );
+  }
+
+  // From here on at least one byte follows the candidate line.
+  const char *last = start_ptr + line_size - 1;
+
+  /*
+   * First see if there is a token that spans the line boundary.
+   * If either side of the boundary is a delimiter, nothing is split.
+   */
+  if ( charset( *(last+1), DELIMITER_SET ) || charset ( *last, DELIMITER_SET ) ) {
+    return (unsigned int)line_size;
+  }
+
+  /* A token spans 2 lines: walk back until a delimiter is found */
+  const char *cut = last;
+  while ( cut > start_ptr && !charset( *cut, DELIMITER_SET ) ) {
+    cut--;
+  }
+
+  if ( cut == start_ptr && !charset( *cut, DELIMITER_SET ) ) {
+    // No delimiter anywhere in this line.  Returning 1 here would emit
+    // the over-long token one byte per line, so split at line_size.
+    return (unsigned int)line_size;
+  }
+
+  return (unsigned int)( cut - start_ptr + 1 );
+}
+
+/*
+ * Classify the byte that text points at:
+ *   NULL pointer -> CodeSetInv
+ *   below 0x80   -> CodeSet0
+ *   0x8E         -> CodeSet2
+ *   0x8F         -> CodeSet3
+ *   anything else is asserted to lie in 0xA1..0xFE -> CodeSet1
+ */
+inline EucCodeSet JpEucCodeSet(const unsigned char* text)
+{
+    if (text == NULL)
+	return CodeSetInv;
+
+    if (*text < 0x80)
+	return CodeSet0;
+
+    if (*text == 0x8E)
+	return CodeSet2;
+
+    if (*text == 0x8F)
+	return CodeSet3;
+
+    assert( *text > 0xA0 && *text < 0xFF);
+    return CodeSet1;
+}
+
+/*
+ * JpGetLine returns the number of bytes, counted from start_ptr, that
+ * should be written as the next line; end_ptr is the last valid byte.
+ *
+ * If everything that is left fits within line_size, all of it is
+ * returned.  Otherwise whole characters are consumed for as long as
+ * they belong to the same code set as the first one and the scan
+ * position is still below start_ptr + line_size.  The scan also stops
+ * when a character would run past end_ptr.
+ */
+static unsigned int JpGetLine ( const char *start_ptr,
+			      const char *end_ptr,
+			      int line_size )
+{
+    if (start_ptr > end_ptr)
+	return 0;
+
+    const ptrdiff_t remaining = end_ptr - start_ptr + 1;
+    if (remaining <= line_size)
+	return remaining;
+
+    // Code set of the first character; the line ends where it changes.
+    const EucCodeSet run_set = JpEucCodeSet((const unsigned char*)start_ptr);
+
+    // Bytes per character for that code set; 0 means "cannot advance".
+    int width;
+    switch (run_set) {
+	case CodeSet0:
+	    width = 1;
+	    break;
+	case CodeSet1:
+	case CodeSet2:
+	    width = 2;
+	    break;
+	case CodeSet3:
+	    width = 3;
+	    break;
+	default:
+	    width = 0;
+	    break;
+    }
+
+    // reference limit
+    const char* const stop = start_ptr + line_size;
+    const char* cur = start_ptr;
+
+    while (cur < stop) {
+
+	if (JpEucCodeSet((const unsigned char*)cur) != run_set)
+	    break;
+
+	if ((width == 0) || (cur + width - 1 > end_ptr))
+	    break;
+
+	cur += width;
+    }
+
+    return (cur - start_ptr);
+}
+#endif // DTSR_LIKES_FGETS
+
+//-----------------------------------------------------------------------
+/*
+ * Returns 1 when stat() succeeds on filename and reports a directory,
+ * 0 in every other case (including a failing stat()).
+ */
+static int isdir(const char* filename)
+{
+  struct stat info;
+
+  if(stat(filename, &info) != 0){
+    return 0;
+  }
+
+  return S_ISDIR(info.st_mode) ? 1 : 0;
+}
+
+//-----------------------------------------------------------------------
+/*
+ * Create directory path with mode 0775.
+ * Throws PosixError carrying errno and path when mkdir() fails.
+ */
+static void makedir(const char *path) /* throw(PosixError) */
+{
+  const int status = mkdir((char*)path, 0775);
+
+  if(status == 0){
+    return;
+  }
+
+  throw(PosixError(errno, path));
+}
+
+//-----------------------------------------------------------------------
+/*
+ * Prepare "<path>/dtsearch/<name>.fzk" for appending.
+ *
+ * path and "<path>/dtsearch" are created when they are not already
+ * directories.  austext_path is set to "<path>/dtsearch" and afp to the
+ * fzk file opened in append mode.
+ *
+ * Throws PosixError when a directory cannot be created or the fzk file
+ * cannot be opened.
+ */
+AusTextStore::AusTextStore( const char *path, const char *name )
+{
+  if ( !isdir(path) ) {
+    makedir(path);
+  }
+
+  // "<path>" + "/" + "dtsearch" + terminating NUL
+  size_t textlen = strlen(path) + 1 + strlen("dtsearch") + 1;
+  austext_path = new char [ textlen ];
+  /*
+   * throw(ResourceExhausted)
+   *
+   */
+  assert ( austext_path != NULL );
+  
+  snprintf( austext_path, textlen, "%s/dtsearch", path );
+  
+  if ( !isdir(austext_path) ) {
+    // NOTE(review): if this throws, austext_path is not released here;
+    // whether PosixError keeps the pointer it is given is not visible
+    // from this file, so the buffer is left alone on this path.
+    makedir(austext_path);
+  }
+
+  char *fzk = form("%s/%s.fzk", austext_path, name );
+
+  /* Use append instead because this fzk file is going to be appended 
+   * all the time
+   */
+
+  afp = fopen ( fzk, "a" );
+  if ( !afp ) {
+    // Save errno before any further library call can overwrite it.
+    int open_errno = errno;
+
+    // Rebuild the file name from its parts instead of feeding fzk (a
+    // form() result) back into form(); presumably form() reuses one
+    // buffer, in which case source and destination would overlap.
+    char *msg = form("unable to open fzk file %s/%s.fzk\n",
+		     austext_path, name);
+
+    // The destructor does not run for an object whose constructor
+    // throws, so release the path buffer here.
+    delete [] austext_path;
+    austext_path = NULL;
+
+    throw(PosixError(open_errno, msg));
+  }
+}
+
+//-----------------------------------------------------------------------
+/*
+ * Append one record to the fzk file for every zone buffer of store
+ * (positions store->Default up to store->Total) that exists and is not
+ * empty.  Does nothing when the fzk file is not open or store is NULL.
+ *
+ * Each record consists of:
+ *   " 0,2"
+ *   "ABSTRACT: <SectionID>\t<BookShortTitle>\t<SectionTitle>"
+ *   "<zone name><record counter>"      (the record key)
+ *   "0/0/0~0:0"                        (null date)
+ *   the zone text followed by BookID and SectionID
+ *   a terminator line (with ^L when DTSR_USE_CNTR_L is defined)
+ *
+ * f_recordcount is incremented once per call, so all zones of one
+ * section share the same counter value in their keys.
+ *
+ * Throws PosixError when the zone text cannot be written.
+ */
+void
+AusTextStore::insert( const char *BookShortTitle,
+		      const char *BookID,
+		      const char *SectionID,
+		      const char *SectionTitle,
+		      DataRepository *store
+                    )
+{
+  /* write the abstract and record stuff in the fzk file */
+  if ( !afp || !store ) { return; }
+
+  f_recordcount++;
+
+#ifdef DTSR_LIKES_FGETS
+  // The choice of line splitter depends only on the environment, so
+  // make it once rather than once per zone.
+  unsigned int (*split_line)(const char *, const char *, int) = DefaultGetLine;
+
+  const char* lang = getenv("LANG");
+  if (lang && !strncmp(lang, "ja", strlen("ja"))) {
+    split_line = JpGetLine;
+  }
+#endif
+
+  /* Record type ie for all the zone content */
+  FlexBuffer **table = store->tabbuf();
+  for ( int pos = store->Default; pos < store->Total; pos++ ) {
+
+    // Skip zones that are missing or have no content.
+    if ( !table[pos] ) { continue; }
+    if ( !( table[pos]->GetSize() > 0 ) ) { continue; }
+
+    fprintf(afp, " 0,2\n");
+
+    /* abstract includes SectionID\tBookShortTitle\tSectionTitle */
+    fprintf(afp, "ABSTRACT: %s\t%s\t%s\n", SectionID,
+	    BookShortTitle,
+	    SectionTitle );
+
+    // first the record type
+    // A key made of zone name + BookID + SectionID was unique, but
+    // there is a limit to the size of the key, so just use a simple
+    // counter.  f_recordcount is unsigned long; print it as such
+    // instead of narrowing it to int.
+    fprintf(afp, "%s%lu\n", store->get_zone_name(pos), f_recordcount);
+
+    fprintf(afp, "0/0/0~0:0\n"); // null date
+
+    // Now the actual buffer; end_ptr is the last valid byte.
+    const char *start_ptr = table[pos]->GetBuffer();
+    const char *end_ptr = start_ptr + table[pos]->GetSize() - 1;
+
+#ifdef DTSR_LIKES_FGETS
+    // Write the text as a series of lines; a length of 0 ends the loop.
+    unsigned int num_byte;
+    while ( (num_byte = split_line(start_ptr, end_ptr, LINE_SIZE)) != 0 ) {
+      if ( !fwrite(start_ptr, num_byte, 1, afp ) )
+	{
+	  throw(PosixError(errno, "unable to write to fzk file\n" ) );
+	}
+      fputc('\n', afp );
+
+      start_ptr += num_byte;
+    }
+
+    // for current section and book level scopes, place the book and
+    //  section ids into the indexed data.
+    fprintf(afp, "\n%s\n%s\n", BookID, SectionID);
+#else
+    // Flatten the text to a single line.  This writes through the
+    // pointer returned by GetBuffer(), i.e. the zone buffer itself is
+    // modified in place.
+    char *ptr = (char*)start_ptr;
+    for (; ptr <= end_ptr; ptr++) {
+      if (*ptr == '\n')
+	*ptr = ' ';
+    }
+
+    if (fwrite(start_ptr, table[pos]->GetSize(), 1, afp) == 0)
+      throw(PosixError(errno, "unable to write to fzk file\n"));
+
+    // for current section and book level scopes, place the book and
+    //  section ids into the indexed data.
+    fprintf(afp, "\t%s\t%s", BookID, SectionID);
+#endif
+
+#ifdef DTSR_USE_CNTR_L
+    // Then the ^L character at the end
+    fprintf(afp, "\n%c\n", CNTR_L );
+#else
+    fprintf(afp, "\n");
+#endif
+  }
+}
+	
+//-----------------------------------------------------------------------
+/*
+ * Close the fzk file if it is open and release the path buffer.
+ */
+AusTextStore::~AusTextStore()
+{
+  // fclose() must not be given a null stream, so this guard stays.
+  if ( afp != NULL ) {
+    fclose(afp);
+  }
+
+  // delete [] on a null pointer does nothing; no guard is needed.
+  delete [] austext_path;
+}
+
+