Files
cdesktop/cde/programs/dtinfo/DtMmdb/hmphf/sorter.C
2018-04-28 12:30:20 -06:00

327 lines
8.2 KiB
C

/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: sorter.cc /main/3 1996/06/11 17:20:47 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "hmphf/sorter.h"
/*#define NUM_BUCKETS 10 */
#define NUM_BUCKETS 5000
sorter::sorter(char* key_file) :
v_max_bucket_sz(0),
v_no_unique_keys(0),
v_unique_keys(0),
b_convertor(NUM_BUCKETS, 128)
{
fstream in(key_file, ios::in);
_init(in);
}
sorter::sorter(istream& in) :
v_max_bucket_sz(0),
v_no_unique_keys(0),
v_unique_keys(0),
b_convertor(NUM_BUCKETS, 128)
{
_init(in);
}
void sorter::_init(istream& in)
{
v_bucket_array = new bucketPtr[NUM_BUCKETS];
int i;
for ( i=0; i<NUM_BUCKETS; v_bucket_array[i++] = 0);
char key_buf[LBUFSIZ];
int k;
while ( in.getline(key_buf, LBUFSIZ) ) {
i = b_convertor.atoi(key_buf, strlen(key_buf), 0, NUM_BUCKETS);
if ( v_bucket_array[i] == 0 ) {
v_bucket_array[i] = new bucket(key_buf, i, true);
k = 1;
} else {
k = v_bucket_array[i] -> add_key(key_buf, true);
}
v_max_bucket_sz = MAX(v_max_bucket_sz, k);
in.getline(key_buf, LBUFSIZ); // skip the next info field
}
filter_by_hash();
assemble_unique_keys();
}
sorter::~sorter()
{
int i;
for ( i=0; i<NUM_BUCKETS; delete v_bucket_array[i++] );
delete [] v_bucket_array;
for ( i=0; i<v_no_unique_keys; delete v_unique_keys[i++] );
delete [] v_unique_keys;
}
void sorter::filter_by_hash()
{
// a small hash table for keys to map to
v_map_table = new charPtr[2*v_max_bucket_sz];
int i;
for ( i=0; i<2*v_max_bucket_sz; v_map_table[i++] = 0 );
// an int table remembering slots in the v_map_table
// that have been mapped.
v_index_table = new int[v_max_bucket_sz];
// a charPtr table remembering possibly duplicated keys
v_check_table = new charPtr[v_max_bucket_sz];
// a charPtr table remembering dupcated keys
v_dup_table = new charPtr[v_max_bucket_sz];
for ( i=0; i<NUM_BUCKETS; i++ ) {
if ( v_bucket_array[i] )
filter_a_bucket(v_bucket_array[i]);
}
delete [] v_map_table;
delete [] v_check_table;
delete [] v_index_table;
delete [] v_dup_table;
}
void sorter::filter_a_bucket(bucketPtr bkt)
{
slist_void_ptr_cell *lp = bkt -> key_ptr;
while ( lp ) {
remove_keys(bkt, (char*)(lp -> void_ptr()), lp);
lp = (slist_void_ptr_cell*)(lp -> v_succ);
}
}
void sorter::remove_keys(bucketPtr bkt, char* key, slist_void_ptr_cell* lp)
{
//debug(cerr, key);
slist_void_ptr_cell *llp = lp;
while ( llp && llp -> v_succ ) {
slist_void_ptr_cell *next_lp =
(slist_void_ptr_cell*)llp -> v_succ;
//debug(cerr, (char*)(next_lp -> void_ptr()));
if ( strcmp( key, (char*)(next_lp -> void_ptr()) ) == 0 ) {
//MESSAGE(cerr, "rmove key:");
//cerr << (char*)(next_lp -> void_ptr()) << "\n";
llp -> v_succ = next_lp -> v_succ;
delete (char*)(next_lp -> void_ptr());
delete next_lp;
bkt -> v_no_keys --;
} else
llp = (slist_void_ptr_cell*)(llp -> v_succ);
}
}
//
//
// int n_chk = 0;
// int n_idx = 0;
//
// slist_void_ptr_cell *lp = bkt -> key_ptr;
//
// while ( lp ) {
//
//// maintaint the order of key chains in v_check_table[]!!!
//
// char* key2 = ((char*)(lp -> void_ptr()));
//
// int hash = b_convertor.atoi(
// key2, strlen(key2),
// 1, 2*v_max_bucket_sz
// );
//
// if ( v_map_table[hash] != 0 ) {
//
// if ( v_map_table[hash] != (charPtr)1 ) {
// v_check_table[n_chk++] = v_map_table[hash];
// v_map_table[hash] = (charPtr)1;
// // set to occupied. so that the same
// // key will be in the v_check_table once.
// // assume 1 will never be the address
// // of any keys.
// }
//
// v_check_table[n_chk++] = key2;
//
// } else {
// //v_map_table[hash] = ((Ostring*)(lp -> void_ptr())) -> get();
// v_map_table[hash] = ((char*)(lp -> void_ptr()));
// }
//
// v_index_table[n_idx++] = hash; // remember the slot being set
//
// lp = (slist_void_ptr_cell*)(lp -> succ);
// }
////debug(cerr, n_chk);
//
//// double check possiblly collided keys
// int n_dup = 0;
// for ( int u=0; u<n_chk-1; u++ ) {
// for (int v=u+1; v<n_chk; v++ ) {
// if ( v_check_table[u] && v_check_table[v] &&
// strcmp(v_check_table[u], v_check_table[v]) == 0 )
// {
//// mark v as a duplicated key
// v_dup_table[n_dup++] = v_check_table[v];
// v_check_table[v] = 0;
// }
//
// }
// }
//
////debug(cerr, n_dup);
//
//// remove the duplicates
// if ( n_dup > 0 ) {
// slist_void_ptr_cell *lp = bkt -> key_ptr;
// slist_void_ptr_cell *prev_lp = lp;
//
// int u = 0;
// lp = (slist_void_ptr_cell*)(lp -> succ);
//
// while ( lp && u<n_dup ) {
//
//
///*
//debug(cerr, v_dup_table[u]);
//debug(cerr, int(v_dup_table[u]));
//debug(cerr, (char*)(lp -> void_ptr()));
//debug(cerr, int((char*)(lp -> void_ptr())));
//*/
//
//
// if ( v_dup_table[u] == ((char*)(lp -> void_ptr())) )
// {
//
//MESSAGE(cerr, "key removed:");
//debug(cerr, v_dup_table[u]);
//
// prev_lp -> succ = lp -> succ;
// bkt -> v_no_keys --;
// u++;
// } else
// prev_lp = lp;
//
// lp = (slist_void_ptr_cell*)(lp -> succ);
//
// }
// }
//
//// clean v_map_table
// for ( int k=0; k<n_idx; k++ ) {
// v_map_table[v_index_table[k]] = 0;
// }
//
////MESSAGE(cerr, "SOS0");
//}
void sorter::assemble_unique_keys()
{
int i;
for ( i=0; i<NUM_BUCKETS; i++ ) {
if ( v_bucket_array[i] ) {
v_no_unique_keys += v_bucket_array[i] -> v_no_keys;
}
}
v_unique_keys = new charPtr[v_no_unique_keys];
int j = 0;
slist_void_ptr_cell *lp = 0;
for ( i=0; i<NUM_BUCKETS; i++ ) {
if ( v_bucket_array[i] == 0 ) continue;
lp = v_bucket_array[i] -> key_ptr;
while ( lp ) {
v_unique_keys[j++] = ((char*)(lp -> void_ptr()));
lp -> set_vptr(0);
lp = (slist_void_ptr_cell*)(lp -> v_succ);
}
}
}
ostream& operator<< (ostream& out, sorter& st)
{
debug(out, st.v_no_unique_keys);
for ( int i=0; i<st.v_no_unique_keys; i++ ) {
out << st.v_unique_keys[i] << "\n";
}
return out;
}