DtMmdb: move to lib directory as a standalone library.

This commit is contained in:
Liang Chang
2022-01-13 00:15:14 +08:00
parent f3baea9faa
commit e93b2bc626
589 changed files with 31 additions and 2985 deletions

View File

@@ -0,0 +1,8 @@
# Automake fragment for the hmphf sources.

# Remove the generated Makefile.in on "make maintainer-clean".
MAINTAINERCLEANFILES = Makefile.in
# libhmphf.la is a libtool library listed under noinst_, i.e. built but not installed.
noinst_LTLIBRARIES = libhmphf.la
# Define PORTABLE_DB and add the parent directory to the include search path.
libhmphf_la_CXXFLAGS = -DPORTABLE_DB -I..
# Sources compiled into libhmphf.la.
libhmphf_la_SOURCES = buckets.C mphf_funcs.C mphf_hash_table.C \
params.C pattern.C sorter.C

View File

@@ -0,0 +1,386 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: buckets.cc /main/3 1996/06/11 17:19:53 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "hmphf/buckets.h"
/*
 * Create a bucket that initially holds exactly one key.
 *
 *   key            the NUL-terminated key string
 *   orig_position  recorded in v_orig_pos
 *   copy           when true the bucket stores a private new[]-allocated
 *                  duplicate of key; when false it stores the caller's
 *                  pointer as-is
 *
 * The control bit starts at -1 and the g value at 0.
 */
bucket::bucket(char* key, int orig_position, Boolean copy) :
   v_no_keys(1),
   v_count(0),
   v_control_bit(-1),
   v_g_value(0),
   v_orig_pos(orig_position)
{
   char* stored_key = 0;

   if ( copy == true ) {
      // Duplicate the string, terminator included.
      int key_len = strlen(key);
      stored_key = new char[key_len + 1];
      memcpy(stored_key, key, key_len);
      stored_key[key_len] = '\0';
   } else if ( copy == false ) {
      stored_key = key;
   }

   key_ptr = new slist_void_ptr_cell(stored_key);
}
/*
 * Release every list cell hanging off key_ptr.  The successor link is
 * read before the current cell is deleted.
 */
bucket::~bucket()
{
   slist_void_ptr_cell* cell = key_ptr;

   while ( cell != 0 ) {
      slist_void_ptr_cell* next = (slist_void_ptr_cell*)(cell -> v_succ);
      delete cell;
      cell = next;
   }
}
int bucket::add_key(char* key, Boolean copy)
{
char *x = 0;
int len;
switch (copy) {
case true:
len = strlen(key);
x = new char[len + 1];
*((char *) memcpy(x, key, len) + len) = '\0';
break;
case false:
x = key;
break;
}
slist_void_ptr_cell* z = new slist_void_ptr_cell(x);
z -> v_succ = key_ptr;
key_ptr = z;
v_no_keys++;
return v_no_keys;
}
/*
 * Print the keys of a bucket in list order, each followed by a single
 * blank, and finish the line with a newline.
 */
ostream& operator<<(ostream& out, bucket& bt)
{
   for ( slist_void_ptr_cell* cell = bt.key_ptr;
         cell != 0;
         cell = (slist_void_ptr_cell*)(cell -> v_succ) ) {
      out << ((char*)(cell -> void_ptr())) << " ";
   }

   out << "\n";
   return out;
}
////////////////////////////////////////////////////
//
//
////////////////////////////////////////////////////
//buckets::buckets(char* key_file, params& pms) :
/*
 * Distribute the pms.v_n keys in keys[] over pms.v_b buckets.
 *
 * Each key is mapped to a bucket with bucket_num(); key pointers are
 * stored without copying.  While filling, the largest bucket size seen
 * is tracked in v_max_bucket_sz.  Finally the bucket array is reordered
 * by sort_by_size().
 */
buckets::buckets(char** keys, params& pms) :
   v_no_buckets(pms.v_b), v_max_bucket_sz(0),
   rnd(pms.v_seed),
   b_convertor(pms.v_n, 128, rnd),
   h_convertor(pms.v_n, 128, rnd)
{
   v_bucket_array = new bucketPtr[v_no_buckets];

   unsigned int slot;
   for ( slot = 0; slot < (unsigned int) v_no_buckets; slot++ )
      v_bucket_array[slot] = 0;

   unsigned int key_no;
   for ( key_no = 0; key_no < pms.v_n; key_no++ ) {
      int home = bucket_num(keys[key_no], pms);
      int size_now;

      if ( v_bucket_array[home] != 0 ) {
         size_now = v_bucket_array[home] -> add_key(keys[key_no], false);
      } else {
         // First key for this slot: the slot index doubles as orig_pos.
         v_bucket_array[home] = new bucket(keys[key_no], home, false);
         size_now = 1;
      }

      v_max_bucket_sz = MAX(v_max_bucket_sz, size_now);
   }

   sort_by_size();
}
/*
 * Destroy every bucket (empty slots hold null pointers) and then the
 * pointer array itself.
 */
buckets::~buckets()
{
   int slot = 0;
   while ( slot < v_no_buckets ) {
      delete v_bucket_array[slot];
      slot++;
   }

   delete [] v_bucket_array;
}
void buckets::set_control_bit(int cb)
{
for ( int i=0; i<v_no_buckets; i++ ) {
if ( (*this)[i] )
(*this)[i] -> set_control_bit(cb);
}
}
/*
 * Map key k to a bucket index.
 *
 * The key is first converted to an integer by b_convertor.  Values
 * below pms.v_p1 are folded into [0, v_p2); all other values are folded
 * into [v_p2, v_b).
 */
int buckets::bucket_num(char* k, params& pms)
{
   int slot = b_convertor.atoi(k, strlen(k), pms.v_r, pms.v_n);

   if ( slot >= (int) pms.v_p1 ) {
      slot %= (pms.v_b - pms.v_p2);
      slot += pms.v_p2;
      return slot;
   }

   slot %= pms.v_p2;
   return slot;
}
//int buckets::hash_value(char* k, int offset, int range)
//{
///*
//MESSAGE(cerr, "hash_value");
//debug(cerr, k);
//*/
////debug(cerr, strlen(k));
//
//
//
///*
//debug(cerr, offset+1);
//debug(cerr, range);
//*/
// int hv = h_convertor.atoi(k, strlen(k), offset+1, range);
//
////debug(cerr, hv);
//
///*
//if ( strcmp(k, "mphf_funcs.h") == 0 ||
// strcmp(k, "mmdb_fast_mphf") == 0 ) {
//debug(cerr, k);
//debug(cerr, offset);
//debug(cerr, range);
//debug(cerr, hv);
//}
//*/
//
// return hv;
//}
void buckets::sort_by_size()
{
//MESSAGE(cerr, "sort()");
int* links = new int[v_no_buckets];
int i;
for ( i=0; i<v_no_buckets; links[i++]=-1 );
int* sizes = new int[v_max_bucket_sz+1];
for ( i=0; i<v_max_bucket_sz+1; sizes[i++]=-1 );
// arrage buckets according to their size
int x;
for ( i = 0; i < v_no_buckets; i++ ) {
if ( v_bucket_array[i] == 0 )
continue;
x = v_bucket_array[i] -> v_no_keys;
links[i] = sizes[x];
sizes[x] = i;
}
bucketPtr* new_bkt_array = new bucketPtr[v_no_buckets];
int j=0;
int idx;
for ( i = v_max_bucket_sz; i >= 0; i-- ) {
if ( sizes[i] == -1 )
continue;
idx = sizes[i];
//debug(cerr, i);
while ( idx != -1 ) {
new_bkt_array[j++] = v_bucket_array[idx];
//debug(cerr, new_bkt_array[j-1] -> v_no_keys);
//debug(cerr, *new_bkt_array[j-1]);
v_bucket_array[idx] = 0;
idx = links[idx];
}
}
for ( ; j<v_no_buckets; new_bkt_array[j++] = 0 );
delete [] sizes;
delete [] links;
delete [] v_bucket_array;
v_bucket_array = new_bkt_array;
}
/*************************************************/
/* return -1 if no more pattern can be generated */
/* return 0 if a pattern is generated */
/*************************************************/
int
buckets::get_pattern(int idx, int_pattern& pat, params& pms)
{
v_bucket_array[idx] -> v_control_bit++;
for ( ; v_bucket_array[idx] -> v_control_bit<2 ;
v_bucket_array[idx] -> v_control_bit ++
) {
if ( use_current_params(idx, pat, pms) == 0 )
return 0;
}
return -1;
}
int
buckets::use_current_params(int idx, int_pattern& pat,
params& pms, Boolean use_g_value)
{
int i = 0;
slist_void_ptr_cell *lp = v_bucket_array[idx] -> key_ptr;
while ( lp ) {
//debug(cerr, (char*)(lp -> void_ptr()));
//cerr << (char*)(lp -> void_ptr()) << "\n";
if ( use_g_value == false ) {
pat.insert(
hash_value( ((char*)(lp -> void_ptr())),
pms.v_r + v_bucket_array[idx] -> v_control_bit,
pms.v_n
),
i++
);
} else {
//cerr << (char*)(lp -> void_ptr()) << "\n";
//debug(cerr, pms.r + v_bucket_array[idx] -> v_control_bit);
/*
debug(cerr, ((ostring*)(lp -> void_ptr())) -> get());
int from_tbl1 = hash_value(
((char*)(lp -> void_ptr())),
pms.r + v_bucket_array[idx] -> v_control_bit,
pms.n
);
debug(cerr, from_tbl1);
debug(cerr, v_bucket_array[idx] -> v_g_value);
*/
pat.insert(
(
hash_value(
((char*)(lp -> void_ptr())),
pms.v_r + v_bucket_array[idx] -> v_control_bit,
pms.v_n
)
+
v_bucket_array[idx] -> v_g_value
) % pms.v_n,
i++
);
}
lp = (slist_void_ptr_cell*)(lp -> v_succ);
}
//debug(cerr, v_bucket_array[idx] -> no_keys());
pat.reset_elmts(v_bucket_array[idx] -> no_keys());
//debug(cerr, pat);
return pat.duplicate();
}
// Dump every non-empty bucket of bts, in array order, and return out.
// debug() is defined outside this view; its exact output format is not
// visible here.
ostream& operator<<(ostream& out, buckets& bts)
{
for ( int i=0; i<bts.no_buckets(); i++ )
if ( bts[i] ) {
// NOTE(review): the original position is sent to cerr, not to `out`,
// while the bucket contents go to `out` -- confirm this split is intended.
debug(cerr, bts[i] -> orig_pos());
debug(out, *bts[i]);
}
return out;
}

View File

@@ -0,0 +1,134 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: buckets.h /main/3 1996/06/11 17:19:58 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _buckets_h
#define _buckets_h 1
#include <stdio.h>
#include "utility/atoi_pearson.h"
#include "dstr/slist_void_ptr_cell.h"
#include "hmphf/params.h"
#include "hmphf/pattern.h"
/*
 * A single hash bucket: a singly linked list of key strings plus the
 * per-bucket search state (control bit, g value) and the index the
 * bucket was created with (orig_pos).
 */
class bucket
{
public:
   // copy == true stores a private duplicate of the key string.
   bucket(char*, int orig_pos = 0, Boolean copy = true);
   ~bucket();

   // Prepends a key; returns the new number of keys.
   int add_key(char*, Boolean copy = true);

   // Accessors.
   short no_keys()     { return v_no_keys; }
   int   g_value()     { return v_g_value; }
   int   control_bit() { return v_control_bit; }
   int   orig_pos()    { return v_orig_pos; }

   // Mutators.
   void set_count(int i)        { v_count = i; }
   void set_control_bit(int cb) { v_control_bit = cb; }
   void set_g_value(int gv)     { v_g_value = gv; }

   friend ostream& operator<<(ostream&, bucket&);
   friend class buckets;
   friend class sorter;

protected:
   short v_no_keys;                // number of keys in the list
   int v_count;
   int v_control_bit;
   int v_g_value;
   int v_orig_pos;
   slist_void_ptr_cell* key_ptr;   // head of the key list
};

typedef bucket* bucketPtr;
/*
 * The full set of buckets used while searching for a minimal perfect
 * hash function.  Slots without keys hold null pointers.
 */
class buckets
{
public:
   // Distributes the keys over the buckets described by params.
   buckets(char** keys, params& params);
   ~buckets();

   // Sets the control bit of every non-empty bucket.
   void set_control_bit(int cb);

   // Returns 0 when a pattern was generated for bucket i, -1 otherwise.
   int get_pattern(int i, int_pattern&, params&);

   // Builds the pattern of bucket i from its current control bit.
   int use_current_params(int i, int_pattern&, params& pms,
                          Boolean use_g_value = false);

   int no_buckets()    { return v_no_buckets; }
   int max_bucket_sz() { return v_max_bucket_sz; }

   // Unchecked access; may return a null pointer for an empty slot.
   bucket* operator[](int i) { return v_bucket_array[i]; }

   friend ostream& operator<<(ostream&, buckets&);

protected:
   void sort_by_size();
   int bucket_num(char* k, params& pms);

   // Hash of key k via h_convertor; note the offset is passed as offset+1.
   int hash_value(char* k, int offset, int range)
   {
      return h_convertor.atoi(k, strlen(k), offset+1, range);
   }

protected:
   bucket** v_bucket_array;
   int v_no_buckets;
   int v_max_bucket_sz;

   // Declared before the convertors, which are constructed from it.
   pm_random rnd;
   atoi_pearson b_convertor;
   atoi_pearson h_convertor;
};
#endif

113
cde/lib/DtMmdb/hmphf/main.C Normal file
View File

@@ -0,0 +1,113 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: main.cc /main/3 1996/06/11 17:20:02 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "utility/xtime.h"
#include "hmphf/mphf_funcs.h"
#include "hmphf/sorter.h"
#define NO_BITS_INCREASES 5
/*
 * Command-line driver: compute a minimal perfect hash function for the
 * keys found in the file named by argv[1].
 *
 * Up to NO_BITS_INCREASES attempts are made; after an attempt that
 * reports 1 the parameters are re-selected and the computation is
 * retried.
 *
 * Exit status: 0 when hashing succeeded, -1 on a usage error, on a hard
 * failure reported by compute_a_mphf(), or when every attempt failed.
 */
int main(int argc, char* argv[])
{
   if ( argc != 2 ) {
      // argv[0] is passed as data, never as (part of) the format string.
      fprintf(stderr, "usage: %s keyfile", argv[0]);
      return(-1);
   }

   sorter stor(argv[1]);

   params pms;
   pms.v_n = stor.no_unique_keys();
   pms.select_value();

   buffer mphf_spec(LBUFSIZ);

   for ( int i=0; i<NO_BITS_INCREASES; i++ ) {

      int ok = compute_a_mphf(stor.unique_keys(), pms, mphf_spec);

      switch (ok) {
         case 0:
            MESSAGE(cerr, "Hashing done");
            exit(0);

         case 1:
            // No function found with these parameters: pick new ones.
            pms.re_select_value();
            break;

         case -1:
            exit(-1);
      }
   }

   MESSAGE(cerr, "finding a mphf failed");
   exit(-1);
}

View File

@@ -0,0 +1,381 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: mphf_funcs.cc /main/4 1996/07/18 14:33:08 drk $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "hmphf/mphf_funcs.h"
#define NO_BACKTRACKS 50
#define NO_MAPPINGS 20
#define NO_SEARCHINGS 10
//compute_a_mphf(char* key_file, params& pms, char* mphf_spec_file)
/*
 * Try to compute a minimal perfect hash function for keys.
 *
 * For each of NO_MAPPINGS key-to-bucket mappings up to NO_SEARCHINGS
 * searches are run.  Between mappings pms.v_r is incremented.
 *
 * Returns the result of write_spec() when a function was found and
 *         verified,
 *         -1 when a found function fails verification,
 *          1 when no function was found within the attempt budget.
 */
int compute_a_mphf(char** keys, params& pms, buffer& mphf_buffer)
{
   mphf_hash_table ht(pms);

   for ( int mapping = 0; mapping < NO_MAPPINGS; mapping++ ) {

      buckets bs(keys, pms);

      for ( int attempt = 0; attempt < NO_SEARCHINGS; attempt++ ) {

         bs.set_control_bit(-1);
         ht.clear();

         if ( search(bs, ht, pms) != 0 )
            continue;

         // A candidate was found: it must verify before it is written.
         if ( verify(bs, ht, pms) != 0 )
            return -1;

         return write_spec(bs, pms, mphf_buffer);
      }

      pms.v_r++;
   }

   return 1;
}
// Search for a g value for every non-empty bucket so that all bucket
// patterns fit into the hash table ht.
//
// Buckets are visited in array order.  For each bucket the patterns
// produced by bs.get_pattern() are tried with ht.fast_fit(); the first
// alignment that fits is stored as the bucket's g value.  When no
// pattern of a bucket fits, the search steps back to the previous bucket.
//
// Returns  0 if the most recent fit attempt succeeded,
//         -1 if it did not,
//         -2 if the number of backtracks exceeded bs.no_buckets().
//
// NOTE(review): the final return value reflects only the last fit
// attempt.  If the loop stops because `fails` reached its limit right
// after a bucket was fitted, 0 is returned although later buckets were
// never placed -- confirm callers rely on verify() to catch that case.
int search(buckets& bs, mphf_hash_table& ht, params& pms)
{
int i = 0,
fails = 0,
patternFit = 0;
int num_backtracks = 0;
// Failure budget: half the number of buckets.
int no_search_fails = bs.no_buckets() / 2;
int_pattern new_pattern(bs.max_bucket_sz());
while ( i < bs.no_buckets() && fails < no_search_fails ) {
// Empty slots need no g value.
if ( bs[i] == 0 || bs[i] -> no_keys() == 0 ) {
i++;
continue;
};
patternFit = -1;
//MESSAGE(cerr, "before fit");
// Try each remaining control-bit pattern of bucket i.
while ( bs.get_pattern(i, new_pattern, pms) == 0 ) {
//debug(cerr, new_pattern);
//cerr << new_pattern.no_elmts();
patternFit = ht.fast_fit(new_pattern);
if ( patternFit >= 0 ) {
//MESSAGE(cerr, " fit");
// fast_fit() returned the alignment; remember it as the g value.
bs[i] -> set_g_value(patternFit);
break;
} else {
//MESSAGE(cerr, " fail");
fails++;
}
}
if ( patternFit == -1 ) {
//MESSAGE(cerr, "BACKTRACK");
// Nothing fitted: step back one bucket, or give up at the first one.
if ( i <= 0 ) break;
i--;
//fails = 0;
if ( num_backtracks > bs.no_buckets() ) {
return -2;
} else
num_backtracks++;
} else {
//MESSAGE(cerr, "increment i");
i++;
}
}
return ( patternFit >= 0 ) ? 0 : -1;
}
/*
 * Check a hash function produced by search().
 *
 * The table must be completely filled on entry.  It is then cleared
 * and every non-empty bucket is re-inserted using its stored g value;
 * any collision, or a table that is not full afterwards, is a failure.
 *
 * Returns 0 when the function verifies, -1 otherwise.  Diagnostics are
 * written to cerr.
 */
int verify(buckets& bs, mphf_hash_table& ht, params& pms)
{
   int_pattern new_pattern(bs.max_bucket_sz());

   if ( ht.num_filled_slots() != ht.no_slots() ) {
      MESSAGE(cerr, "panic: hash table not full or 'too' full");
      MESSAGE(cerr, form("filled_slots = %d\n", ht.num_filled_slots()));
      MESSAGE(cerr, form("no_slots = %d\n", ht.no_slots()));
      return -1;
   }

   ht.clear();

   int i = 0;
   while ( i < bs.no_buckets() ) {

      if ( bs[i] != 0 && bs[i] -> no_keys() != 0 ) {

         bs.use_current_params(i, new_pattern, pms, true);

         if ( ht.fit_hash_table(new_pattern) != 0 ) {
            MESSAGE(cerr, "panic: collision occurred");
            return -1 ;
         }
      }

      i++;
   }

   if ( ht.num_filled_slots() == ht.no_slots() ) {
      MESSAGE(cerr, "verifying OK");
      return 0;
   }

   MESSAGE(cerr, "panic: hash table not full after test insertion");
   MESSAGE(cerr, form("filled_slots = %d\n", ht.num_filled_slots()));
   MESSAGE(cerr, form("no_slots = %d\n", ht.no_slots()));
   return -1;
}
/*
 * Serialize a computed hash function into mphf_buffer.
 *
 * Layout: the six parameters v_n, v_b, v_p1, v_p2, v_r and v_seed as
 * decimal text, one per line; then the byte size of the packed g-value
 * array, a tab, the raw array bytes and a final newline.
 *
 *   bs           buckets carrying the g values and control bits
 *   pms          parameters of the hash function
 *   mphf_buffer  receives the specification; its content size is set
 *                to the number of bytes written
 *
 * Always returns 0.
 */
int write_spec(buckets& bs, params& pms, buffer& mphf_buffer)
{
   // Bits per packed entry; compact() stores (g_value << 1) + control_bit.
   unsigned int gv_bits = 0;
   if ( pms.v_n > 0 ) {
      gv_bits = (int)(flog2(pms.v_n)) + 1; /* bits of each g value.*/
      if ( floor(flog2(pms.v_n)) < flog2(pms.v_n) )
         gv_bits++;
   }

   // Number of unsigned words needed for v_b entries, rounded up.
   int uints_of_cmpat_gv = gv_bits * pms.v_b;
   if ( uints_of_cmpat_gv % BITS_IN(unsigned) > 0 )
      uints_of_cmpat_gv += BITS_IN(unsigned);
   uints_of_cmpat_gv /= BITS_IN(unsigned);

   unsigned int *c_array = new unsigned[uints_of_cmpat_gv];
   compact(bs, c_array, gv_bits, mphf_buffer.get_swap_order());

   unsigned int g_array_bytes = sizeof(unsigned int) * uints_of_cmpat_gv;

   // Seven decimal fields precede the array.  Three characters per byte
   // bound the digits of any value of that width (10 digits for 32
   // bits); one more for a sign and one for the separator.  The earlier
   // estimate of sizeof(unsigned int) + 1 characters per field was too
   // small for values of five or more digits.
   const unsigned int max_field_chars = 3 * sizeof(unsigned int) + 2;
   int spec_bytes = 7 * max_field_chars + g_array_bytes + 1;

   // NOTE(review): assumes expand_chunk(n) guarantees at least n bytes
   // at get_base() -- buffer is defined outside this view.
   mphf_buffer.expand_chunk(spec_bytes);

   ostringstream fout(mphf_buffer.get_base(), ios::out);

   fout << pms.v_n << "\n";
   fout << pms.v_b << "\n";
   fout << pms.v_p1 << "\n";
   fout << pms.v_p2 << "\n";
   fout << pms.v_r << "\n";
   fout << pms.v_seed << "\n";

   fout << g_array_bytes << '\t';
   fout.write((char*)c_array, g_array_bytes);
   fout << '\n';

   int fout_len = fout.str().size();
   mphf_buffer.set_content_sz(fout_len);
   memcpy(mphf_buffer.get_base(), fout.str().c_str(), fout_len);

   delete [] c_array;

   return 0;
}
// Pack one t-bit value per bucket into the word array s[].
//
//   bs    the buckets; each non-empty bucket contributes
//         (g_value << 1) + control_bit, stored at its orig_pos
//   s     output array of unsigned words; must be large enough for
//         bs.no_buckets() * t bits (not checked here)
//   t     number of bits used for each value
//   swap  under PORTABLE_DB, when true each finished word is passed
//         through ORDER_SWAP_UINT
//
// Values are written starting at the most significant end of each word;
// a value that does not fit into the remaining bits is split across two
// consecutive words.  Always returns 0.
int compact(buckets& bs, unsigned s[], int t, Boolean swap)
{
int target, k, i, remaining_bits, branch = 0;
unsigned unsigned_g, high_part_bits;
unsigned lower_part_bits = 0;
// remaining_bits: free bits left in the word being assembled in target.
remaining_bits = BITS_IN(unsigned);
k = target = 0;
// y[] holds the value of every bucket indexed by its original position;
// positions without a non-empty bucket stay 0.
unsigned* y = new unsigned[bs.no_buckets()];
for ( i = 0; i < bs.no_buckets(); y[i++] = 0 );
for ( i = 0; i < bs.no_buckets(); i++ ) {
if ( bs[i] && bs[i] -> no_keys() > 0 ) {
y[bs[i] -> orig_pos()] =
((bs[i] -> g_value()) << 1) + bs[i] -> control_bit();
/*
cerr << bs[i] -> orig_pos() << " ";
cerr << bs[i] -> g_value() << " ";
cerr << bs[i] -> control_bit() << " ";
cerr << y[bs[i] -> orig_pos()] << "\n";
*/
}
}
/*
MESSAGE(cerr, "=======BIT ARRAY:");
debug(cerr, bs.no_buckets());
for ( i = 0; i < bs.no_buckets(); i++ )
cerr << i << " " << y[i] << "\n";
cerr << "=======\n";
*/
//MESSAGE(cerr, "=======BIT ARRAY (before swap):");
for ( i = 0; i < bs.no_buckets(); i++ ) {
unsigned_g = y[i];
/*
debug(cerr, i);
debug(cerr, hex(unsigned_g));
*/
/*
debug(cerr, form("%x", c_bit));
debug(cerr, "=====");
*/
if (remaining_bits >= t ) {
// The whole value fits: place it directly below the bits already used.
unsigned_g <<= (remaining_bits -t);
target |= unsigned_g;
remaining_bits -= t;
branch = 0;
} else {
// The value straddles a word boundary.  getbits() is defined outside
// this view; presumably it yields the leading remaining_bits bits of
// the t-bit value -- TODO confirm.
high_part_bits = getbits(unsigned_g,t,remaining_bits);
// NOTE(review): `~0` is a signed int, so this left-shifts a negative
// value; consider `~0u`.
lower_part_bits = unsigned_g & ~(~0 << (t-remaining_bits));
// Move the low (t - remaining_bits) bits to the top of the next word.
lower_part_bits <<= (BITS_IN(unsigned)- (t-remaining_bits));
s[k++] = target | high_part_bits;
#ifdef PORTABLE_DB
if ( swap == true )
ORDER_SWAP_UINT(s[k-1]);
#endif
target = lower_part_bits;
remaining_bits = BITS_IN(unsigned) - ( t - remaining_bits );
branch =1;
}
}
// Flush the word that is still being assembled.
if ( bs.no_buckets() > 0 ) {
s[k] = ( branch == 0 ) ? target : lower_part_bits;
#ifdef PORTABLE_DB
if ( swap == true )
ORDER_SWAP_UINT(s[k]);
#endif
}
/*
MESSAGE(cerr, "=======BIT ARRAY (after swap):");
debug(cerr, k+1);
for ( i = 0; i <= k; i++ )
cerr << i << " " << s[i] << "\n";
cerr << "=======\n";
*/
delete [] y;
return 0;
}
/*
 * Count the lines of a key file and measure its longest line.
 *
 *   file_name   path of the file to read
 *   lines       set to the number of lines read
 *   max_length  set to the largest value of (line length - 1) seen,
 *               where an empty line counts as 0
 *
 * Returns 0 if at least one line was read, -1 for an empty file.
 * Throws streamException if the file cannot be opened.
 *
 * NOTE(review): getline() already drops the newline, so the "- 1"
 * reports one less than the visible line length; it is kept for
 * compatibility with existing callers -- confirm it is intended.
 */
int wc(char* file_name, unsigned int& lines, unsigned int& max_length)
{
   char buf[BUFSIZ];

   fstream in(file_name, ios::in);
   if ( !in ) {
      MESSAGE(cerr, "can not open key file");
      throw(streamException(in.rdstate()));
   }

   lines = 0;
   max_length = 0;

   while ( in.getline(buf, BUFSIZ) ) {
      // strlen() is unsigned: subtracting 1 from the length of an empty
      // line would wrap around and push max_length to its maximum.
      const size_t len = strlen(buf);
      const unsigned int counted = ( len > 0 ) ? (unsigned int)(len - 1) : 0;

      if ( counted > max_length )
         max_length = counted;

      lines++;
   }

   if ( lines == 0 ) {
      MESSAGE(cerr, "empty key file");
      return -1;
   } else
      return 0;
}

View File

@@ -0,0 +1,81 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: mphf_funcs.h /main/4 1996/08/21 15:52:10 drk $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _mphf_funcs_h
#define _mphf_funcs_h 1
#ifdef C_API
#include "utility/c_stringstream.h"
#else
#include <sstream>
#endif
#include "utility/funcs.h"
#include "utility/buffer.h"
#include "hmphf/buckets.h"
#include "utility/pm_random.h"
#include "hmphf/mphf_hash_table.h"
#include "hmphf/pattern.h"
// Computes a minimal perfect hash function for keys and writes its
// specification into mphf_spec_buffer.  Returns 0 on success, -1 if a
// found function fails verification, 1 if none was found.
int compute_a_mphf(char** keys, params& params_ptr, buffer& mphf_spec_buffer);
// Searches for g values that fit every bucket into ht.  Returns 0 on
// success, -1 on failure, -2 when the backtrack limit is exceeded.
int search(buckets& bs, mphf_hash_table& ht, params& pms);
// Re-inserts all buckets to check the found function.  Returns 0 if it
// verifies, -1 otherwise.
int verify(buckets& bs, mphf_hash_table& ht, params& pms);
// Serializes the parameters and the packed g-value array into
// mphf_spec_buffer.  Returns 0.
int write_spec(buckets& bs, params& pms, buffer& mphf_spec_buffer);
// Packs one t-bit value per bucket into s[]; swap requests a byte-order
// swap of each word under PORTABLE_DB.  Returns 0.
int compact(buckets& bs, unsigned s[], int t, Boolean swap);
// Counts the lines of file_name and measures its longest line.  Returns
// 0, or -1 for an empty file; throws if the file cannot be opened.
int wc(char* file_name, unsigned int& lines, unsigned int& max_length);
#endif

View File

@@ -0,0 +1,168 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $TOG: mphf_hash_table.C /main/4 1998/04/17 11:50:23 mgreess $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "hmphf/mphf_hash_table.h"
#define FULL 1
/*
 * Create a hash table with params_ptr.v_n slots.  The three working
 * arrays are allocated with one entry per slot and then initialized by
 * clear().
 */
mphf_hash_table::mphf_hash_table(params& params_ptr) :
   v_no_slots(params_ptr.v_n),
   v_num_filled_slots(0)
{
   const int slots = v_no_slots;

   v_rep          = new char[slots];
   v_random_table = new int[slots];
   v_map_table    = new int[slots];

   clear();
}
/*
 * Free the three per-slot arrays allocated by the constructor.
 */
mphf_hash_table::~mphf_hash_table()
{
   delete [] v_map_table;
   delete [] v_random_table;
   delete [] v_rep;
}
/*
 * Reset the table to the empty state.
 *
 * Every slot is marked free, v_random_table is rebuilt as a shuffled
 * list of all slot numbers (the generator is always seeded with 19, so
 * the order is the same on every call) and v_map_table is set to its
 * inverse: v_map_table[slot] is the position of slot in v_random_table.
 */
void mphf_hash_table::clear()
{
   pm_random pm(19);
   int slot;

   for ( slot = 0; slot < v_no_slots; slot++ ) {
      v_rep[slot] = 0;
      v_random_table[slot] = slot;
   }

   // Swap each position with a randomly chosen one at or after it.
   for ( slot = 0; slot < v_no_slots; slot++ ) {
      int_swap( v_random_table[pm.rand() % ( v_no_slots - slot) + slot],
                v_random_table[slot]
              );
   }

   for ( slot = 0; slot < v_no_slots; slot++ )
      v_map_table[v_random_table[slot]] = slot;

   v_num_filled_slots = 0;
}
// Try to place pattern pat into the table.
//
// Candidate alignments are taken from v_random_table, starting at index
// v_num_filled_slots: each alignment is the shift that makes pat[0] land
// on the candidate slot.  For the first alignment under which no other
// pattern element hits a FULL slot, all landing slots are marked FULL,
// the bookkeeping tables are updated and the alignment is returned.
//
// Returns the alignment (>= 0) on success, -1 if no alignment fits.
//
// NOTE(review): the test loop starts at j=1, so the slot for pat[0] is
// not checked; this appears to rely on every entry of v_random_table at
// index >= v_num_filled_slots being a free slot -- confirm.
int mphf_hash_table::fast_fit(int_pattern& pat)
{
int ok;
int i, j, alignment, landing_slot;
int right_rdm_tbl_index, left_rdm_tbl_cnt;
for ( i = v_num_filled_slots; i < v_no_slots; i++ ) {
ok = 0;
/**************************/
/* compute the alignment */
/**************************/
// v_no_slots is added before the modulo so the difference is not negative.
alignment = (v_no_slots + v_random_table[i] - pat[0])
% v_no_slots;
/**************************/
/* test fit the pattern */
/**************************/
for ( j=1; j<pat.no_elmts(); j++ ) {
landing_slot = (pat[j] + alignment) % v_no_slots;
if ( v_rep[landing_slot] == FULL ) {
ok = -1;
break; // try another alignment
}
}
/**************************/
/* really fit the pattern */
/**************************/
if ( ok == 0 ) {
for ( j=0; j < pat.no_elmts(); j++ ) {
landing_slot = (pat[j] + alignment) % v_no_slots;
v_rep[landing_slot] = FULL ;
// Exchange the landing slot with the entry at position
// v_num_filled_slots + j of v_random_table, keeping v_map_table
// in step with it.
right_rdm_tbl_index = v_map_table[landing_slot];
left_rdm_tbl_cnt = v_random_table[v_num_filled_slots + j];
v_random_table[right_rdm_tbl_index] = left_rdm_tbl_cnt;
v_map_table[left_rdm_tbl_cnt] = right_rdm_tbl_index;
v_random_table[v_num_filled_slots + j] = landing_slot;
v_map_table[landing_slot] = v_num_filled_slots + j;
}
v_num_filled_slots += pat.no_elmts();
return alignment;
}
}
return -1;
}
/*
 * Insert the slots listed in pat into the table, without any shifting.
 *
 * Each element of pat is used directly as a slot index.  If one of the
 * slots is already occupied, the slots claimed earlier in this call are
 * released again and the fill count is restored.
 *
 * Returns 0 when every element was placed, -1 on a collision.
 */
int mphf_hash_table::fit_hash_table(int_pattern& pat)
{
   for ( int i = 0; i < pat.no_elmts(); i++ ) {

      const int slot = int(pat[i]);

      if ( v_rep[slot] != 0 ) {
         // Roll back only what this call claimed: elements 0 .. i-1.
         // The colliding slot was occupied before element i was tried,
         // so it is neither cleared nor counted here.
         for ( int j = 0; j < i; j++ )
            v_rep[int(pat[j])] = 0;

         v_num_filled_slots -= i;
         return -1;
      }

      v_rep[slot] = FULL;
      v_num_filled_slots++;
   }

   return 0;
}

View File

@@ -0,0 +1,81 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: mphf_hash_table.h /main/3 1996/06/11 17:20:23 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _mphf_hash_table_h
#define _mphf_hash_table_h 1
#include "pattern.h"
#include "utility/funcs.h"
#include "utility/pm_random.h"
#include "hmphf/params.h"
/*
 * Occupancy table used while searching for and verifying a minimal
 * perfect hash function.
 */
class mphf_hash_table {

public:
   // Allocates a table with params_ptr.v_n slots and clears it.
   mphf_hash_table(params& params_ptr);
   ~mphf_hash_table();

   // Marks every slot free and rebuilds the shuffled slot order.
   void clear();

   // Places pat at the first fitting alignment; returns the alignment
   // or -1 if none fits.
   int fast_fit(int_pattern& pat);

   // Places pat using its values directly as slot numbers; returns 0,
   // or -1 on a collision.
   int fit_hash_table(int_pattern& pat);

   int num_filled_slots() { return v_num_filled_slots; }
   int no_slots()         { return v_no_slots; }

protected:
   int* v_map_table;        // slot number -> position in v_random_table
   int* v_random_table;     // slot numbers in shuffled order
   char* v_rep;             // per-slot occupancy flag

   int v_no_slots;
   int v_num_filled_slots;
};
#endif

View File

@@ -0,0 +1,103 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: params.cc /main/4 1996/07/18 14:33:30 drk $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "hmphf/params.h"
// One row of the tuning table consulted by params::select_value().
struct partition_t {
   int upper_bound;   // row applies while (int)v_n <= upper_bound
   float bits;        // value given to v_bits when the caller passes bts == 0.0
   float p1;          // multiplied by v_n to obtain v_p1
   float p2;          // multiplied by v_b to obtain v_p2
};

// Rows are in ascending upper_bound order. The last row uses MAXINT so the
// unbounded scan in select_value() always finds a match.
partition_t partition_tbl [] = {
   {   100, 3.2, 0.52, 0.31},
   {  1000, 3.4, 0.55, 0.3},
   { 10000, 3.4, 0.57, 0.29},
   {MAXINT, 4.2, 0.58, 0.3}
};
// Derives v_bits, v_b, v_p1 and v_p2 from the key count v_n, and resets
// v_r to 0 and v_seed to 1.
//
// bts: value to store in v_bits; 0.0 means "take the bits column of the
//      partition_tbl row selected by v_n".
void params::select_value(float bts)
{
   // Find the first row whose upper_bound covers v_n. There is no explicit
   // bound on the scan: the table's final MAXINT row stops it.
   unsigned int row = 0;
   while ( (int)v_n > partition_tbl[row].upper_bound )
      row++;

   v_bits = ( bts == 0.0 ) ? partition_tbl[row].bits : bts;

   v_r = 0;

   // With no keys there are no buckets; this also keeps flog2() from being
   // called on 0.
   if ( v_n > 0 )
      v_b = (int)( float(v_n*v_bits) / (1.0+flog2(v_n)) );
   else
      v_b = 0;

   v_p1 = (int)( partition_tbl[row].p1 * v_n);
   v_p2 = (int)( partition_tbl[row].p2 * v_b);

   // Neither split point is allowed to be zero.
   if ( v_p1 == 0 )
      v_p1 = 1;
   if ( v_p2 == 0 )
      v_p2 = 1;

   v_seed = 1;
}
// Re-runs select_value() with the current v_bits raised by 0.2.
void params::re_select_value()
{
   // The sum is formed in double and narrowed to float, exactly as happens
   // when it is passed straight to the float parameter.
   const float raised_bits = v_bits+0.2;

   select_value(raised_bits);
}
// Writes every field of pms to out through debug(), one call per field, and
// returns out so the call can be chained.
// v_bits is cast to int first, so its fractional part is not shown.
// NOTE(review): debug() is declared outside this file; if it is a macro that
// stringifies its argument, the exact spelling of each expression below is
// part of the emitted text -- confirm before renaming anything here.
ostream& operator<<(ostream& out, params& pms)
{
debug(out, pms.v_n);
debug(out, pms.v_b);
debug(out, pms.v_p1);
debug(out, pms.v_p2);
debug(out, (int)pms.v_bits);
debug(out, pms.v_r);
debug(out, pms.v_seed);
return out;
}

View File

@@ -0,0 +1,83 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: params.h /main/3 1996/06/11 17:20:32 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _params_h
#define _params_h 1
#include <limits.h>
#if defined(CSRG_BASED)
#define MAXINT INT_MAX
#else
#include <values.h>
#endif
#include "utility/funcs.h"
// Tuning parameters for building a hash function over a key set.
// All fields are public and are written directly by users of the class;
// select_value() fills in the derived ones from v_n.
class params
{
public:
   float v_bits;           /* copied from the bts argument or from the partition table by select_value() */
   unsigned int v_p1;      /* derived by select_value() from v_n */
   unsigned int v_p2;      /* derived by select_value() from v_b */
   unsigned int v_b;       /* no. of buckets */
   unsigned int v_n;       /* no. of keys */
   unsigned int v_r;       /* offset into the random number table */
   unsigned int v_seed;    /* seed to the random number table generator */

   // Start every field at zero. Previously the fields were left
   // indeterminate, so calling select_value() (which reads v_n) or
   // operator<< on a freshly constructed object read uninitialized memory.
   // The initializers are listed in declaration order.
   params() :
      v_bits(0),
      v_p1(0),
      v_p2(0),
      v_b(0),
      v_n(0),
      v_r(0),
      v_seed(0)
   {}

   ~params() {}

   // bts == 0.0 (the default) asks select_value() to pick v_bits itself.
   void select_value(float bts = 0.0);
   void re_select_value();

   friend ostream& operator<<(ostream&, params&);
};
#endif

View File

@@ -0,0 +1,75 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: pattern.cc /main/3 1996/06/11 17:20:37 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "pattern.h"
// Creates an empty pattern (v_count == 0) backed by a freshly allocated
// array of i ints. The initializers follow the member declaration order
// v_rep_size, v_count, v_rep.
int_pattern::int_pattern(int i) :
   v_rep_size(i),
   v_count(0),
   v_rep(new int[i])
{
}
// Frees the element array allocated by the constructor.
int_pattern::~int_pattern()
{
   delete[] v_rep;
}
// Pairwise duplication check over the first no_elmts() elements.
// Returns -1 as soon as two equal elements are found, 0 when all differ
// (including when there are fewer than two elements).
int int_pattern::duplicate()
{
   const int count = no_elmts();

   for ( int lo = 0; lo < count-1; lo++ ) {
      const int probe = (*this)[lo];

      for ( int hi = lo+1; hi < count; hi++ ) {
         if ( probe == (*this)[hi] )
            return -1;
      }
   }

   return 0;
}

View File

@@ -0,0 +1,82 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: pattern.h /main/3 1996/06/11 17:20:42 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _int_pattern_h
#define _int_pattern_h 1
#include "utility/funcs.h"
#include "utility/debug.h"
// A heap-allocated array of ints plus a separately maintained element count.
class int_pattern
{

public:
   // size is the number of ints to allocate for the pattern.
   int_pattern(int size = 0);
   virtual ~int_pattern();

   // Overwrites the element count; the stored ints are left untouched.
   void reset_elmts(int i)
   {
      v_count = i;
   }

   // Current element count.
   int no_elmts()
   {
      return v_count;
   }

   // Element at index i. No range check is performed.
   int operator [](int i)
   {
      return v_rep[i];
   }

   // Stores elmt at index i. No range check is performed and the element
   // count is not updated.
   void insert(int elmt, int i)
   {
      v_rep[i] = elmt;
   }

   int duplicate(); // duplication check.

   friend ostream& operator <<(ostream&, int_pattern&);

protected:
   unsigned int v_rep_size ;
   unsigned int v_count ;
   int *v_rep;
};
#endif

View File

@@ -0,0 +1,326 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: sorter.cc /main/3 1996/06/11 17:20:47 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "hmphf/sorter.h"
/*#define NUM_BUCKETS 10 */
// Length of sorter::v_bucket_array. The same value is handed to the
// b_convertor constructor and to b_convertor.atoi() when a key is mapped
// to a bucket index in sorter::_init().
#define NUM_BUCKETS 5000
// Builds the sorter from the file named key_file.
// The result of opening the file is not checked here; the stream is handed
// to _init() as is.
sorter::sorter(char* key_file) :
   v_max_bucket_sz(0), v_no_unique_keys(0), v_unique_keys(0),
   b_convertor(NUM_BUCKETS, 128)
{
   fstream key_stream(key_file, ios::in);

   _init(key_stream);
}
// Builds the sorter from an already opened input stream.
sorter::sorter(istream& in) :
   v_max_bucket_sz(0), v_no_unique_keys(0), v_unique_keys(0),
   b_convertor(NUM_BUCKETS, 128)
{
   _init(in);
}
// Reads the input two lines at a time: the first line of each pair is a key,
// the second is an info field that is read and discarded. Every key goes
// into the bucket selected by b_convertor; afterwards duplicates are removed
// and the surviving keys are gathered into v_unique_keys.
void sorter::_init(istream& in)
{
   v_bucket_array = new bucketPtr[NUM_BUCKETS];

   int slot;
   for ( slot = 0; slot < NUM_BUCKETS; slot++ )
      v_bucket_array[slot] = 0;

   char line[LBUFSIZ];

   while ( in.getline(line, LBUFSIZ) ) {

      slot = b_convertor.atoi(line, strlen(line), 0, NUM_BUCKETS);

      // presumably add_key() returns the bucket's key count after the
      // insertion (a new bucket counts as 1) -- confirm in buckets.h
      int chain_len;
      if ( v_bucket_array[slot] != 0 ) {
         chain_len = v_bucket_array[slot] -> add_key(line, true);
      } else {
         v_bucket_array[slot] = new bucket(line, slot, true);
         chain_len = 1;
      }

      v_max_bucket_sz = MAX(v_max_bucket_sz, chain_len);

      in.getline(line, LBUFSIZ); // skip the next info field
   }

   filter_by_hash();
   assemble_unique_keys();
}
// Destroys every bucket and the bucket array, then every key string held in
// v_unique_keys (with scalar delete) and the key array itself.
sorter::~sorter()
{
   for ( int b = 0; b < NUM_BUCKETS; b++ )
      delete v_bucket_array[b];
   delete [] v_bucket_array;

   for ( int k = 0; k < v_no_unique_keys; k++ )
      delete v_unique_keys[k];
   delete [] v_unique_keys;
}
// Runs filter_a_bucket() over every non-empty bucket, bracketed by the
// allocation and release of four scratch tables sized from v_max_bucket_sz.
//
// NOTE(review): filter_a_bucket() and remove_keys() as written in this file
// never read or write the scratch tables; only the commented-out legacy
// filter refers to them. The four member pointers are not reset after the
// tables are freed.
void sorter::filter_by_hash()
{
   // a small hash table for keys to map to, all slots initially empty
   const int map_slots = 2*v_max_bucket_sz;
   v_map_table = new charPtr[map_slots];
   for ( int s = 0; s < map_slots; s++ )
      v_map_table[s] = 0;

   // an int table remembering slots in the v_map_table
   // that have been mapped.
   v_index_table = new int[v_max_bucket_sz];

   // a charPtr table remembering possibly duplicated keys
   v_check_table = new charPtr[v_max_bucket_sz];

   // a charPtr table remembering duplicated keys
   v_dup_table = new charPtr[v_max_bucket_sz];

   for ( int b = 0; b < NUM_BUCKETS; b++ ) {
      if ( v_bucket_array[b] == 0 )
         continue;
      filter_a_bucket(v_bucket_array[b]);
   }

   delete [] v_map_table;
   delete [] v_check_table;
   delete [] v_index_table;
   delete [] v_dup_table;
}
// Walks bkt's key list from the head and, for each cell still on the list,
// asks remove_keys() to drop every later cell holding the same key string.
void sorter::filter_a_bucket(bucketPtr bkt)
{
   for ( slist_void_ptr_cell* cell = bkt -> key_ptr;
         cell != 0;
         cell = (slist_void_ptr_cell*)(cell -> v_succ) )
   {
      remove_keys(bkt, (char*)(cell -> void_ptr()), cell);
   }
}
// Scans the cells that follow lp and removes each one whose key string
// compares equal to key: the cell is unlinked, its string and the cell
// itself are deleted, and bkt's key count is decremented.
//
// bkt: bucket owning the list; only its v_no_keys is updated here.
// key: string to match against (compared with strcmp).
// lp:  cell to start from; lp itself is never removed.
void sorter::remove_keys(bucketPtr bkt, char* key, slist_void_ptr_cell* lp)
{
   slist_void_ptr_cell* cur = lp;

   while ( cur != 0 && cur -> v_succ != 0 ) {

      slist_void_ptr_cell* follower = (slist_void_ptr_cell*)cur -> v_succ;

      if ( strcmp( key, (char*)(follower -> void_ptr()) ) != 0 ) {
         // different key: keep the follower and step onto it
         cur = follower;
         continue;
      }

      // Same key: splice the follower out. cur stays where it is so the
      // cell that now follows it is examined on the next pass.
      cur -> v_succ = follower -> v_succ;
      delete (char*)(follower -> void_ptr());
      delete follower;
      bkt -> v_no_keys --;
   }
}
//
//
// int n_chk = 0;
// int n_idx = 0;
//
// slist_void_ptr_cell *lp = bkt -> key_ptr;
//
// while ( lp ) {
//
//// maintain the order of key chains in v_check_table[]!!!
//
// char* key2 = ((char*)(lp -> void_ptr()));
//
// int hash = b_convertor.atoi(
// key2, strlen(key2),
// 1, 2*v_max_bucket_sz
// );
//
// if ( v_map_table[hash] != 0 ) {
//
// if ( v_map_table[hash] != (charPtr)1 ) {
// v_check_table[n_chk++] = v_map_table[hash];
// v_map_table[hash] = (charPtr)1;
// // set to occupied. so that the same
// // key will be in the v_check_table once.
// // assume 1 will never be the address
// // of any keys.
// }
//
// v_check_table[n_chk++] = key2;
//
// } else {
// //v_map_table[hash] = ((Ostring*)(lp -> void_ptr())) -> get();
// v_map_table[hash] = ((char*)(lp -> void_ptr()));
// }
//
// v_index_table[n_idx++] = hash; // remember the slot being set
//
// lp = (slist_void_ptr_cell*)(lp -> succ);
// }
////debug(cerr, n_chk);
//
//// double check possibly collided keys
// int n_dup = 0;
// for ( int u=0; u<n_chk-1; u++ ) {
// for (int v=u+1; v<n_chk; v++ ) {
// if ( v_check_table[u] && v_check_table[v] &&
// strcmp(v_check_table[u], v_check_table[v]) == 0 )
// {
//// mark v as a duplicated key
// v_dup_table[n_dup++] = v_check_table[v];
// v_check_table[v] = 0;
// }
//
// }
// }
//
////debug(cerr, n_dup);
//
//// remove the duplicates
// if ( n_dup > 0 ) {
// slist_void_ptr_cell *lp = bkt -> key_ptr;
// slist_void_ptr_cell *prev_lp = lp;
//
// int u = 0;
// lp = (slist_void_ptr_cell*)(lp -> succ);
//
// while ( lp && u<n_dup ) {
//
//
///*
//debug(cerr, v_dup_table[u]);
//debug(cerr, int(v_dup_table[u]));
//debug(cerr, (char*)(lp -> void_ptr()));
//debug(cerr, int((char*)(lp -> void_ptr())));
//*/
//
//
// if ( v_dup_table[u] == ((char*)(lp -> void_ptr())) )
// {
//
//MESSAGE(cerr, "key removed:");
//debug(cerr, v_dup_table[u]);
//
// prev_lp -> succ = lp -> succ;
// bkt -> v_no_keys --;
// u++;
// } else
// prev_lp = lp;
//
// lp = (slist_void_ptr_cell*)(lp -> succ);
//
// }
// }
//
//// clean v_map_table
// for ( int k=0; k<n_idx; k++ ) {
// v_map_table[v_index_table[k]] = 0;
// }
//
////MESSAGE(cerr, "SOS0");
//}
void sorter::assemble_unique_keys()
{
int i;
for ( i=0; i<NUM_BUCKETS; i++ ) {
if ( v_bucket_array[i] ) {
v_no_unique_keys += v_bucket_array[i] -> v_no_keys;
}
}
v_unique_keys = new charPtr[v_no_unique_keys];
int j = 0;
slist_void_ptr_cell *lp = 0;
for ( i=0; i<NUM_BUCKETS; i++ ) {
if ( v_bucket_array[i] == 0 ) continue;
lp = v_bucket_array[i] -> key_ptr;
while ( lp ) {
v_unique_keys[j++] = ((char*)(lp -> void_ptr()));
lp -> set_vptr(0);
lp = (slist_void_ptr_cell*)(lp -> v_succ);
}
}
}
// Writes the unique-key count through debug(), then every unique key on its
// own line, and returns out.
// NOTE(review): debug() is declared outside this file; if it stringifies its
// argument, the spelling `st.v_no_unique_keys` is part of the output, so the
// parameter name is deliberately left alone.
ostream& operator<< (ostream& out, sorter& st)
{
   debug(out, st.v_no_unique_keys);

   int idx = 0;
   while ( idx < st.v_no_unique_keys ) {
      out << st.v_unique_keys[idx] << "\n";
      idx++;
   }

   return out;
}

View File

@@ -0,0 +1,101 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: sorter.h /main/3 1996/06/11 17:20:52 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _sorter_h
#define _sorter_h 1
#include <limits.h>
#if defined(CSRG_BASED)
#define MAXINT INT_MAX
#else
#include <values.h>
#endif
#include "utility/funcs.h"
#include "utility/atoi_fast.h"
#include "hmphf/buckets.h"
// Reads keys from a file or stream, distributes them over a fixed array of
// buckets, drops repeated keys and exposes the remainder as a flat array.
class sorter
{

public:
   sorter(char* key_file);
   sorter(istream& key_stream);
   ~sorter();

   // Array of unique key strings; the array stays owned by the sorter.
   char** unique_keys()
   {
      return v_unique_keys;
   }

   // Number of entries in unique_keys().
   int no_unique_keys()
   {
      return v_no_unique_keys;
   }

   friend ostream& operator<< (ostream&, sorter&);

protected:
   void _init(istream&);
   void filter_by_hash();
   void filter_a_bucket(bucketPtr bkt);
   void assemble_unique_keys();
   void remove_keys(bucketPtr bkt, char* key, slist_void_ptr_cell* lp);

protected:
   bucketPtr* v_bucket_array;
   int v_max_bucket_sz;
   int v_no_unique_keys;
   char** v_unique_keys;
   atoi_fast b_convertor;

   // scratch tables allocated and released inside filter_by_hash()
   charPtr* v_map_table;
   int* v_index_table;
   charPtr* v_check_table;
   charPtr* v_dup_table;
};
#endif