/*
 * CDE - Common Desktop Environment
 *
 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
 *
 * These libraries and programs are free software; you can
 * redistribute them and/or modify them under the terms of the GNU
 * Lesser General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * These libraries and programs are distributed in the hope that
 * they will be useful, but WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with these libraries and programs; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */
/*
 * $XConsortium: disk_hash.cc /main/5 1996/07/18 14:28:19 drk $
 *
 * Copyright (c) 1993 HAL Computer Systems International, Ltd.
 * All rights reserved.  Unpublished -- rights reserved under
 * the Copyright Laws of the United States.  USE OF A COPYRIGHT
 * NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
 * OR DISCLOSURE.
 *
 * THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
 * SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.  USE,
 * DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
 * PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
 * INTERNATIONAL, LTD.
 *
 *                         RESTRICTED RIGHTS LEGEND
 * Use, duplication, or disclosure by the Government is subject
 * to the restrictions as set forth in subparagraph (c)(l)(ii)
 * of the Rights in Technical Data and Computer Software clause
 * at DFARS 252.227-7013.
 *
 *                        HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
 *                                1315 Dell Avenue
 *                                Campbell, CA  95008
 */

#include "diskhash/disk_hash.h"
#include "api/transaction.h"

extern transaction* g_transac;

// Candidate hash step values.
// NOTE(review): every entry except 21 is prime; 21 (= 3*7) looks like a
// typo for 19.  Left untouched because existing on-disk stores may have
// been built against this exact table -- confirm before changing.
int dsteps[] = { 2, 3, 5, 7, 11, 13, 17, 21, 23, 29, 31 };
int no_dsteps;

#define KPB     20 // keys per bucket
#define REHASHS 5  // number of rehashs in a row with
                   // fixed bucket size

// Serialized size of the top-level hash parameters (k, p, M, n, v)
// kept in slot 1 of page 1 of the key store.
#define TOP_LEVEL_PARAMS (sizeof(k) + sizeof(p) + sizeof(M) + \
                          sizeof(n) + sizeof(v))

//static buffer buf(LBUFSIZ);

/////////////////////////////////////////////////////////////////////
// Construct a disk-based hash index backed by 'store'.
//
//   store      : page storage holding the buckets and the persistent
//                hash parameters
//   prime      : modulus p for the top-level hash function
//   expected_n : expected number of keys; sizes the initial table
//
// Persistent parameters are loaded (or created) by init_params().
// The index uses M primary buckets plus v overflow buckets, with
// per-bucket (k, r) values cached in k_vector / r_vector and key
// slot positions cached in hash_vector.
/////////////////////////////////////////////////////////////////////
disk_hash::disk_hash(page_storage* store, int prime, int expected_n) :
   index_agent(), key_store(store), buf(store -> aux_buf())
{
   if ( g_transac ) {
      // register the underlying store with the active transaction
      g_transac -> book(store);
   }

   // honor the byte order of the on-disk store
   buf.set_swap_order(store -> swap_order());

   rand_generator.seed();

   no_dsteps = sizeof(dsteps) / sizeof(int);

   init_params(prime, expected_n);

   // M primary buckets plus v overflow buckets
   bucket_vector = new bucket_array(M+v, store);

   // slot-number cache; sized for the larger of the expected and the
   // actual (persisted) key count
   hash_vector = new void_ptr_array(2*MAX(expected_n, (int) n));
   k_vector = new void_ptr_array(M+v);
   r_vector = new void_ptr_array(M+v);

   k_vector -> reset_vptr(voidPtr(1));
}

disk_hash::~disk_hash()
{
   delete bucket_vector;
   delete hash_vector;
   delete k_vector;
   delete r_vector;
}

/////////////////////////////////////////////////////////////////////
// Load the persistent hash parameters from the store, or, for a
// brand-new store, pick initial values and persist them.
// Page 1 / slot 1 of the store is reserved for the serialized
// parameters.
// Throws stringException if the primary bucket layout is not the
// expected one.
/////////////////////////////////////////////////////////////////////
void disk_hash::init_params(int prime, int expected_n)
{
   int pgs = key_store -> pages();

   if ( pgs > 0 ) {
      // NOTE(review): a well-formed primary page appears to carry
      // exactly two slots -- verify against page_storage's layout.
      if ( (*key_store)(1, page_storage::READ) -> count() != 2 ) {
         throw(stringException("corrupted primary bucket"));
      }

      /////////////////////////////////
      // read in params from the store
      /////////////////////////////////
      buf.reset();
      (*key_store)(1, page_storage::READ) -> get(1, buf);
      buf.get(k).get(p).get(M).get(n).get(v);
   } else {
      ///////////////////////////////////////
      // init the store.  bucket 1 is reserved
      // for top level function params
      ///////////////////////////////////////
      set_p(prime);
      set_k(rand_generator.rand());
      set_M(MAX(1, expected_n/KPB));   // ~KPB keys per bucket
      set_v(MAX(1, M/4));              // overflow buckets: a quarter of M
      set_n(0);

      key_store -> add_page_frames(1);

      int slot_num;
      char* z;
      (*key_store)(1, page_storage::WRITE) ->
         alloc_slot(slot_num, TOP_LEVEL_PARAMS, z);

      // the parameter record must land in slot 1
      if ( slot_num != 1 ) {
         throw(stringException("corrupted primary bucket"));
      }

      sync_params();
   }
}

/////////////////////////////////////////////////////////////////////
// Write the in-core parameters (k, p, M, n, v) back to their
// reserved slot in the store.
/////////////////////////////////////////////////////////////////////
void disk_hash::sync_params()
{
   buf.reset();
   buf.put(k).put(p).put(M).put(n).put(v);

   (*key_store)(1, page_storage::WRITE) -> update_slot(1, buf);
}

void disk_hash::clean()
{
   throw(stringException("void disk_hash::clean(): not implemented yet"));
}

///////////////////////////////////////////////////////////////
// rehash all keys
//
// Called when 'w' could not be placed in either its primary or
// any overflow bucket.  All existing keys plus 'w' are spooled
// to a scratch file, the table is rebuilt with new parameters,
// and every key is re-inserted.  Throws stringException if no
// attempt succeeds.
///////////////////////////////////////////////////////////////
Boolean disk_hash::rehash(data_t& w)
{
//MESSAGE(cerr, "REHASH:");
   char tmp_name[PATHSIZ];
   snprintf(tmp_name, sizeof(tmp_name), "%s.tmp", key_store -> my_name());

   fstream pool(form("%s/%s", key_store -> my_path(), tmp_name),
                ios::in | ios::out );

   // spool every key currently in the table, plus the key that
   // failed to insert
   for ( int i = 0; i < bucket_vector -> count(); i++ ) {
      pool << bucket_vector -> get_bucket(i);
   }

   pool << w;

   Boolean ok = false;

   // NOTE(review): the body of this loop header was damaged in the copy
   // reviewed (text between '<' and the next '>' was stripped by a
   // tag-filter, taking the loop bound, the table-growth statements and
   // the declaration of 'delta' with it).  The reconstruction below
   // grows the table once per REHASHS attempts -- consistent with the
   // REHASHS macro comment ("rehashs in a row with fixed bucket size")
   // -- and must be verified against the original disk_hash.cc.
   for ( int rehashs = 0; rehashs < 2*REHASHS; rehashs++ ) {

      if ( rehashs % REHASHS == 0 && rehashs > 0 ) {
         // REHASHS re-seedings in a row failed at this size:
         // double the primary bucket count
         set_M(2*M);
         set_v(MAX(1, M/4));
      }

      int delta = MAX(0, (int)(M+v) - k_vector -> count());

      k_vector -> expandWith(delta);
      r_vector -> expandWith(delta);

      //////////////////////////////////////////
      // expand the buckets and the hash table
      //////////////////////////////////////////
      bucket_vector -> expandWith(MAX(0, M+v - bucket_vector -> count()));
      hash_vector -> expandWith(MAX(0, 2*n - hash_vector -> count()));

      /////////////////////
      // clean buckets
      /////////////////////
      for ( int i = 0; i < bucket_vector -> count(); i++ ) {
         bucket_vector -> get_bucket(i).remove_all();
      }

      if ( _rehash(pool) == true ) {
         ok = true;
         break;
      }
   }

   if ( ok == false )
      throw(stringException("rehash() failed"));

   del_file(tmp_name, key_store -> my_path());

   return true;
}

/////////////////////////////////////////////////////////////////////
// One rehash attempt: reset the cache vectors, pick a new hash
// seed k, and re-insert every key spooled in 'pool'.  Returns
// false as soon as any key fails to find a slot (no recursive
// rehash is allowed from here).
/////////////////////////////////////////////////////////////////////
Boolean disk_hash::_rehash(fstream& pool)
{
   // rewind the spool file (clear() first: eof is set after spooling)
   pool.clear();
   pool.seekg(0, ios::beg);

   hash_vector -> reset_vptr(voidPtr(0));
   k_vector -> reset_vptr(voidPtr(1));
   r_vector -> reset_vptr(voidPtr(0));

   set_k(rand_generator.rand());

   data_t x((char*)0, 0, 0);

   while ( pool >> x ) {
      if ( _insert(x, false) == false )
         return false;
   }

   return true;
}

/************************************/
// insert
//
// Insert key 'w'; a placement failure triggers a full rehash.
// Updates and persists the key count.  Throws stringException if
// even rehashing cannot place the key.
/************************************/
Boolean disk_hash::insert(data_t& w)
{
   if ( _insert(w, true) == false )
      throw(stringException("disk_hash::insert() failed"));

   n++;
   sync_params();

   return true;
}

/////////////////////////////////////////////////////////////////////
// Place 'w' in its primary bucket, or failing that, in one of the
// overflow buckets.  'rehash_if_fail' controls whether a total
// placement failure falls back to rehash() (it must not when called
// from _rehash(), to avoid recursion).
/////////////////////////////////////////////////////////////////////
Boolean disk_hash::_insert(data_t& w, Boolean rehash_if_fail)
{
   int hash = w.bucket_num(k, p, M);
   //int hash = w.key.int_key;
   //debug(cerr, hash);

   disk_bucket& b = bucket_vector -> get_bucket(hash);

   int slot_num = b.insert(&w);

   if ( slot_num != 0 ) {
      caching(b, w, slot_num);
   } else {
      ///////////////////////////////////
      // insert into the overflow bucket
      ///////////////////////////////////
      //MESSAGE(cerr, "INSERT to overflow buckets");
      //debug(cerr, hash);

      // NOTE(review): the probe starts at hash%v and walks upward only;
      // overflow buckets below the start index are never tried.  member()
      // scans the same way, so lookups stay consistent -- presumably a
      // deliberate bound on probe cost.
      for ( hash %= v; hash < (int) v; hash++ ) {
         disk_bucket& overflowb = bucket_vector -> get_bucket(hash+M);

         slot_num = overflowb.insert(&w);

         if ( slot_num != 0 ) {
            caching(overflowb, w, slot_num);
            break;
         }
      }

      if ( slot_num == 0 && rehash_if_fail == true )
         return rehash(w);
   }

   if ( slot_num != 0 )
      return true;
   else
      return false;
}

/////////////////////////////////////////////////////////////////////
// Record, in hash_vector, the slot numbers of the keys held in
// bucket 'b', keyed by each key's hash-vector position.  'w' (just
// inserted at 'slot_num') is cached last so that it is always
// present even if earlier entries collide and are overwritten.
/////////////////////////////////////////////////////////////////////
void disk_hash::caching(disk_bucket& b, data_t& w, int slot_num)
{
   //debug(cerr, b.bnum());
   //debug(cerr, k_vector -> count());

   // per-bucket hash parameters
   int kv = int((long)(*k_vector)[b.bnum()]);
   int rv = int((long)(*r_vector)[b.bnum()]);

   ///////////////////////////////////////////
   // cache all keys in the bucket except w.
   // In fact, only need to cache keys whose
   // hash vector slots have been updated.
   // It is to be enhanced.
   ///////////////////////////////////////////
   int ind = b.first();

   while ( ind != 0 && ind != slot_num ) {
      data_t* x = b(ind);

      if ( x ) {
         hash_vector -> insert(
            (voidPtr)(size_t)ind,
            x -> slot_num(kv, rv, p, hash_vector -> count())
         );
      }

      delete x;   // b(ind) hands back a heap copy

      b.next(ind);
   }

   ////////////////////////////////////////
   // cache w. it is always in the cache.
   // others may be overwritten.
   ////////////////////////////////////////
   hash_vector -> insert(
      (voidPtr)(size_t)slot_num,
      w.slot_num(kv, rv, p, hash_vector -> count())
   );
}

/******************************************/
// remove operation
//
// Remove 'w' if present; updates and persists the key count.
// Returns false when the key is not in the index.
/******************************************/
Boolean disk_hash::remove(data_t& w)
{
   int slot_num;
   disk_bucket* b = 0;

   if ( member(w, b, slot_num) == true ) {
      b -> remove(slot_num);
      n--;
      sync_params();
      return true;
   } else
      return false;
}

// Membership test; convenience wrapper over the three-argument form.
Boolean disk_hash::member(data_t& w)
{
//MESSAGE(cerr, "disk_hash::member():");
//asciiOut(cerr);
//MESSAGE(cerr, "+++++++++++++");
   disk_bucket* b = 0;
   int slot_num;
   return member(w, b, slot_num);
}

/////////////////////////////////////////////////////////////////////
// Locate 'w'.  On success 'b' points at the bucket holding it and
// 'slot_num' is its slot within that bucket.  The cached slot number
// from hash_vector is offered to disk_bucket::member() as a hint;
// overflow buckets are scanned (upward from hash%v, mirroring
// _insert()) only when the primary bucket has overflowed.
/////////////////////////////////////////////////////////////////////
Boolean disk_hash::member(data_t& w, disk_bucket*& b, int& slot_num) const
{
   int hash = w.bucket_num(k, p, M);
   //int hash = w.key.int_key;
   //debug(cerr, hash);

   b = &bucket_vector -> get_bucket(hash);

   // per-bucket hash parameters, then the cached slot hint
   int kv = int((long)(*k_vector)[b -> bnum()]);
   int rv = int((long)(*r_vector)[b -> bnum()]);

   slot_num =
      int((long)(*hash_vector)[w.slot_num(kv, rv, p, hash_vector -> count())]);
   //debug(cerr, slot_num);

   if ( b -> member(w, slot_num) == true )
      return true;

   if ( b -> overflow() == true ) {
      for ( hash %= v; hash < (int) v; hash++ ) {
         b = &bucket_vector -> get_bucket(hash+M);

         if ( b -> member(w, slot_num) == true ) {
            return true;
         }
      }
   }

   return false;
}

// Iteration support: index of the first bucket, or -1 when empty.
int disk_hash::first_bucket()
{
   return ( M > 0 ) ? 0 : -1;
}

disk_bucket* disk_hash::get_bucket(int& ind)
{
   return &bucket_vector -> get_bucket(ind);
}

// Advance 'ind' to the next bucket index; -1 marks the end
// (primary and overflow buckets are visited in one sequence).
void disk_hash::next_bucket(int& ind)
{
   ind = ( ind >= (int)(M+v-1) ) ? -1 : (ind+1);
}

/*******************************************************/
// print operation
/*******************************************************/
ostream& disk_hash::asciiOut(ostream& out)
{
   int ind = first_bucket();

   while ( ind != -1 ) {
      //MESSAGE(cerr, "New Bucket:");
      //debug(cerr, ind);
      disk_bucket* bucket = get_bucket(ind);

      if ( bucket ) {
         out << *bucket;
      }

      next_bucket(ind);
   }

   return out;
}

// Bulk load: insert every key readable from 'in', then persist the
// final key count once.
istream& disk_hash::asciiIn(istream& in)
{
   data_t actor;

   while ( in >> actor ) {
      _insert(actor, true);
      n++;
   }

   sync_params();

   return in;
}

// Print every bucket through a caller-supplied per-entry printer.
ostream& disk_hash::asciiOut(ostream& out, print_func_ptr_t print_f)
{
   int ind = first_bucket();

   while ( ind != -1 ) {
      disk_bucket* bucket = get_bucket(ind);

      if ( bucket ) {
         bucket -> asciiOut(out, print_f);
      }

      next_bucket(ind);
   }

   return out;
}

// Debug dump of the top-level hash parameters.
void disk_hash::out_params()
{
   debug(cerr, k);
   debug(cerr, p);
   debug(cerr, M);
   debug(cerr, v);
   debug(cerr, n);
}