Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

hashtable.cpp

Go to the documentation of this file.
00001 //****************************************************
00002 //  April, 1993, University of Illinois
00003 // Copyright (C) 1993, 1994 Tianlin Wang
00004 /* Copyright (C) 1994-2003 Matvec Development Team. 
00005 
00006   This program is free software; you can redistribute it and/or
00007   modify it under the terms of the GNU Library General Public
00008   License as published by the Free Software Foundation; either
00009   version 2 of the License, or (at your option) any later version.
00010   
00011   This program is distributed in the hope that it will be useful,
00012   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014   Library General Public License for more details.
00015     
00016   You should have received a copy of the GNU Library General Public
00017   License along with this library; if not, write to the Free
00018   Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00019   MA 02111-1307, USA 
00020 */
00021 
00022 #define HASH_CONST   1482907          // prime close to 2^{20.5}
00023 #include <fstream>
00024 #include "exception.h"
00025 #include "util.h"
00026 #include "hashtable.h"
00027 
00028 namespace matvec {
00029 
00030 HashNode::HashNode(void)
00031 {
00032    data = 0;
00033    datasize = 0;
00034    id = 0;
00035 }
00036 
00037 void HashNode::copyfrom(const HashNode& A)
00038 {
00039    if (this == &A) return;
00040    release();
00041    if (A.data) {
00042       datasize = A.datasize;
00043       if(datasize>0){
00044         data = new char [datasize];
00045       }
00046       else {
00047         data = 0;
00048       }
00049       memcpy(data,A.data,datasize);
00050       id = A.id;
00051    }
00052 }
00053 
00054 void HashNode::resize(const size_t s)
00055 {
00056   if (datasize == s) return;
00057   datasize = s;
00058   if (data) {
00059     delete [] data;
00060     data=0;
00061   }
00062   if(datasize>0){
00063     data = new char [datasize];
00064   }
00065   else {
00066     data = 0;
00067   }
00068   id = 0;
00069 }
00070 
00071 const HashNode& HashNode::operator=(const HashNode& A)
00072 {
00073    copyfrom(A);
00074    return *this;
00075 }
00076 
00077 int HashNode::equal(const char *v)
00078 {
00079    for (unsigned i=0; i<datasize; i++) if (data[i] != v[i]) return 0;
00080    return 1;
00081 }
00082 
00083 void HashNode::insert(const char *v,const size_t ds,const unsigned idno)
00084    // if nodesize is constant, then memory should has been allocated already
00085 {
00086    if (datasize != ds) resize(ds);
00087    memcpy(data,v,datasize);
00088    id = idno;
00089 }
00090 
00091 void HashNode::release(void)
00092 {
00093    if (data) { delete [] data; data = 0;}
00094    datasize=0;
00095 }
00096 
//////////////////////////////////////////////////////////
//   this routine was adapted from bibindex.c by
//    Nelson H. F. Beebe <beebe@math.utah.edu>
//  long next_prime(long n)
//  Return the smallest odd prime p with p >= |n|.
//
//  Note that n itself is returned when it is already an odd prime, and
//  that 3 is returned for every |n| <= 3 (the even prime 2 is never
//  produced; earlier versions returned the non-prime 1 for |n| < 2).
//  Candidates are tested by trial division with 2, 3 and the pairs
//  (6k-1, 6k+1). The loop bound is written as factor <= candidate/factor
//  so that it cannot overflow the way factor*factor could.
//  Precondition: |n| must be representable as a long and a prime must
//  exist between |n| and LONG_MAX.
/////////////////////////////////////////////////////////
long next_prime(long n)
{
   if (n < 0L) n = -n;                 // be safe -- force n positive
   if (n <= 3L) return 3L;

   long candidate = 2L*(n/2L) + 1L;    // first odd number >= n
   for (;;) {
      int is_prime = (candidate % 3L) != 0L;
      // factor and factor+2 run over 5,7, 11,13, 17,19, ...;
      // factor+4 is always divisible by 3 and need not be tried
      for (long factor = 5L; is_prime && factor <= candidate/factor; factor += 6L) {
         if ((candidate % factor) == 0L || (candidate % (factor + 2L)) == 0L)
            is_prime = 0;
      }
      if (is_prime) return candidate;
      candidate += 2L;
   }
}
00127 
00128 HashTable::HashTable(void)
00129 {
00130    datasize      = 0;
00131    tablesize     = 0;
00132    id_changed    = 0;
00133    ext_tablesize = 0;
00134    act_tablesize = 0;
00135 
00136    hash_table    = 0;
00137    hash_storage  = 0;
00138 }
00139 
00140 HashTable::HashTable(const unsigned n, const size_t s)
00141 {
00142    datasize      = 0;
00143    tablesize     = 0;
00144    id_changed    = 0;
00145    ext_tablesize = 0;
00146 
00147    hash_table    = 0;
00148    hash_storage  = 0;
00149    resize(n,s);
00150 }
00151 
00152 HashTable::HashTable(const unsigned n, const char str[])
00153 {
00154 
00155    // str could be any string
00156    datasize      = 0;
00157    tablesize     = 0;
00158    id_changed    = 0;
00159    ext_tablesize = 0;
00160 
00161    hash_table    = 0;
00162    hash_storage  = 0;
00163    resize(n,0);
00164 }
00165 
00166 void HashTable::copyfrom(const HashTable& A)
00167 {
00168    if (this == &A) return;
00169    resize(A.tablesize,A.datasize);
00170    maxsize(A.ext_tablesize);
00171    id_changed = A.id_changed;
00172    act_tablesize = A.act_tablesize;
00173    HashNode *node;
00174    for (unsigned i=0; i<ext_tablesize; i++) {
00175       node = &A.hash_storage[i];
00176       if (node->data) {
00177          hash_storage[i] = *node;
00178          hash_table[node->id_no()-1] = &hash_storage[i];
00179       }
00180    }
00181 }
00182 
00183 const HashTable& HashTable::operator=(const HashTable& A)
00184 {
00185    copyfrom(A);
00186    return *this;
00187 }
00188 
00189 unsigned HashTable::hash(const char* v, HashTable::hashaction action)
00190 {
00191    register int i;
00192    unsigned long h,new_h,skip;
00193    unsigned strsize;
00194    HashNode *node;
00195 
00196    if (datasize==0) {
00197       strsize = strlen(v)+1;
00198       if (strsize==1) {
00199          warning("HashTable.hash(v): empty v or zero size of v");
00200          return 0;
00201       }
00202    }
00203    else strsize = datasize;
00204    for (h=0, skip=1, i=0; i<strsize; i++) {
00205       h = (h*HASH_CONST + v[i]) % ext_tablesize;
00206       skip += 2*h;
00207    }
00208    node = &hash_storage[h];
00209    for (int ntry=0; ntry<500; ntry++) {
00210       if ( node->id == 0) {                // node is empty
00211          if (action == HashTable::INSERT) {
00212             hash_table[act_tablesize] = node;
00213             act_tablesize++;
00214             node->insert(v,strsize,act_tablesize);
00215             return act_tablesize;
00216          }
00217          else if (action == HashTable::GETIDNO) {
00218             return 0;
00219          }
00220          else {
00221             warning("HashTable.hash(), unknown hash action type");
00222          }
00223       }
00224       else {   // node is not empty
00225          if (node->equal(v)) {
00226             return node->id_no();     // for both INSERT and GETIDNO
00227          }
00228          else {
00229             new_h = (h + skip) % ext_tablesize;
00230             h = (new_h == h) ? (h+1)% ext_tablesize : new_h;
00231             node = &hash_storage[h];
00232          }
00233       }
00234    }
00235    return 0;
00236 }
00237 
00238 unsigned HashTable::insert(const void* vv)
00239 {
00240    if (ext_tablesize==0) {
00241       warning("HashTable.insert(): HashTable is null, do resize() first");
00242       return 0;
00243    }
00244    const char *v = (const char *)vv;
00245    unsigned id;
00246    id = hash(v,HashTable::INSERT);   // return its id of vv in HashTable
00247    if (id == 0) {
00248       unsigned k = act_tablesize;
00249       HashTable TMP;
00250       TMP.copyfrom(*this);
00251       maxsize(static_cast<unsigned>(next_prime(static_cast<long>(ext_tablesize+51))));
00252       for (unsigned i=0; i<k; i++) {
00253          id = hash(TMP.hash_table[i]->data,HashTable::INSERT);
00254          if (id == 0) throw exception("hashtable size too small");
00255       }
00256       id = hash(v,HashTable::INSERT);
00257       if (id == 0) throw exception("hashtable size too small");
00258    }
00259    return id;
00260 }
00261 
00262 void HashTable::change_id(const unsigned oldid,const unsigned newid)
00263 {
00264    ///////////////////////////////////////////////////////////////////////
00265    //  once calling this routine, you must call it for each non-empty
00266    //  HashNode (id starts from 1 to act_tablesize). newif must starts
00267    //  1 to act_tablesize, too.
00268    //  After calling for each non-empty HashNode, then must call reorder()
00269    //  to re-order hash_table so that hash_table[i] points HashNode
00270    //  with id i+1 where i starts from 0 to act_tablesize.
00271    ///////////////////////////////////////////////////////////////////////
00272    if (oldid >act_tablesize || newid>act_tablesize) throw exception("HashTable.change_id(oldid,newid): invalid oldid or newid");
00273    hash_table[oldid-1]->id = newid;
00274    id_changed = 1;
00275 }
00276 
00277 void HashTable::reorder(void)
00278 {
00279    if (id_changed == 0) return;
00280    unsigned i,id;
00281    for (i=0; i<ext_tablesize; i++) {
00282       id = hash_storage[i].id_no();
00283       if (id > 0) hash_table[id-1] = &(hash_storage[i]);
00284    }
00285 }
00286 
00287 unsigned HashTable::get_id(const void *vv)
00288 {
00289    const char *v = (const char *)vv;
00290    return hash(v,HashTable::GETIDNO); // if 0, means it cannot found
00291 }
00292 
00293 const void* HashTable::find(const unsigned id) const
00294 {
00295    if (id == 0 || id > act_tablesize) {
00296      throw exception("HashTable.find(id): range error");
00297    }
00298    return (const void *)(hash_table[id-1]->data); // index starts from 0
00299 }
00300 
00301 HashTable& HashTable::resize(const unsigned n, const size_t s)
00302 {
00303    if (n==0) {
00304       release();
00305       return *this;
00306    }
00307    unsigned i;
00308    unsigned tmpsize = static_cast<unsigned>(next_prime(static_cast<long>(1.30*n + 50.0)));
00309                                                     //  30% of tablesize
00310    if (tmpsize != ext_tablesize || datasize != s) {   // definitely delete them
00311       ext_tablesize = tmpsize;
00312       datasize = s;
00313       if(hash_storage){
00314         delete [] hash_storage;
00315         hash_storage=0;
00316       }
00317       if (ext_tablesize>0){
00318         hash_storage = new HashNode [ext_tablesize];
00319       }
00320       else{
00321         hash_storage = 0;
00322       }
00323       check_ptr(hash_storage);
00324       for (int i=0; i<ext_tablesize; ++i) hash_storage[i].resize(datasize);
00325       if(hash_table){
00326         delete [] hash_table;
00327         hash_table=0;
00328       }
00329       if(ext_tablesize>0){
00330         hash_table = new HashNode*[ext_tablesize];
00331       }
00332       else {
00333         hash_table = 0;
00334       }
00335       check_ptr(hash_table);
00336    }
00337    else {
00338       for (i=0; i<act_tablesize; i++) hash_table[i]->id = 0;
00339    }
00340    tablesize = n;
00341    act_tablesize = 0;
00342    return *this;
00343 }
00344 
00345 void  HashTable::maxsize(const unsigned ms)
00346 {
00347    if (ext_tablesize == ms) return;
00348    ext_tablesize = ms;
00349    if(hash_storage){
00350      delete [] hash_storage;
00351      hash_storage=0;
00352    }
00353    if (ext_tablesize>0){
00354      hash_storage = new HashNode [ext_tablesize];
00355    }
00356    else{
00357      hash_storage = 0;
00358    }
00359    check_ptr(hash_storage);
00360    for (int i=0; i<ext_tablesize; ++i) hash_storage[i].resize(datasize);
00361 
00362    if(hash_table){
00363      delete [] hash_table;
00364      hash_table=0;
00365    }
00366    if(ext_tablesize>0){
00367      hash_table = new HashNode*[ext_tablesize];
00368    }
00369    else {
00370      hash_table = 0;
00371    }
00372    check_ptr(hash_table);
00373    act_tablesize = 0;
00374 }
00375 
00376 void HashTable::release(void)
00377 {
00378    if (hash_storage) {delete [] hash_storage; hash_storage = 0;}
00379    if (hash_table)   {delete [] hash_table; hash_table = 0;}
00380    tablesize     = 0;
00381    datasize      = 0;
00382    ext_tablesize = 0;
00383 }
00384 
00385 void HashTable::save(const char fname[],const int io_mode)
00386 {
00387    std::ofstream hfile;
00388    hfile.open(fname,(OpenModeType)io_mode);
00389    if (!hfile) throw exception(" HashTable::save(): cannot open file");
00390    save_to_disk(hfile,0);
00391    hfile.close();
00392 }
00393 
00394 void HashTable::input(const char fname[])
00395 {
00396    std::ifstream hfile(fname,std::ios::in);
00397    if (!hfile) throw exception(" HashTable::input(): cannot open file");
00398    input_from_disk(hfile);
00399    hfile.close();
00400 }
00401 
00402 /**********************************************************************
00403 * it save the hash_storage and other necessary information into file hfile
00404 * so that they can be loaded into other HashTable objects.
00405 *  hash_storage and hash_table have been released from memory
00406 ***********************************************************************/
00407 void HashTable::save_to_disk(std::ostream& stream,const int relse)
00408 {
00409    unsigned ts,act_ts,ext_ts,tpos;
00410    size_t   ds;
00411    ts = tablesize;
00412    act_ts = act_tablesize;
00413    ext_ts = ext_tablesize;
00414    ds = datasize;
00415 
00416    stream.write((char*)&ts,sizeof(unsigned));
00417    stream.write((char*)&act_ts,sizeof(unsigned));
00418    stream.write((char*)&ext_ts,sizeof(unsigned));
00419    stream.write((char*)&ds,sizeof(size_t));
00420 
00421    HashNode *node;
00422    for (tpos=0; tpos<ext_tablesize; tpos++) {
00423       node = &hash_storage[tpos];
00424       if (node->data) {
00425          stream.write((char*) &tpos,sizeof(unsigned));
00426          stream.write((char*) &(node->id),sizeof(unsigned));
00427          stream.write((char*) &(node->datasize),sizeof(size_t));
00428          stream.write((char*)node->data,node->datasize);
00429       }
00430    }
00431    if (relse) release();
00432 }
00433 
00434 void HashTable::input_from_disk(std::istream& stream)
00435 {
00436    unsigned ts,act_ts,ext_ts,tpos,id;
00437    size_t ds = 0;
00438    ts = act_ts = ext_ts = tpos = id = 0;
00439    stream.read((char*) &ts,sizeof(unsigned));
00440    stream.read((char*) &act_ts,sizeof(unsigned));
00441    stream.read((char*) &ext_ts,sizeof(unsigned));
00442    stream.read((char*) &ds,sizeof(size_t));
00443 
00444    resize(ts,ds);       // allocate momery for hash_storage, hash_table
00445    maxsize(ext_ts);
00446    act_tablesize = act_ts;
00447 
00448    HashNode *node;
00449    for (ts=0; ts<act_tablesize; ts++) {
00450       stream.read((char*) &tpos,sizeof(unsigned));
00451       stream.read((char*) &(id),sizeof(unsigned));
00452       stream.read((char*) &(ds),sizeof(size_t));
00453       node = &hash_storage[tpos];
00454       node->id = id;
00455       if (datasize == 0) {
00456          node->datasize = ds;
00457          if(ds>0){
00458            node->data = new char [ds];
00459          }
00460          else {
00461            node->data = 0;
00462          }
00463       }
00464       stream.read((char*)node->data,ds);
00465       hash_table[id-1] = node;
00466    }
00467 }
00468 
00469 void HashTable::display(displaytype dply)
00470 {
00471    for (unsigned i=0; i<act_tablesize; i++) dply(hash_table[i]->data);
00472 }
00473 }
00474 #undef HASH_CONST

Generated on Thu Jun 16 17:13:44 2005 for Matvec by doxygen1.2.16