00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #define HASH_CONST 1482907 // prime close to 2^{20.5}
00023 #include <fstream>
00024 #include "exception.h"
00025 #include "util.h"
00026 #include "hashtable.h"
00027
00028 namespace matvec {
00029
00030 HashNode::HashNode(void)
00031 {
00032 data = 0;
00033 datasize = 0;
00034 id = 0;
00035 }
00036
00037 void HashNode::copyfrom(const HashNode& A)
00038 {
00039 if (this == &A) return;
00040 release();
00041 if (A.data) {
00042 datasize = A.datasize;
00043 if(datasize>0){
00044 data = new char [datasize];
00045 }
00046 else {
00047 data = 0;
00048 }
00049 memcpy(data,A.data,datasize);
00050 id = A.id;
00051 }
00052 }
00053
00054 void HashNode::resize(const size_t s)
00055 {
00056 if (datasize == s) return;
00057 datasize = s;
00058 if (data) {
00059 delete [] data;
00060 data=0;
00061 }
00062 if(datasize>0){
00063 data = new char [datasize];
00064 }
00065 else {
00066 data = 0;
00067 }
00068 id = 0;
00069 }
00070
00071 const HashNode& HashNode::operator=(const HashNode& A)
00072 {
00073 copyfrom(A);
00074 return *this;
00075 }
00076
00077 int HashNode::equal(const char *v)
00078 {
00079 for (unsigned i=0; i<datasize; i++) if (data[i] != v[i]) return 0;
00080 return 1;
00081 }
00082
00083 void HashNode::insert(const char *v,const size_t ds,const unsigned idno)
00084
00085 {
00086 if (datasize != ds) resize(ds);
00087 memcpy(data,v,datasize);
00088 id = idno;
00089 }
00090
00091 void HashNode::release(void)
00092 {
00093 if (data) { delete [] data; data = 0;}
00094 datasize=0;
00095 }
00096
00097
00098
00099
00100
00101
00102
/**
 * Return the first odd number >= |n| that passes trial division.
 *
 * |n| is first rounded up to an odd candidate. Candidates <= 3 are
 * returned unchanged, so the result is 1 for |n| in {0, 1} and 3 for
 * |n| in {2, 3}. Larger candidates are tested against 2, 3 and then the
 * pairs (f, f + 2) for f = 5, 11, 17, ... while f*f <= candidate; the
 * candidate is advanced by 2 until one passes.
 *
 * @param n starting value; its sign is ignored.
 * @return the accepted candidate.
 */
long next_prime(long n)
{
    const long magnitude = (n < 0L) ? -n : n;
    long candidate = 2L*(magnitude/2L) + 1L;

    if (candidate <= 3L) return candidate;

    for (;;) {
        bool composite = !((candidate % 2L) && (candidate % 3L));

        // Remaining possible divisors have the form 6k - 1 and 6k + 1.
        for (long f = 5L; !composite && (f*f <= candidate); f += 6L) {
            composite = ((candidate % f) == 0L)
                     || ((candidate % (f + 2L)) == 0L);
        }

        if (!composite) return candidate;
        candidate += 2L;
    }
}
00127
00128 HashTable::HashTable(void)
00129 {
00130 datasize = 0;
00131 tablesize = 0;
00132 id_changed = 0;
00133 ext_tablesize = 0;
00134 act_tablesize = 0;
00135
00136 hash_table = 0;
00137 hash_storage = 0;
00138 }
00139
00140 HashTable::HashTable(const unsigned n, const size_t s)
00141 {
00142 datasize = 0;
00143 tablesize = 0;
00144 id_changed = 0;
00145 ext_tablesize = 0;
00146
00147 hash_table = 0;
00148 hash_storage = 0;
00149 resize(n,s);
00150 }
00151
00152 HashTable::HashTable(const unsigned n, const char str[])
00153 {
00154
00155
00156 datasize = 0;
00157 tablesize = 0;
00158 id_changed = 0;
00159 ext_tablesize = 0;
00160
00161 hash_table = 0;
00162 hash_storage = 0;
00163 resize(n,0);
00164 }
00165
00166 void HashTable::copyfrom(const HashTable& A)
00167 {
00168 if (this == &A) return;
00169 resize(A.tablesize,A.datasize);
00170 maxsize(A.ext_tablesize);
00171 id_changed = A.id_changed;
00172 act_tablesize = A.act_tablesize;
00173 HashNode *node;
00174 for (unsigned i=0; i<ext_tablesize; i++) {
00175 node = &A.hash_storage[i];
00176 if (node->data) {
00177 hash_storage[i] = *node;
00178 hash_table[node->id_no()-1] = &hash_storage[i];
00179 }
00180 }
00181 }
00182
00183 const HashTable& HashTable::operator=(const HashTable& A)
00184 {
00185 copyfrom(A);
00186 return *this;
00187 }
00188
00189 unsigned HashTable::hash(const char* v, HashTable::hashaction action)
00190 {
00191 register int i;
00192 unsigned long h,new_h,skip;
00193 unsigned strsize;
00194 HashNode *node;
00195
00196 if (datasize==0) {
00197 strsize = strlen(v)+1;
00198 if (strsize==1) {
00199 warning("HashTable.hash(v): empty v or zero size of v");
00200 return 0;
00201 }
00202 }
00203 else strsize = datasize;
00204 for (h=0, skip=1, i=0; i<strsize; i++) {
00205 h = (h*HASH_CONST + v[i]) % ext_tablesize;
00206 skip += 2*h;
00207 }
00208 node = &hash_storage[h];
00209 for (int ntry=0; ntry<500; ntry++) {
00210 if ( node->id == 0) {
00211 if (action == HashTable::INSERT) {
00212 hash_table[act_tablesize] = node;
00213 act_tablesize++;
00214 node->insert(v,strsize,act_tablesize);
00215 return act_tablesize;
00216 }
00217 else if (action == HashTable::GETIDNO) {
00218 return 0;
00219 }
00220 else {
00221 warning("HashTable.hash(), unknown hash action type");
00222 }
00223 }
00224 else {
00225 if (node->equal(v)) {
00226 return node->id_no();
00227 }
00228 else {
00229 new_h = (h + skip) % ext_tablesize;
00230 h = (new_h == h) ? (h+1)% ext_tablesize : new_h;
00231 node = &hash_storage[h];
00232 }
00233 }
00234 }
00235 return 0;
00236 }
00237
00238 unsigned HashTable::insert(const void* vv)
00239 {
00240 if (ext_tablesize==0) {
00241 warning("HashTable.insert(): HashTable is null, do resize() first");
00242 return 0;
00243 }
00244 const char *v = (const char *)vv;
00245 unsigned id;
00246 id = hash(v,HashTable::INSERT);
00247 if (id == 0) {
00248 unsigned k = act_tablesize;
00249 HashTable TMP;
00250 TMP.copyfrom(*this);
00251 maxsize(static_cast<unsigned>(next_prime(static_cast<long>(ext_tablesize+51))));
00252 for (unsigned i=0; i<k; i++) {
00253 id = hash(TMP.hash_table[i]->data,HashTable::INSERT);
00254 if (id == 0) throw exception("hashtable size too small");
00255 }
00256 id = hash(v,HashTable::INSERT);
00257 if (id == 0) throw exception("hashtable size too small");
00258 }
00259 return id;
00260 }
00261
00262 void HashTable::change_id(const unsigned oldid,const unsigned newid)
00263 {
00264
00265
00266
00267
00268
00269
00270
00271
00272 if (oldid >act_tablesize || newid>act_tablesize) throw exception("HashTable.change_id(oldid,newid): invalid oldid or newid");
00273 hash_table[oldid-1]->id = newid;
00274 id_changed = 1;
00275 }
00276
00277 void HashTable::reorder(void)
00278 {
00279 if (id_changed == 0) return;
00280 unsigned i,id;
00281 for (i=0; i<ext_tablesize; i++) {
00282 id = hash_storage[i].id_no();
00283 if (id > 0) hash_table[id-1] = &(hash_storage[i]);
00284 }
00285 }
00286
00287 unsigned HashTable::get_id(const void *vv)
00288 {
00289 const char *v = (const char *)vv;
00290 return hash(v,HashTable::GETIDNO);
00291 }
00292
00293 const void* HashTable::find(const unsigned id) const
00294 {
00295 if (id == 0 || id > act_tablesize) {
00296 throw exception("HashTable.find(id): range error");
00297 }
00298 return (const void *)(hash_table[id-1]->data);
00299 }
00300
00301 HashTable& HashTable::resize(const unsigned n, const size_t s)
00302 {
00303 if (n==0) {
00304 release();
00305 return *this;
00306 }
00307 unsigned i;
00308 unsigned tmpsize = static_cast<unsigned>(next_prime(static_cast<long>(1.30*n + 50.0)));
00309
00310 if (tmpsize != ext_tablesize || datasize != s) {
00311 ext_tablesize = tmpsize;
00312 datasize = s;
00313 if(hash_storage){
00314 delete [] hash_storage;
00315 hash_storage=0;
00316 }
00317 if (ext_tablesize>0){
00318 hash_storage = new HashNode [ext_tablesize];
00319 }
00320 else{
00321 hash_storage = 0;
00322 }
00323 check_ptr(hash_storage);
00324 for (int i=0; i<ext_tablesize; ++i) hash_storage[i].resize(datasize);
00325 if(hash_table){
00326 delete [] hash_table;
00327 hash_table=0;
00328 }
00329 if(ext_tablesize>0){
00330 hash_table = new HashNode*[ext_tablesize];
00331 }
00332 else {
00333 hash_table = 0;
00334 }
00335 check_ptr(hash_table);
00336 }
00337 else {
00338 for (i=0; i<act_tablesize; i++) hash_table[i]->id = 0;
00339 }
00340 tablesize = n;
00341 act_tablesize = 0;
00342 return *this;
00343 }
00344
00345 void HashTable::maxsize(const unsigned ms)
00346 {
00347 if (ext_tablesize == ms) return;
00348 ext_tablesize = ms;
00349 if(hash_storage){
00350 delete [] hash_storage;
00351 hash_storage=0;
00352 }
00353 if (ext_tablesize>0){
00354 hash_storage = new HashNode [ext_tablesize];
00355 }
00356 else{
00357 hash_storage = 0;
00358 }
00359 check_ptr(hash_storage);
00360 for (int i=0; i<ext_tablesize; ++i) hash_storage[i].resize(datasize);
00361
00362 if(hash_table){
00363 delete [] hash_table;
00364 hash_table=0;
00365 }
00366 if(ext_tablesize>0){
00367 hash_table = new HashNode*[ext_tablesize];
00368 }
00369 else {
00370 hash_table = 0;
00371 }
00372 check_ptr(hash_table);
00373 act_tablesize = 0;
00374 }
00375
00376 void HashTable::release(void)
00377 {
00378 if (hash_storage) {delete [] hash_storage; hash_storage = 0;}
00379 if (hash_table) {delete [] hash_table; hash_table = 0;}
00380 tablesize = 0;
00381 datasize = 0;
00382 ext_tablesize = 0;
00383 }
00384
00385 void HashTable::save(const char fname[],const int io_mode)
00386 {
00387 std::ofstream hfile;
00388 hfile.open(fname,(OpenModeType)io_mode);
00389 if (!hfile) throw exception(" HashTable::save(): cannot open file");
00390 save_to_disk(hfile,0);
00391 hfile.close();
00392 }
00393
00394 void HashTable::input(const char fname[])
00395 {
00396 std::ifstream hfile(fname,std::ios::in);
00397 if (!hfile) throw exception(" HashTable::input(): cannot open file");
00398 input_from_disk(hfile);
00399 hfile.close();
00400 }
00401
00402
00403
00404
00405
00406
00407 void HashTable::save_to_disk(std::ostream& stream,const int relse)
00408 {
00409 unsigned ts,act_ts,ext_ts,tpos;
00410 size_t ds;
00411 ts = tablesize;
00412 act_ts = act_tablesize;
00413 ext_ts = ext_tablesize;
00414 ds = datasize;
00415
00416 stream.write((char*)&ts,sizeof(unsigned));
00417 stream.write((char*)&act_ts,sizeof(unsigned));
00418 stream.write((char*)&ext_ts,sizeof(unsigned));
00419 stream.write((char*)&ds,sizeof(size_t));
00420
00421 HashNode *node;
00422 for (tpos=0; tpos<ext_tablesize; tpos++) {
00423 node = &hash_storage[tpos];
00424 if (node->data) {
00425 stream.write((char*) &tpos,sizeof(unsigned));
00426 stream.write((char*) &(node->id),sizeof(unsigned));
00427 stream.write((char*) &(node->datasize),sizeof(size_t));
00428 stream.write((char*)node->data,node->datasize);
00429 }
00430 }
00431 if (relse) release();
00432 }
00433
00434 void HashTable::input_from_disk(std::istream& stream)
00435 {
00436 unsigned ts,act_ts,ext_ts,tpos,id;
00437 size_t ds = 0;
00438 ts = act_ts = ext_ts = tpos = id = 0;
00439 stream.read((char*) &ts,sizeof(unsigned));
00440 stream.read((char*) &act_ts,sizeof(unsigned));
00441 stream.read((char*) &ext_ts,sizeof(unsigned));
00442 stream.read((char*) &ds,sizeof(size_t));
00443
00444 resize(ts,ds);
00445 maxsize(ext_ts);
00446 act_tablesize = act_ts;
00447
00448 HashNode *node;
00449 for (ts=0; ts<act_tablesize; ts++) {
00450 stream.read((char*) &tpos,sizeof(unsigned));
00451 stream.read((char*) &(id),sizeof(unsigned));
00452 stream.read((char*) &(ds),sizeof(size_t));
00453 node = &hash_storage[tpos];
00454 node->id = id;
00455 if (datasize == 0) {
00456 node->datasize = ds;
00457 if(ds>0){
00458 node->data = new char [ds];
00459 }
00460 else {
00461 node->data = 0;
00462 }
00463 }
00464 stream.read((char*)node->data,ds);
00465 hash_table[id-1] = node;
00466 }
00467 }
00468
00469 void HashTable::display(displaytype dply)
00470 {
00471 for (unsigned i=0; i<act_tablesize; i++) dply(hash_table[i]->data);
00472 }
00473 }
00474 #undef HASH_CONST