00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef _PkgDb_hash_h
00023 #define _PkgDb_hash_h
00024
00025 #include <list>
00026 #include <vector>
00027 #include <string>
00028 #include <iostream>
00029 #include <assert.h>
00030
00031 const size_t primes[] = { 31, 101, 503, 1009, 2003, 5003 };
00032
00033 inline size_t hashfun( unsigned p ) { return size_t(p); };
00034 size_t hashfun( const std::string& s );
00035 size_t hashfun( std::string& s );
00036 size_t hashfun( const char * p );
00037
00038 template<class Key>
00039 struct basicHashElt {
00040 basicHashElt *next;
00041 Key key;
00042 basicHashElt(const Key& k) : next(0), key(k) {};
00043 };
00044
00045 template<class Key, class list_elem>
00046 class basic_hash {
00047 public:
00048 typedef size_t size_type;
00049 typedef list_elem list_type;
00050 typedef std::vector<list_elem*> vector_type;
00051 typedef size_type (*hashfun_t)( const Key& );
00052
00053 template<class le>
00054 class hash_iterator {
00055 friend class basic_hash<Key, list_elem>;
00056 typedef std::forward_iterator_tag iterator_category;
00057 private:
00058 le *current;
00059 size_type adr;
00060 const vector_type *vec;
00061
00062 public:
00063 hash_iterator() : vec(0) {}
00064 hash_iterator( le *c, size_type a, const vector_type *v )
00065 : current(c), adr(a), vec(v) {}
00066
00067 hash_iterator( const hash_iterator<list_elem>& i2 ) {
00068 const hash_iterator& i3 = (const hash_iterator&)i2;
00069 current = i3.current;
00070 adr = i3.adr;
00071 vec = i3.vec;
00072 }
00073
00074
00075
00076 operator const void* () const { return vec; }
00077 bool operator! () const { return vec == 0; }
00078
00079 le& operator* () const {
00080 assert(vec);
00081 return *current;
00082 }
00083 le* operator-> () const {
00084 assert(vec);
00085 return &*current;
00086 }
00087
00088 hash_iterator& operator++ () {
00089 current = (le *)current->next;
00090
00091
00092 if (!current) {
00093 while (++adr < vec->size())
00094 if ((current = (*vec)[adr]))
00095 break;
00096 if (adr == vec->size())
00097
00098 vec = 0;
00099 }
00100 return *this;
00101 }
00102 hash_iterator operator++ (int) {
00103 iterator temp = *this;
00104 operator++();
00105 return temp;
00106 }
00107
00108 bool operator== ( const hash_iterator& x ) const {
00109 return (!vec && !x.vec) ||
00110 (vec && x.vec && current == x.current);
00111 }
00112
00113 bool operator!= ( const hash_iterator& x ) const {
00114 return !operator==(x);
00115 }
00116 };
00117
00118 typedef hash_iterator<list_elem> iterator;
00119 typedef hash_iterator<const list_elem> const_iterator;
00120
00121 protected:
00122 vector_type v;
00123 size_type vsize;
00124 size_type n_elements;
00125 size_type n_buckets;
00126 hashfun_t hf;
00127 private:
00128
00129
00130 void construct( const basic_hash& S ) {
00131 hf = S.hf;
00132 vsize = S.vsize;
00133 v = vector_type( vsize, 0 );
00134 n_elements = 0;
00135 n_buckets = 0;
00136 for( iterator t = S.begin(); t != S.end(); ++t )
00137 insert( t );
00138 }
00139
00140 void resize( size_type new_size ) {
00141 basic_hash newhash(new_size, hf);
00142 for( iterator t = begin(); t != end(); ++t )
00143 newhash.insert( t );
00144 swap( newhash );
00145 }
00146
00147 size_type next_size() {
00148 unsigned i;
00149 for( i = 0; i < sizeof(primes)/sizeof(*primes)-1; ++i ) {
00150 if (primes[i] > vsize)
00151 break;
00152 }
00153 return primes[i];
00154 }
00155
00156 bool resize_if_needed() {
00157
00158 if (5*n_elements >= 9*n_buckets) {
00159 resize( next_size() );
00160 return true;
00161 }
00162 return false;
00163 }
00164
00165 public:
00166 basic_hash( size_type size = 31, hashfun_t f = hashfun ) :
00167 v(size, 0), vsize(size), n_elements(0), n_buckets(0), hf(f) {}
00168 basic_hash( const basic_hash& S ) { construct(S); }
00169 ~basic_hash() { clear(); }
00170 basic_hash& operator= ( const basic_hash& S ) {
00171 if (this != &S) {
00172 clear();
00173 construct(S);
00174 }
00175 return *this;
00176 }
00177
00178 iterator begin() {
00179 size_type adr = 0;
00180 while( adr < vsize ) {
00181 if (!v[adr])
00182 ++adr;
00183 else
00184 return iterator(v[adr], adr, &v);
00185 }
00186 return iterator();
00187 }
00188 iterator end() { return iterator(); }
00189 const_iterator const_begin() const {
00190 size_type adr = 0;
00191 while( adr < vsize ) {
00192 if (!v[adr])
00193 ++adr;
00194 else
00195 return const_iterator(v[adr], adr, &v);
00196 }
00197 return const_iterator();
00198 }
00199 const_iterator begin() const { return const_begin(); }
00200 const_iterator const_end() const { return const_iterator(); }
00201 const_iterator end() const { return const_end(); }
00202
00203
00204 void clear() {
00205 for( size_type i = 0; i < vsize; i++ ) {
00206 if (list_elem *p = v[i]) {
00207 list_elem *pnext;
00208 while( p ) {
00209 pnext = (list_elem *)p->next;
00210 delete p;
00211 p = pnext;
00212 }
00213 v[i] = 0;
00214 }
00215 }
00216 n_elements = 0;
00217 n_buckets = 0;
00218 }
00219
00220 list_elem *find( const Key& k, list_elem ***where = 0 ) {
00221 size_type adr = hf(k) % vsize;
00222 list_elem **p = &v[adr];
00223 if (!*p) {
00224 if (where) {
00225 *where = p;
00226 ++n_buckets;
00227 }
00228 return 0;
00229 }
00230 for( ; *p; p = (list_elem **)&((*p)->next) ) {
00231 if ((*p)->key == k)
00232 return *p;
00233 }
00234 if (where) *where = p;
00235 return 0;
00236 }
00237
00238 const list_elem *find( const Key& k ) const {
00239 return (const_cast<basic_hash<Key,list_elem>*>(this)->find(k));
00240 }
00241
00242 bool exists( const Key& k ) const {
00243 return (const_cast<basic_hash<Key,list_elem>*>(this)->find(k) != 0);
00244 };
00245
00246
00247 list_elem* insert( const Key& k ) {
00248 list_elem **p;
00249 list_elem *q;
00250 if ((q = find( k, &p )))
00251 return q;
00252 else {
00253 *p = new list_elem(k);
00254 ++n_elements;
00255 return *p;
00256 }
00257 }
00258 list_elem* insert( const list_elem *e ) { return insert( e->k ); }
00259 list_elem* insert( const iterator& i ) { return insert( i->k ); }
00260
00261 bool erase( const Key& k ) {
00262 size_type adr = hf(k) % vsize;
00263 list_elem **p = &v[adr];
00264 if (!*p)
00265 return false;
00266 for( ; *p; p = (list_elem **)&((*p)->next) ) {
00267 if ((*p)->key == k) {
00268 list_elem *q = *p;
00269 *p = (list_elem *)q->next;
00270 delete q;
00271 --n_elements;
00272 if (!*p) --n_buckets;
00273 return true;
00274 }
00275 }
00276 return false;
00277 }
00278 bool erase( const iterator& i ) { return erase( i->k ); }
00279
00280 size_type size() const { return n_elements; }
00281 size_type max_size() const { return vsize; }
00282 bool empty() const { return n_elements == 0; }
00283 void swap( basic_hash& s ) {
00284 v.swap(s.v);
00285 std::swap(n_elements, s.n_elements);
00286 std::swap(n_buckets, s.n_buckets);
00287 std::swap(hf, s.hf);
00288 std::swap(vsize, s.vsize);
00289 }
00290
00291 void statistics();
00292 };
00293
00294
00295 template<class Key>
00296 class noval_hash : public basic_hash<Key,basicHashElt<Key> > {};
00297
00298 template<class Key, class T>
00299 struct HashElt : public basicHashElt<Key> {
00300 T value;
00301 HashElt(const Key& k, const T& v) : basicHashElt<Key>(k), value(v) {};
00302 };
00303
00304 template<class Key, class T>
00305 class hash : public basic_hash<Key,HashElt<Key,T> > {
00306 typedef basicHashElt<Key> list_elem;
00307 typedef HashElt<Key,T> list_type;
00308
00309
00310 void construct( const hash& S ) {
00311 this->hf = S.hf;
00312 this->vsize = S.vsize;
00313 this->v = vector_type( this->vsize, 0 );
00314 this->n_elements = 0;
00315 this->n_buckets = 0;
00316 for( typename basic_hash<Key,HashElt<Key,T> >::const_iterator t = S.begin(); t != S.end(); ++t )
00317 insert( t );
00318 }
00319
00320 void resize( typename basic_hash<Key,HashElt<Key,T> >::size_type new_size ) {
00321 hash newhash(new_size, this->hf);
00322 for( typename basic_hash<Key,HashElt<Key,T> >::const_iterator t = this->begin(); t != this->end(); ++t )
00323 newhash.insert( t );
00324 swap( newhash );
00325 }
00326
00327 public:
00328 hash( typename basic_hash<Key,HashElt<Key,T> >::size_type size = 31, typename basic_hash<Key,HashElt<Key,T> >::hashfun_t f = hashfun ) :
00329 basic_hash<Key,HashElt<Key,T> >( size, hashfun ) {};
00330 hash( const hash& S ) { construct(S); }
00331 ~hash() { this->clear(); }
00332 hash& operator= ( const hash& S ) {
00333 if (this != &S) {
00334 this->clear();
00335 construct(S);
00336 }
00337 return *this;
00338 }
00339
00340 T& operator[] ( const Key& k ) {
00341 return insert( k, T() )->value;
00342 }
00343
00344 T operator[] ( const Key& k ) const {
00345 const list_type *p = find(k);
00346 return p ? p->value : T();
00347 }
00348
00349 list_type* insert( const Key& k, const T& v ) {
00350 list_type *q;
00351 list_type **p;
00352 if ((q = find( k, &p )))
00353 return q;
00354 else {
00355 *p = new list_type(k,v);
00356 ++this->n_elements;
00357 return *p;
00358 }
00359 }
00360 list_type* insert( const list_type *e ) {
00361 return insert( e->key, e->value ); }
00362 list_type* insert( const typename hash<Key,T>::iterator& i ) {
00363 return insert( i->key, i->value ); }
00364 list_type* insert( const typename hash<Key,T>::const_iterator& i ) {
00365 return insert( i->key, i->value ); }
00366 };
00367
00368 template <class Key, class list_elem>
00369 void basic_hash<Key, list_elem>::statistics() {
00370 std::cout << this->n_elements << " elements in " << this->n_buckets << "/" << this->vsize
00371 << " buckets\n";
00372 unsigned free = 0, alloced = 0, maxlen = 0, sumlen = 0;
00373 size_type maxbuck;
00374
00375 for( size_type adr = 0; adr < this->vsize; ++adr ) {
00376 if (!v[adr])
00377 ++free;
00378 else {
00379 ++alloced;
00380 size_type len = 0;
00381 for( list_elem *p = (list_elem*)v[adr]; p; p = (list_elem*)p->next )
00382 ++len;
00383 if (len > maxlen) {
00384 maxlen = len;
00385 maxbuck = adr;
00386 }
00387 sumlen += len;
00388 }
00389 }
00390 std::cout << free << " buckets free, " << alloced << " used\n";
00391 std::cout << "avg. list len " << (float)sumlen/alloced <<
00392 " max len " << maxlen << std::endl;
00393
00394 std::cout << "Estimated avg. list len " << (float)this->n_elements/this->n_buckets << std::endl;
00395 }
00396
00397 #endif
00398
00399
00400
00401