Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

hash.h

Go to the documentation of this file.
00001 /*---------------------------------------------------------------------\
00002 |                                                                      |
00003 |                      __   __    ____ _____ ____                      |
00004 |                      \ \ / /_ _/ ___|_   _|___ \                     |
00005 |                       \ V / _` \___ \ | |   __) |                    |
00006 |                        | | (_| |___) || |  / __/                     |
00007 |                        |_|\__,_|____/ |_| |_____|                    |
00008 |                                                                      |
00009 |                               core system                            |
00010 |                                                     (C) 2002 SuSE AG |
00011 \----------------------------------------------------------------------/
00012 
00013    File:       hash.h
00014    Purpose:    declare hash functions, see PHI docu
00015    Author:     Roman Hodek <roman.hodek@informatik.uni-erlangen.de>
00016    Maintainer: Ludwig Nussel <lnussel@suse.de>
00017 
00018    this file is taken from PHI
00019 
00020 /-*/
00021 
00022 #ifndef _PkgDb_hash_h                                        /* -*- C++ -*- */
00023 #define _PkgDb_hash_h
00024 
00025 #include <list>
00026 #include <vector>
00027 #include <string>
00028 #include <iostream>
00029 #include <assert.h>
00030 
00031 const size_t primes[] = { 31, 101, 503, 1009, 2003, 5003 };
00032 
00033 inline size_t hashfun( unsigned p ) { return size_t(p); };
00034 size_t hashfun( const std::string& s );
00035 size_t hashfun( std::string& s );
00036 size_t hashfun( const char * p );
00037 
00038 template<class Key>
00039 struct basicHashElt {
00040         basicHashElt *next;
00041         Key key;
00042         basicHashElt(const Key& k) : next(0), key(k) {};
00043 };
00044 
00045 template<class Key, class list_elem>
00046 class basic_hash {
00047 public:
00048         typedef size_t size_type;
00049         typedef list_elem list_type;
00050         typedef std::vector<list_elem*> vector_type;
00051         typedef size_type (*hashfun_t)( const Key& );
00052 
00053         template<class le>
00054         class hash_iterator {
00055                 friend class basic_hash<Key, list_elem>;
00056                 typedef std::forward_iterator_tag iterator_category;
00057           private:
00058                 le *current;
00059                 size_type adr;
00060                 const vector_type *vec;
00061 
00062           public:
00063                 hash_iterator() : vec(0) {}
00064                 hash_iterator( le *c, size_type a, const vector_type *v )
00065                         : current(c), adr(a), vec(v) {}
00066                 // make it possible to convert an iterator to a const_iterator
00067                 hash_iterator( const hash_iterator<list_elem>& i2 ) {
00068                         const hash_iterator& i3 = (const hash_iterator&)i2;
00069                         current = i3.current;
00070                         adr = i3.adr;
00071                         vec = i3.vec;
00072                 }
00073 
00074                 // The following operators allow to test a hash in a condition if the
00075                 // iterator is defined
00076                 operator const void* () const { return vec; }
00077                 bool operator! () const { return vec == 0; }
00078 
00079                 le& operator* () const {
00080                         assert(vec);
00081                         return *current;
00082                 }
00083                 le* operator-> () const {
00084                         assert(vec);
00085                         return &*current;
00086                 }
00087 
00088            hash_iterator& operator++ () {
00089                    current = (le *)current->next;
00090                    // If -- after stepping forth in the list -- we've reached the end,
00091                    // we have to go to the next bucket.
00092                    if (!current) {
00093                            while (++adr < vec->size())
00094                                    if ((current = (*vec)[adr]))
00095                                            break;
00096                            if (adr == vec->size())
00097                                    // end of vector reached
00098                                    vec = 0;
00099                    }
00100                    return *this;
00101            }
00102            hash_iterator operator++ (int) {
00103                    iterator temp = *this;
00104                    operator++();
00105                    return temp;
00106            }
00107 
00108            bool operator== ( const hash_iterator& x ) const {
00109                    return (!vec && !x.vec) ||
00110                                   (vec && x.vec && current == x.current);
00111            }
00112 
00113            bool operator!= ( const hash_iterator& x ) const {
00114                   return !operator==(x);
00115            }
00116         }; // iterator
00117 
00118         typedef hash_iterator<list_elem> iterator;
00119         typedef hash_iterator<const list_elem> const_iterator;
00120 
00121 protected:
00122         vector_type v;
00123         size_type vsize;      // # of buckets (vector size)
00124         size_type n_elements; // # of stored elements
00125         size_type n_buckets;  // # of used buckets
00126         hashfun_t hf;
00127 private:
00128 
00129         // helper called from copy constructor and operator=
00130         void construct( const basic_hash& S ) {
00131                 hf = S.hf;
00132                 vsize = S.vsize;
00133                 v = vector_type( vsize, 0 );
00134                 n_elements = 0;
00135                 n_buckets = 0;
00136                 for( iterator t = S.begin(); t != S.end(); ++t )
00137                         insert( t );
00138         }
00139 
00140         void resize( size_type new_size ) {
00141                 basic_hash newhash(new_size, hf);
00142                 for( iterator t = begin(); t != end(); ++t )
00143                         newhash.insert( t );
00144                 swap( newhash );
00145         }
00146 
00147         size_type next_size() {
00148                 unsigned i;
00149                 for( i = 0; i < sizeof(primes)/sizeof(*primes)-1; ++i ) {
00150                         if (primes[i] > vsize)
00151                                 break;
00152                 }
00153                 return primes[i];
00154         }
00155 
00156         bool resize_if_needed() {
00157                 // resize hash if average list length exceeds 1.8 = 9/5
00158                 if (5*n_elements >= 9*n_buckets) {
00159                         resize( next_size() );
00160                         return true;
00161                 }
00162                 return false;
00163         }
00164 
00165   public:
00166         basic_hash( size_type size = 31, hashfun_t f = hashfun ) :
00167                 v(size, 0), vsize(size), n_elements(0), n_buckets(0), hf(f) {}
00168         basic_hash( const basic_hash& S ) { construct(S); }
00169         ~basic_hash() { clear(); }
00170         basic_hash& operator= ( const basic_hash& S ) {
00171                 if (this != &S) {
00172                         clear();
00173                         construct(S);
00174                 }
00175                 return *this;
00176         }
00177 
00178         iterator begin() {
00179                 size_type adr = 0;
00180                 while( adr < vsize ) {
00181                         if (!v[adr])
00182                                 ++adr;
00183                         else
00184                                 return iterator(v[adr], adr, &v);
00185                 }
00186                 return iterator();
00187         }
00188         iterator end() { return iterator(); }
00189         const_iterator const_begin() const {
00190                 size_type adr = 0;
00191                 while( adr < vsize ) {
00192                         if (!v[adr])
00193                                 ++adr;
00194                         else
00195                                 return const_iterator(v[adr], adr, &v);
00196                 }
00197                 return const_iterator();
00198         }
00199         const_iterator begin() const { return const_begin(); }
00200         const_iterator const_end() const { return const_iterator(); }
00201         const_iterator end() const { return const_end(); }
00202 
00203         // destruct all contents
00204         void clear() {
00205                 for( size_type i = 0; i < vsize; i++ ) {
00206                         if (list_elem *p = v[i]) {
00207                                 list_elem *pnext;
00208                                 while( p ) {
00209                                         pnext = (list_elem *)p->next;
00210                                         delete p;
00211                                         p = pnext;
00212                                 }
00213                                 v[i] = 0;
00214                         }
00215                 }
00216                 n_elements = 0;
00217                 n_buckets = 0;
00218         }
00219 
00220         list_elem *find( const Key& k, list_elem ***where = 0 ) {
00221                 size_type adr = hf(k) % vsize;
00222                 list_elem **p = &v[adr];
00223                 if (!*p) {
00224                         if (where) {
00225                                 *where = p;
00226                                 ++n_buckets;
00227                         }
00228                         return 0;
00229                 }
00230                 for( ; *p; p = (list_elem **)&((*p)->next) ) {
00231                         if ((*p)->key == k)
00232                                 return *p;
00233                 }
00234                 if (where) *where = p;
00235                 return 0;
00236         }
00237 
00238         const list_elem *find( const Key& k ) const {
00239                 return (const_cast<basic_hash<Key,list_elem>*>(this)->find(k));
00240         }
00241 
00242         bool exists( const Key& k ) const {
00243                 return (const_cast<basic_hash<Key,list_elem>*>(this)->find(k) != 0);
00244         };
00245 
00246         // insert methods for compatibility with STL
00247         list_elem* insert( const Key& k ) {
00248                 list_elem **p;
00249                 list_elem *q;
00250                 if ((q = find( k, &p )))
00251                         return q;
00252                 else {
00253                         *p = new list_elem(k);
00254                         ++n_elements;
00255                         return *p;
00256                 }
00257         }
00258         list_elem* insert( const list_elem *e ) { return insert( e->k ); }
00259         list_elem* insert( const iterator& i )  { return insert( i->k ); }
00260 
00261         bool erase( const Key& k ) {
00262                 size_type adr = hf(k) % vsize;
00263                 list_elem **p = &v[adr];
00264                 if (!*p)
00265                         return false;
00266                 for( ; *p; p = (list_elem **)&((*p)->next) ) {
00267                         if ((*p)->key == k) {
00268                                 list_elem *q = *p;
00269                                 *p = (list_elem *)q->next;
00270                                 delete q;
00271                                 --n_elements;
00272                                 if (!*p) --n_buckets;
00273                                 return true;
00274                         }
00275                 }
00276                 return false;
00277         }
00278         bool erase( const iterator& i ) { return erase( i->k ); }
00279 
00280         size_type size()         const { return n_elements; }
00281         size_type max_size() const { return vsize; }
00282         bool empty()             const { return n_elements == 0; }
00283         void swap( basic_hash& s ) {
00284            v.swap(s.v);
00285            std::swap(n_elements, s.n_elements);
00286            std::swap(n_buckets, s.n_buckets);
00287            std::swap(hf, s.hf);
00288            std::swap(vsize, s.vsize);
00289         }
00290 
00291         void statistics();
00292 };
00293 
00294 
00295 template<class Key>
00296 class noval_hash : public basic_hash<Key,basicHashElt<Key> > {};
00297 
00298 template<class Key, class T>
00299 struct HashElt : public basicHashElt<Key> {
00300         T value;
00301         HashElt(const Key& k, const T& v) : basicHashElt<Key>(k), value(v) {};
00302 };
00303 
00304 template<class Key, class T>
00305 class hash : public basic_hash<Key,HashElt<Key,T> > {
00306         typedef basicHashElt<Key> list_elem;
00307         typedef HashElt<Key,T> list_type;
00308 
00309         // helper called from copy constructor and operator=
00310         void construct( const hash& S ) {
00311                 this->hf = S.hf;
00312                 this->vsize = S.vsize;
00313                 this->v = vector_type( this->vsize, 0 );
00314                 this->n_elements = 0;
00315                 this->n_buckets = 0;
00316                 for( typename basic_hash<Key,HashElt<Key,T> >::const_iterator t = S.begin(); t != S.end(); ++t )
00317                         insert( t );
00318         }
00319 
00320         void resize( typename basic_hash<Key,HashElt<Key,T> >::size_type new_size ) {
00321                 hash newhash(new_size, this->hf);
00322                 for( typename basic_hash<Key,HashElt<Key,T> >::const_iterator t = this->begin(); t != this->end(); ++t )
00323                         newhash.insert( t );
00324                 swap( newhash );
00325         }
00326 
00327 public:
00328         hash( typename basic_hash<Key,HashElt<Key,T> >::size_type size = 31, typename basic_hash<Key,HashElt<Key,T> >::hashfun_t f = hashfun ) :
00329                 basic_hash<Key,HashElt<Key,T> >( size, hashfun ) {};
00330         hash( const hash& S ) { construct(S); }
00331         ~hash() { this->clear(); }
00332         hash& operator= ( const hash& S ) {
00333                 if (this != &S) {
00334                         this->clear();
00335                         construct(S);
00336                 }
00337                 return *this;
00338         }
00339 
00340         T& operator[] ( const Key& k ) {
00341                 return insert( k, T() )->value;
00342         }
00343 
00344         T operator[] ( const Key& k ) const {
00345                 const list_type *p = find(k);
00346                 return p ? p->value : T();
00347         }
00348 
00349         list_type* insert( const Key& k, const T& v ) {
00350                 list_type *q;
00351                 list_type **p;
00352                 if ((q = find( k, &p )))
00353                         return q;
00354                 else {
00355                         *p = new list_type(k,v);
00356                         ++this->n_elements;
00357                         return *p;
00358                 }
00359         }
00360         list_type* insert( const list_type *e ) {
00361                 return insert( e->key, e->value ); }
00362         list_type* insert( const typename hash<Key,T>::iterator& i ) {
00363                 return insert( i->key, i->value ); }
00364         list_type* insert( const typename hash<Key,T>::const_iterator& i ) {
00365                 return insert( i->key, i->value ); }
00366 };
00367 
00368 template <class Key, class list_elem>
00369 void basic_hash<Key, list_elem>::statistics() {
00370         std::cout << this->n_elements << " elements in " << this->n_buckets << "/" << this->vsize
00371                  << " buckets\n";
00372         unsigned free = 0, alloced = 0, maxlen = 0, sumlen = 0;
00373         size_type maxbuck;
00374 
00375         for( size_type adr = 0; adr < this->vsize; ++adr ) {
00376                 if (!v[adr])
00377                         ++free;
00378                 else {
00379                         ++alloced;
00380                         size_type len = 0;
00381                         for( list_elem *p = (list_elem*)v[adr]; p; p = (list_elem*)p->next )
00382                                 ++len;
00383                         if (len > maxlen) {
00384                                 maxlen = len;
00385                                 maxbuck = adr;
00386                         }
00387                         sumlen += len;
00388                 }
00389         }
00390         std::cout << free << " buckets free, " << alloced << " used\n";
00391         std::cout << "avg. list len " << (float)sumlen/alloced <<
00392                 " max len " << maxlen << std::endl;
00393 
00394         std::cout << "Estimated avg. list len " << (float)this->n_elements/this->n_buckets << std::endl;
00395 }
00396 
00397 #endif  /* _PkgDb_hash_h */
00398 
00399 // Local Variables:
00400 // tab-width: 4
00401 // End:

Generated on Fri Feb 24 00:30:02 2006 for liby2util by  doxygen 1.4.4