// ============================================================================ // // = LIBRARY // ULib - c++ library // // = FILENAME // hash_map.cpp // // = AUTHOR // Stefano Casazza // // ============================================================================ #include #include bool UHashMap::istream_loading; UStringRep* UHashMap::pkey; UHashMap::UHashMap(uint32_t n, bPFptpcu _set_index) { U_TRACE_REGISTER_OBJECT(0, UHashMap, "%u,%p", n, _set_index) set_index = _set_index; init(n); } UHashMap::UHashMap(uint32_t n, bool ignore_case) { U_TRACE_REGISTER_OBJECT(0, UHashMap, "%u,%b", n, ignore_case) set_index = (ignore_case ? setIndexIgnoreCase : setIndex); init(n); } void UHashMap::allocate(uint32_t n) { U_TRACE(0, "UHashMap::allocate(%u)", n) U_CHECK_MEMORY if (_capacity) _deallocate(); _allocate(n); } void UHashMap::lookup(const UStringRep* keyr) { U_TRACE(0, "UHashMap::lookup(%V)", keyr) U_CHECK_MEMORY U_INTERNAL_ASSERT_POINTER(set_index) U_INTERNAL_ASSERT_MAJOR(_capacity, 0) const UStringRep* keyn; uint32_t sz1 = keyr->size(); const char* ptr1 = keyr->data(); bool ignore_case = set_index(this, ptr1, sz1); U_INTERNAL_DUMP("index = %u ignore_case = %b", index, ignore_case) U_INTERNAL_ASSERT_MINOR(index, _capacity) for (node = table[index]; node; node = node->next) { U_INTERNAL_ASSERT_POINTER(node->key) uint32_t sz2 = (keyn = node->key)->size(); U_INTERNAL_DUMP("node->key(%u) = %p %V", sz2, keyn, keyn) U_INTERNAL_ASSERT_MAJOR(sz2, 0) if (UStringRep::equal_lookup(keyr, ptr1, sz1, keyn, sz2, ignore_case)) { U_INTERNAL_DUMP("node = %p", node) return; } } } void* UHashMap::erase(const UStringRep* _key) { U_TRACE(0, "UHashMap::erase(%V)", _key) lookup(_key); if (node) { const void* _elem = node->elem; eraseAfterFind(); U_RETURN((void*)_elem); } U_RETURN((void*)0); } void UHashMap::insertAfterFind(const UStringRep* _key, const void* _elem) { U_TRACE(0, "UHashMap::insertAfterFind(%V,%p)", _key, _elem) U_CHECK_MEMORY U_INTERNAL_ASSERT_EQUALS(node, 0) U_INTERNAL_ASSERT_DIFFERS(_key, pkey) U_INTERNAL_DUMP("index = %u", index) /** * list self-organizing (move-to-front), we place before * the element at the beginning of the list of collisions */ U_NEW(UHashMapNode, table[index], UHashMapNode(_key, _elem, table[index], hash)); node = table[index]; ++_length; U_INTERNAL_DUMP("_length = %u", _length) } void UHashMap::_eraseAfterFind() { U_TRACE_NO_PARAM(0, "UHashMap::_eraseAfterFind()") U_CHECK_MEMORY U_INTERNAL_DUMP("node = %p", node) UHashMapNode* prev = 0; for (UHashMapNode* pnode = table[index]; pnode; pnode = pnode->next) { if (pnode == node) { /** * list self-organizing (move-to-front), we place before * the element at the beginning of the list of collisions */ if (prev) { prev->next = pnode->next; pnode->next = table[index]; table[index] = pnode; } U_INTERNAL_ASSERT_EQUALS(node,table[index]) break; } prev = pnode; } U_INTERNAL_DUMP("prev = %p", prev) /** * list self-organizing (move-to-front), we requires the * item to be deleted at the beginning of the list of collisions */ U_INTERNAL_ASSERT_EQUALS(node, table[index]) table[index] = node->next; } void UHashMap::eraseAfterFind() { U_TRACE_NO_PARAM(0, "UHashMap::eraseAfterFind()") _eraseAfterFind(); delete node; --_length; U_INTERNAL_DUMP("_length = %u", _length) } void UHashMap::replaceKey(const UString& _key) { U_TRACE(0, "UHashMap::replaceKey(%V)", _key.rep) UHashMapNode* pnode = node; _eraseAfterFind(); lookup(_key); U_INTERNAL_ASSERT_EQUALS(node,0) pnode->hash = hash; pnode->next = table[index]; ((UStringRep*)pnode->key)->release(); // NB: we decreases the reference string... pnode->key = _key.rep; ((UStringRep*)pnode->key)->hold(); // NB: we increases the reference string... /** * list self-organizing (move-to-front), we place before * the element at the beginning of the list of collisions */ node = table[index] = pnode; } void UHashMap::reserve(uint32_t n) { U_TRACE(0, "UHashMap::reserve(%u)", n) U_INTERNAL_ASSERT_MAJOR(_capacity, 1) uint32_t new_capacity = U_GET_NEXT_PRIME_NUMBER(n); if (new_capacity == _capacity) return; UHashMapNode** old_table = table; uint32_t old_capacity = _capacity, i; _allocate(new_capacity); #ifdef DEBUG int sum = 0, max = 0, min = 1024, width; #endif // we insert the old elements UHashMapNode* _next; for (i = 0; i < old_capacity; ++i) { if (old_table[i]) { node = old_table[i]; # ifdef DEBUG ++sum; width = -1; # endif do { # ifdef DEBUG ++width; # endif _next = node->next; index = node->hash % _capacity; U_INTERNAL_DUMP("i = %u index = %u hash = %u", i, index, node->hash) /** * list self-organizing (move-to-front), we place before * the element at the beginning of the list of collisions */ node->next = table[index]; table[index] = node; } while ((node = _next)); # ifdef DEBUG if (max < width) max = width; if (min > width) min = width; # endif } } UMemoryPool::_free(old_table, old_capacity, sizeof(UHashMapNode*)); U_INTERNAL_DUMP("OLD: collision(min,max) = (%3d,%3d) - distribution = %3f", min, max, (sum ? (double)_length / (double)sum : 0)) #ifdef DEBUG sum = 0, max = 0, min = 1024; UHashMapNode* _n; for (i = 0; i < _capacity; ++i) { if (table[i]) { _n = table[i]; ++sum; width = -1; do { ++width; _next = _n->next; } while ((_n = _next)); if (max < width) max = width; if (min > width) min = width; } } #endif U_INTERNAL_DUMP("NEW: collision(min,max) = (%3d,%3d) - distribution = %3f", min, max, (sum ? (double)_length / (double)sum : 0)) } #ifdef DEBUG bool UHashMap::check_memory() const // check all element { U_TRACE_NO_PARAM(0+256, "UHashMap::check_memory()") U_CHECK_MEMORY U_INTERNAL_DUMP("_length = %u", _length) if (_length) { const void* pelem; UHashMapNode* pnode; UHashMapNode* pnext; int sum = 0, max = 0, min = 1024, width; for (uint32_t _index = 0; _index < _capacity; ++_index) { pnode = table[_index]; if (pnode == 0) continue; ++sum; width = 0; U_INTERNAL_DUMP("_index = %u sum = %u", _index, sum) loop: U_INTERNAL_ASSERT_POINTER(pnode) U_INTERNAL_DUMP("pnode->key(%u) = %p %V", pnode->key->size(), pnode->key, pnode->key) U_INTERNAL_ASSERT_MAJOR(pnode->key->size(), 0) pelem = pnode->elem; U_INTERNAL_DUMP("pelem = %p width = %u", pelem, width) U_INTERNAL_ASSERT_EQUALS(((const UMemoryError*)pelem)->_this, (void*)U_CHECK_MEMORY_SENTINEL) if (pnode->next) { pnext = pnode->next; U_INTERNAL_DUMP("pnode = %p pnext = %p", pnode, pnext) U_INTERNAL_ASSERT_POINTER(pnext) U_INTERNAL_ASSERT_DIFFERS(pnode, pnext) ++width; pnode = pnext; goto loop; } if (max < width) max = width; if (min > width) min = width; } U_INTERNAL_DUMP("collision(min,max) = (%d,%d) - distribution = %f", min, max, (sum ? (double)_length / (double)sum : 0)) } U_RETURN(true); } #endif bool UHashMap::first() { U_TRACE_NO_PARAM(0, "UHashMap::first()") U_INTERNAL_DUMP("_length = %u", _length) for (index = 0; index < _capacity; ++index) { if (table[index]) { node = table[index]; U_RETURN(true); } } U_RETURN(false); } bool UHashMap::next() { U_TRACE_NO_PARAM(0, "UHashMap::next()") U_INTERNAL_DUMP("index = %u node = %p next = %p", index, node, node->next) if ((node = node->next)) U_RETURN(true); for (++index; index < _capacity; ++index) { if (table[index]) { node = table[index]; U_RETURN(true); } } U_RETURN(false); } void UHashMap::callForAllEntry(bPFprpv function) { U_TRACE(0, "UHashMap::callForAllEntry(%p)", function) #ifdef DEBUG int sum = 0, max = 0, min = 1024, width; #endif U_INTERNAL_DUMP("_length = %u", _length) UHashMapNode* _node; UHashMapNode* _next; UHashMapNode** ptr; UHashMapNode** end; for (end = (ptr = table) + _capacity; ptr < end; ++ptr) { if (*ptr) { _node = *ptr; # ifdef DEBUG ++sum; width = -1; # endif do { # ifdef DEBUG ++width; # endif _next = _node->next; if (function((UStringRep*)_node->key, (void*)_node->elem) == false) return; } while ((_node = _next)); # ifdef DEBUG if (max < width) max = width; if (min > width) min = width; # endif } } U_INTERNAL_DUMP("collision(min,max) = (%3d,%3d) - distribution = %3f", min, max, (sum ? (double)_length / (double)sum : 0)) } void UHashMap::getKeys(UVector& vec) { U_TRACE(0, "UHashMap::getKeys(%p)", &vec) UHashMapNode* _node; UHashMapNode* _next; UHashMapNode** ptr; UHashMapNode** end; for (end = (ptr = table) + _capacity; ptr < end; ++ptr) { if (*ptr) { _node = *ptr; do { vec.UVector::push(_node->key); _next = _node->next; } while ((_node = _next)); } } } void UHashMap::_callForAllEntrySorted(bPFprpv function) { U_TRACE(0, "UHashMap::_callForAllEntrySorted(%p)", function) U_INTERNAL_ASSERT_MAJOR(_length, 1) UVector vkey(_length); getKeys(vkey); U_ASSERT_EQUALS(_length, vkey.size()) vkey.sort(ignoreCase()); U_INTERNAL_ASSERT(check_memory()) for (uint32_t i = 0, n = _length; i < n; ++i) { UStringRep* r = vkey.UVector::at(i); lookup(r); U_INTERNAL_ASSERT_POINTER(node) if (function(r, (void*)node->elem) == false) return; } } // specializzazione stringa UString UHashMap::erase(const UString& _key) { U_TRACE(0, "UHashMap::erase(%V)", _key.rep) UHashMap::lookup(_key); if (node) { UString str(elem()); U_INTERNAL_DUMP("str.reference() = %u", str.reference()) U_INTERNAL_ASSERT_MAJOR(str.reference(), 0) eraseAfterFind(); U_INTERNAL_DUMP("str.reference() = %u", str.reference()) U_RETURN_STRING(str); } return UString::getStringNull(); } // OPERATOR [] UString UHashMap::at(const UStringRep* _key) { U_TRACE(0, "UHashMap::at(%V)", _key) UHashMap::lookup(_key); if (node) { UString str(elem()); U_RETURN_STRING(str); } return UString::getStringNull(); } UString UHashMap::operator[](const char* _key) { U_TRACE(0, "UHashMap::operator[](%S)", _key) U_INTERNAL_ASSERT_POINTER(pkey) pkey->str = _key; pkey->_length = u__strlen(_key, __PRETTY_FUNCTION__); return at(pkey); } void* UHashMap::erase(const char* _key) { U_TRACE(0, "UHashMap::erase(%S)", _key) U_INTERNAL_ASSERT_POINTER(pkey) pkey->str = _key; pkey->_length = u__strlen(_key, __PRETTY_FUNCTION__); return erase(pkey); } UString UHashMap::at(const char* _key, uint32_t keylen) { U_TRACE(0, "UHashMap::at(%.*S,%u)", keylen, _key, keylen) U_INTERNAL_ASSERT_POINTER(pkey) pkey->str = _key; pkey->_length = keylen; return at(pkey); } bool UHashMap::find(const char* _key, uint32_t keylen) { U_TRACE(0, "UHashMap::find(%.*S,%u)", keylen, _key, keylen) U_INTERNAL_ASSERT_POINTER(pkey) pkey->str = _key; pkey->_length = keylen; lookup(pkey); U_RETURN(node != 0); } uint32_t UHashMap::loadFromData(const char* ptr, uint32_t sz) { U_TRACE(0+256, "UHashMap::loadFromData(%.*S,%u)", sz, ptr, sz) U_INTERNAL_ASSERT_MAJOR(sz, 0) U_INTERNAL_ASSERT_MAJOR(_capacity, 1) const char* _end = ptr + sz; const char* _start = ptr; // NB: we need this way for plugin... char terminator = 0, c = *ptr; if (c == '{' || c == '[') { ++ptr; // skip '{' or '[' terminator = (c == '{' ? '}' : ']'); } U_INTERNAL_DUMP("terminator = %C", terminator) while (ptr < _end) { // U_INTERNAL_DUMP("ptr = %.*S", 20, ptr) c = *ptr++; if (u__isspace(c)) continue; // skip white-space // U_INTERNAL_DUMP("c = %C", c) if ( terminator == c || (terminator == 0 && (c == '}' || c == ']'))) { break; } if (c == '#') { do { c = *ptr++; } while (c != '\n' && ptr < _end); // skip line comment continue; } U_INTERNAL_ASSERT_EQUALS(u__isspace(c), false) UString _key(U_CAPACITY); // U_INTERNAL_DUMP("c = %C", c) if (c == '"') { // NB: check if we have a string null... if (*ptr != '"') _key.setFromData(&ptr, _end - ptr, '"'); else { ++ptr; _key.clear(); } } else { --ptr; _key.setFromData(&ptr, _end - ptr, terminator); } U_INTERNAL_ASSERT(_key) U_INTERNAL_ASSERT(_key.isNullTerminated()) do { c = *ptr++; } while (u__isspace(c) && ptr < _end); // skip white-space // U_INTERNAL_DUMP("c = %C", c) if (ptr >= _end) break; U_INTERNAL_ASSERT_EQUALS(u__isspace(c), false) UString str(U_CAPACITY); // U_INTERNAL_DUMP("c = %C", c) if (c == '"') { // NB: check if we have a string null... if (*ptr != '"') str.setFromData(&ptr, _end - ptr, '"'); else { ++ptr; str.clear(); } } else { --ptr; str.setFromData(&ptr, _end - ptr, terminator); } if (str.empty()) { U_WARNING("UHashMap::loadFromData() has found a key(%u) = %V without value", _key.size(), _key.rep); } else { U_INTERNAL_ASSERT(str.isNullTerminated()) insert(_key, str); } } U_INTERNAL_DUMP("ptr - _start = %lu", ptr - _start) U_INTERNAL_ASSERT((ptr - _start) <= sz) sz = ptr - _start; U_RETURN(sz); } // STREAMS #ifdef U_STDCPP_ENABLE U_EXPORT istream& operator>>(istream& is, UHashMap& t) { U_TRACE(0+256, "UHashMap::operator>>(%p,%p)", &is, &t) // problem with sanitize address U_INTERNAL_ASSERT_MAJOR(t._capacity, 1) int c = EOF; if (is.good()) { streambuf* sb = is.rdbuf(); // NB: we need this way for plugin... int terminator = EOF; if (is.peek() == '{' || is.peek() == '[') { c = sb->sbumpc(); // skip '{' or '[' terminator = (c == '{' ? '}' : ']'); } do { do { c = sb->sbumpc(); } while (c != EOF && u__isspace(c)); // skip white-space // U_INTERNAL_DUMP("c = %C", c) if ( EOF == c || terminator == c || (terminator == EOF && (c == '}' || c == ']'))) { break; } if (c == '#') { do { c = sb->sbumpc(); } while (c != '\n' && c != EOF); // skip line comment continue; } U_INTERNAL_ASSERT_EQUALS(u__isspace(c), false) sb->sputbackc(c); UString key(U_CAPACITY); key.get(is); U_INTERNAL_ASSERT(key) U_INTERNAL_ASSERT(key.isNullTerminated()) do { c = sb->sbumpc(); } while (c != EOF && u__isspace(c)); // skip white-space // U_INTERNAL_DUMP("c = %C", c) if (c == EOF) break; U_INTERNAL_ASSERT_EQUALS(u__isspace(c), false) sb->sputbackc(c); UString str(U_CAPACITY); str.get(is); U_INTERNAL_ASSERT(str) U_INTERNAL_ASSERT(str.isNullTerminated()) t.insert(key, str); } while (c != EOF); } if (c == EOF) is.setstate(ios::eofbit); // if (t._length == 0) is.setstate(ios::failbit); return is; } // DEBUG # ifdef DEBUG const char* UHashMapNode::dump(bool reset) const { *UObjectIO::os << "elem " << elem << '\n' << "hash " << hash << '\n' << "key (UStringRep " << (void*)key << ")\n" << "next (UHashMapNode " << (void*)next << ')'; if (reset) { UObjectIO::output(); return UObjectIO::buffer_output; } return 0; } const char* UHashMap::dump(bool reset) const { *UObjectIO::os << "hash " << hash << '\n' << "index " << index << '\n' << "table " << (void*)table << '\n' << "_length " << _length << "\n" << "_capacity " << _capacity << '\n' << "node (UHashMapNode " << (void*)node << ')'; if (reset) { UObjectIO::output(); return UObjectIO::buffer_output; } return 0; } # endif #endif