// ============================================================================ // // = LIBRARY // ULib - c++ library // // = FILENAME // cdb.h - A structure for constant databases (Bernstein) // // = AUTHOR // Stefano Casazza // // ============================================================================ #ifndef ULIB_CDB_H #define ULIB_CDB_H 1 #include #include /** * @class UCDB * * @brief UCDB is a fast, reliable, simple class for creating and reading constant databases. * * Its database structure provides several features: * Fast lookups: A successful lookup in a large database normally takes just two disk accesses. * An unsuccessful lookup takes only one. * Low overhead: A database uses 4096 bytes, plus 16 bytes per record, plus the space for keys and data. * A cdb is an associative array: it maps strings (keys) to strings (data). * A cdb contains 512 pointers to linearly probed open hash tables. * The hash tables contain pointers to (key,data) pairs. A cdb is stored in a single file on disk: * +----------------+------------+-------+-------+-----+---------+ * | p0 p1 ... p511 | records... | hash0 | hash1 | ... | hash511 | * +----------------+------------+-------+-------+-----+---------+ * Each of the 512 initial pointers states a position and a length. The position is the starting byte * position of the hash table. The length is the number of slots in the hash table. * Records are stored sequentially, without special alignment. A record states a key length, a data * length, the key, and the data. Each hash table slot states a hash value and a byte position. If the * byte position is 0, the slot is empty. Otherwise, the slot points to a record whose key has that hash value. * Positions, lengths, and hash values are 32-bit quantities, stored in 4 bytes. Thus a cdb must fit into 4 gigabytes. * A record is located as follows. Compute the hash value of the key in the record. * The hash value modulo 512 is the number of a hash table. * The hash value divided by 512, modulo the length of that table, is a slot number. * Probe that slot, the next higher slot, and so on, until you find the record or run into an empty slot */ #define CDB_NUM_HASH_TABLE_POINTER 512 class URDB; class UHTTP; typedef int (*iPFprpr) (UStringRep*, UStringRep*); typedef void (*vPFprpr) (UStringRep*, UStringRep*); #define U_cdb_ignore_case(obj) (obj)->UCDB::flag[0] #define U_cdb_result_call(obj) (obj)->UCDB::flag[1] #define U_cdb_add_entry_to_vector(obj) (obj)->UCDB::flag[2] #define U_cdb_no_hash(obj) (obj)->UCDB::flag[3] class U_EXPORT UCDB : public UFile { public: typedef struct datum { void* dptr; uint32_t dsize; } datum; typedef struct cdb_hash_table_pointer { uint32_t pos; // starting byte position of the hash table uint32_t slots; // number of slots in the hash table } cdb_hash_table_pointer; typedef struct cdb_record_header { uint32_t klen; // key length uint32_t dlen; // data length } cdb_record_header; typedef struct cdb_hash_table_slot { uint32_t hash; // hash value of the key uint32_t pos; // starting byte position of the record (0 -> slot empty) } cdb_hash_table_slot; UCDB(int ignore_case = 0) { U_TRACE_REGISTER_OBJECT(0, UCDB, "%d", ignore_case) init_internal(ignore_case); } UCDB(const UString& path, int ignore_case) : UFile(path) { U_TRACE_REGISTER_OBJECT(0, UCDB, "%V,%d", path.rep, ignore_case) init_internal(ignore_case); } #ifdef U_COVERITY_FALSE_POSITIVE virtual #endif ~UCDB() { U_TRACE_UNREGISTER_OBJECT(0, UCDB) } // Open a Constant DataBase bool open(bool brdonly = true); bool open(const UString& pathdb, bool brdonly = true) { U_TRACE(0, "UCDB::open(%V)", pathdb.rep) UFile::setPath(pathdb); return UCDB::open(brdonly); } bool ignoreCase() const { return ignoreCase(this); } void setKey(UStringRep* _key) { key.dptr = (void*) _key->data(); key.dsize = _key->size(); } void setKey(const UString& _key) { key.dptr = (void*) _key.data(); key.dsize = _key.size(); } void setKey(const void* dptr, uint32_t dsize) { key.dptr = (void*) dptr; key.dsize = dsize; } void setData(const UString& _data) { data.dptr = (void*)_data.data(); data.dsize = _data.size(); } void setData(const void* dptr, uint32_t dsize) { data.dptr = (void*) dptr; data.dsize = dsize; } bool find(const UString& _key) { U_TRACE(0, "UCDB::find(%V)", _key.rep) setKey(_key); cdb_hash(); return find(); } bool findNext(); // handles repeated keys... // Get methods uint32_t size() const { U_TRACE_NO_PARAM(0, "UCDB::size()") U_RETURN(nrecord); } UString elem() { U_TRACE_NO_PARAM(0, "UCDB::elem()") UString str((const char*)data.dptr, data.dsize); U_RETURN_STRING(str); } // Set methods void setSize(uint32_t sz) { U_TRACE(0, "UCDB::setSize(%u)", sz) nrecord = sz; } // operator [] UString operator[](const UString& _key) { U_TRACE(0, "UCDB::operator[](%V)", _key.rep) setKey(_key); return at(); } UString operator[](UStringRep* _key) { U_TRACE(0, "UCDB::operator[](%V)", _key) setKey(_key); return at(); } // Call function for all entry char* getPattern() { return pattern; } void addEntryToVector() { U_cdb_add_entry_to_vector(this) = true; } iPFprpr getFunctionToCall() { return function_to_call; } void setFunctionToCall(iPFprpr func) { function_to_call = func; } UVector* getVector() { return ptr_vector; } void setVector(UVector* ptr) { ptr_vector = ptr; } void callForAllEntrySorted( iPFprpr function); void callForAllEntryWithPattern(iPFprpr function, UString* pattern); iPFprpr getFilterToFunctionToCall() { return filter_function_to_call; } void resetFilterToFunctionToCall() { filter_function_to_call = functionCall; } void setFilterToFunctionToCall(iPFprpr function) { filter_function_to_call = function; } uint32_t getValuesWithKeyNask(UVector& vec_values, const UString& mask_key, uint32_t* size = U_NULLPTR); // Save memory hash table as Constant DataBase static uint32_t sizeFor(uint32_t _nrecord) { U_TRACE(0, "UCDB::sizeFor(%u)", _nrecord) uint32_t size = CDB_NUM_HASH_TABLE_POINTER * sizeof(cdb_hash_table_pointer) + _nrecord * (sizeof(cdb_record_header) + sizeof(cdb_hash_table_slot)); U_RETURN(size); } bool writeTo( UHashMap* t, uint32_t tbl_space, pvPFpvpb f = U_NULLPTR) { return UCDB::writeTo(*this, t, tbl_space, f); } static bool writeTo(const UString& path, UHashMap* t, uint32_t tbl_space, pvPFpvpb f = U_NULLPTR) { return UCDB(path, t->ignoreCase()).writeTo(t, tbl_space, f); } // STREAM UString print(); #ifdef U_STDCPP_ENABLE static vpFpcu getValueFromBuffer; friend U_EXPORT istream& operator>>(istream& is, UCDB& cdb); friend U_EXPORT ostream& operator<<(ostream& os, UCDB& cdb); // DEBUG # ifdef DEBUG const char* dump(bool reset) const; # endif #endif protected: datum key; // initialized in find() datum data; // initialized if findNext() returns 1 cdb_record_header* hr; // initialized if findNext() returns 1 cdb_hash_table_slot* slot; // initialized in find() cdb_hash_table_pointer* hp; // initialized in find() // internal char* pattern; UString* pbuffer; UVector* ptr_vector; iPFprpr function_to_call, filter_function_to_call; // when mmap not available we use this storage... cdb_hash_table_pointer hp_buf; cdb_record_header hr_buf; cdb_hash_table_slot slot_buf; uint32_t loop, // number of hash slots searched under key nslot, // initialized in find() khash, // initialized in find() nrecord, // initialized in makeStart() offset, start_hash_table_slot; unsigned char flag[4]; bool find(); UString at(); void init_internal(int ignore_case); static bool ignoreCase(const UCDB* pcdb) { return (U_cdb_ignore_case(pcdb) != 0); } uint32_t cdb_hash(const char* t, uint32_t tlen) { U_TRACE(0, "UCDB::cdb_hash(%.*S,%u)", tlen, t, tlen) if (U_cdb_no_hash(this)) { U_INTERNAL_ASSERT_EQUALS(tlen, sizeof(uint32_t)) U_RETURN(*(uint32_t*)t); } int flags = (U_cdb_ignore_case(this) == 0xff ? -1 : U_cdb_ignore_case(this)); U_INTERNAL_DUMP("flags = %d U_cdb_ignore_case(this) = %d", flags, U_cdb_ignore_case(this)) uint32_t result = u_cdb_hash((unsigned char*)t, tlen, flags); U_RETURN(result); } void cdb_hash() { khash = cdb_hash((const char*)key.dptr, key.dsize); } void setHash(uint32_t _hash) { khash = _hash; } void setHash(const char* t, uint32_t tlen) { khash = cdb_hash(t, tlen); } // START-END of record data char* start() const { return (UFile::map + CDB_NUM_HASH_TABLE_POINTER * sizeof(cdb_hash_table_pointer)); } char* end() const { return (UFile::map + start_hash_table_slot); } // Call function for all entry void callForAllEntry(vPFpvpc function); static int functionCall(UStringRep* key, UStringRep* data) { return 1; } // Save memory hash table as Constant DataBase static bool writeTo(UStringRep* key, void* elem); // callWithDeleteForAllEntry()... static bool writeTo(UCDB& cdb, UHashMap* table, uint32_t tbl_space, pvPFpvpb f = U_NULLPTR); // FOR RDB void makeStart() { U_TRACE_NO_PARAM(0, "UCDB::makeStart()") U_INTERNAL_ASSERT_DIFFERS(map, MAP_FAILED) nrecord = start_hash_table_slot = 0; hr = (UCDB::cdb_record_header*) start(); } uint32_t makeFinish(bool reset); void call1(); void call1(const char* key_ptr, uint32_t key_size, const char* data_ptr, uint32_t data_size); static void call2(UCDB* pcdb, char* src); static void print2(UCDB* pcdb, char* src); static void getKeys2(UCDB* pcdb, char* src); static void makeAdd2(UCDB* pcdb, char* src); #ifdef DEBUG void checkForAllEntry() U_NO_EXPORT; #endif private: inline bool match(uint32_t pos) U_NO_EXPORT; U_DISALLOW_COPY_AND_ASSIGN(UCDB) friend class URDB; friend class UHTTP; template friend class URDBObjectHandler; }; #endif