| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231 |
- /*
- Copyright (c) 2009-2010 Christopher A. Taylor. All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- * Neither the name of LibCat nor the names of its contributors may be used
- to endorse or promote products derived from this software without
- specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
- */
- #ifndef CAT_BOMBAY_TABLE_INDEX_HPP
- #define CAT_BOMBAY_TABLE_INDEX_HPP
- #include <cat/db/BombayTable.hpp>
- namespace cat {
- namespace bombay {
- /*
- Returning 0 from one of these hash functions will cause insertion or lookup to fail,
- which is how invalid input is intended to be handled.
- */
- class IHash
- {
- public:
- virtual u64 HashField(const void *just_field) = 0;
- virtual u64 HashComplete(const void *complete_record) = 0;
- virtual u64 HashVarField(const void *just_field, u32 bytes) { return HashField(just_field); }
- };
// Declares class T, derived from bombay::IHash, overriding all three hash
// functions including the variable-length HashVarField().  Only declarations
// are emitted; the member functions have to be defined separately.
#define DECL_BOMBAY_SCHEMA_VAR_FIELD_HASH(T) \
    class T : public bombay::IHash \
    { \
    public: \
        virtual u64 HashField(const void *just_field); \
        virtual u64 HashComplete(const void *complete_record); \
        virtual u64 HashVarField(const void *just_field, u32 bytes); \
    };
// Declares class T, derived from bombay::IHash, overriding HashField() and
// HashComplete() only; HashVarField() is left as inherited from IHash.  Only
// declarations are emitted; the member functions have to be defined separately.
#define DECL_BOMBAY_SCHEMA_FIXED_FIELD_HASH(T) \
    class T : public bombay::IHash \
    { \
    public: \
        virtual u64 HashField(const void *just_field); \
        virtual u64 HashComplete(const void *complete_record); \
    };
- /*
- Table index must present a complete index of the contents of a Table.
- The index uses some region of bytes in each entry as a key to find
- the entry given just that set of bytes. For example, mapping a user
- name to a database node.
- The table index should be loaded from disk on startup. If this is
- not possible, then an index rebuild will need to be done on startup.
- Hash table size will be at least twice as large as the number of
- database entries, growing as needed.
- To avoid a lot of expensive setup, each element is arranged like this:
- <-- MSB LSB -->
- C(1 bit) || OFFSET+1 (63 bits)
- HASH(64 bits)
- C: Collision flag
- 0 = No collision
- 1 = Collision, actual data may be stored at next walk location
- OFFSET+1: Database file offset for this entry that contains the
- full unique identifier. One (1) is added to the offset in
- the in-memory representation of the index table element, so that
- zeroing out the table sets OFFSET = ~0 for every element.
- 0 = No data at this table element.
- Other values = Valid offset+1
- HASH: 64-bit full hash
- Only the low bits are used for table lookup, so the hash does not
- need to be recomputed if the table grows, and a lookup of something
- that is not in the table does not have collisions half of the time.
- The whole structure fits in one cache line on an x86 server.
- */
- class Table;
- class TableIndex : public AsyncFile
- {
- friend class Table;
- ShutdownObserver *_shutdown_observer;
- Table *_parent;
- IHash *_index_hash;
- TableIndex *_next, *_next_unique, *_next_loading;
- protected:
- // (multiplier-1) divisible by all prime factors of table size
- // (multiplier-1) is a multiple of 4 if table size is a multiple of 4
- // Increment is relatively prime to the table size.
- static const u32 COLLISION_MULTIPLIER = 71*7487 * 4 + 1;
- static const u32 COLLISION_INCREMENTER = 1017234223;
- static const u64 OFFSET_MASK = (~(u64)0) >> 1;
- static const u64 COLLIDE_MASK = ~OFFSET_MASK;
- static const u32 MIN_ELEMENTS = 1024;
- static const u32 TABLE_FOOTER_BYTES = 16;
- static const u64 TABLE_CHECK_HASH_SALT = 0x74B1301234DEADBE;
- RWLock _lock;
- u64 *_table;
- u32 _table_raw_bytes;
- u32 _table_elements; // A power of 2; just subtract 1 to make a mask
- u32 _used_elements;
- char _file_path[MAX_PATH+1];
- bool AllocateTable();
- bool DoubleTable();
- void FreeTable();
- protected:
- void Save();
- public:
- TableIndex(Table *parent, const char *index_file_path,
- IHash *hash_function, ShutdownObserver *shutdown_observer);
- ~TableIndex();
- bool Initialize();
- protected:
- virtual bool OnRead(ThreadPoolLocalStorage *tls, int error, AsyncBuffer *buffer, u32 bytes);
- public:
- CAT_INLINE const char *GetFilePath() { return _file_path; }
- public:
- CAT_INLINE u64 VarField(const void *data, u32 bytes)
- {
- return _index_hash->HashVarField(data, bytes);
- }
- CAT_INLINE u64 Field(const void *data)
- {
- return _index_hash->HashField(data);
- }
- CAT_INLINE u64 Complete(const void *data)
- {
- return _index_hash->HashComplete(data);
- }
- public:
- // Hash value of 0 will be ignored
- u64 Lookup(u64 hash);
- void Insert(u64 hash, u64 offset);
- void Remove(u64 hash);
- public:
- CAT_INLINE u64 LookupVarField(const void *data, u32 bytes)
- {
- return Lookup(_index_hash->HashVarField(data, bytes));
- }
- CAT_INLINE u64 LookupField(const void *data)
- {
- return Lookup(_index_hash->HashField(data));
- }
- CAT_INLINE u64 LookupComplete(const void *data)
- {
- return Lookup(_index_hash->HashComplete(data));
- }
- public:
- CAT_INLINE void InsertVarField(const void *data, u32 bytes, u64 offset)
- {
- Insert(_index_hash->HashVarField(data, bytes), offset);
- }
- CAT_INLINE void InsertField(const void *data, u64 offset)
- {
- Insert(_index_hash->HashField(data), offset);
- }
- CAT_INLINE void InsertComplete(const void *data, u64 offset)
- {
- Insert(_index_hash->HashComplete(data), offset);
- }
- public:
- CAT_INLINE void RemoveVarField(const void *data, u32 bytes)
- {
- Remove(_index_hash->HashVarField(data, bytes));
- }
- CAT_INLINE void RemoveField(const void *data)
- {
- Remove(_index_hash->HashField(data));
- }
- CAT_INLINE void RemoveComplete(const void *data)
- {
- Remove(_index_hash->HashComplete(data));
- }
- };
- } // namespace bombay
- } // namespace cat
- #endif // CAT_BOMBAY_TABLE_INDEX_HPP
|