BombayTableIndex.hpp 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. /*
  2. Copyright (c) 2009-2010 Christopher A. Taylor. All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions are met:
  5. * Redistributions of source code must retain the above copyright notice,
  6. this list of conditions and the following disclaimer.
  7. * Redistributions in binary form must reproduce the above copyright notice,
  8. this list of conditions and the following disclaimer in the documentation
  9. and/or other materials provided with the distribution.
  10. * Neither the name of LibCat nor the names of its contributors may be used
  11. to endorse or promote products derived from this software without
  12. specific prior written permission.
  13. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  14. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  15. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  16. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  17. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  18. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  19. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  20. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  21. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  22. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  23. POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #ifndef CAT_BOMBAY_TABLE_INDEX_HPP
  26. #define CAT_BOMBAY_TABLE_INDEX_HPP
  27. #include <cat/db/BombayTable.hpp>
  28. namespace cat {
  29. namespace bombay {
  30. /*
  31. Returning 0 from one of these hash functions will cause insertion or lookup to fail,
  32. which is how invalid input is intended to be handled.
  33. */
  34. class IHash
  35. {
  36. public:
  37. virtual u64 HashField(const void *just_field) = 0;
  38. virtual u64 HashComplete(const void *complete_record) = 0;
  39. virtual u64 HashVarField(const void *just_field, u32 bytes) { return HashField(just_field); }
  40. };
  41. #define DECL_BOMBAY_SCHEMA_VAR_FIELD_HASH(T) \
  42. class T : public bombay::IHash \
  43. { \
  44. public: \
  45. u64 HashField(const void *just_field); \
  46. u64 HashComplete(const void *complete_record); \
  47. u64 HashVarField(const void *just_field, u32 bytes); \
  48. };
  49. #define DECL_BOMBAY_SCHEMA_FIXED_FIELD_HASH(T) \
  50. class T : public bombay::IHash \
  51. { \
  52. public: \
  53. u64 HashField(const void *just_field); \
  54. u64 HashComplete(const void *complete_record); \
  55. };
  56. /*
  57. Table index must present a complete index of the contents of a Table.
  58. The index uses some region of bytes in each entry as a key to find
  59. the entry given just that set of bytes. For example, mapping a user
  60. name to a database node.
  61. The table index should be loaded from disk on startup. If this is
  62. not possible, then an index rebuild will need to be done on startup.
  63. Hash table size will be at least twice as large as the number of
  64. database entries, growing as needed.
  65. To avoid a lot of expensive setup, each element is arranged like this:
  66. <-- MSB LSB -->
  67. C(1 bit) || OFFSET+1 (63 bits)
  68. HASH(64 bits)
  69. C: Collision flag
  70. 0 = No collision
  71. 1 = Collision, actual data may be stored at next walk location
  72. OFFSET+1: Database file offset for this entry that contains the
  73. full unique identifier. One(1) is added to the offset in
  74. the memory representation of the index table element, so that
  75. OFFSET = ~0 will be set by zeroing out the table.
  76. 0 = No data at this table element.
  77. Other values = Valid offset+1
  78. HASH: 64-bit full hash
  79. Only low bits are used for table lookup, so hash does not need
  80. to be recomputed if the table grows and lookup of something that
  81. is not in the table does not have collisions half the time.
  82. The whole structure fits in one cache line on an x86 server.
  83. */
  84. class Table;
  85. class TableIndex : public AsyncFile
  86. {
  87. friend class Table;
  88. ShutdownObserver *_shutdown_observer;
  89. Table *_parent;
  90. IHash *_index_hash;
  91. TableIndex *_next, *_next_unique, *_next_loading;
  92. protected:
  93. // (multiplier-1) divisible by all prime factors of table size
  94. // (multiplier-1) is a multiple of 4 if table size is a multiple of 4
  95. // Increment is relatively prime to the table size.
  96. static const u32 COLLISION_MULTIPLIER = 71*7487 * 4 + 1;
  97. static const u32 COLLISION_INCREMENTER = 1017234223;
  98. static const u64 OFFSET_MASK = (~(u64)0) >> 1;
  99. static const u64 COLLIDE_MASK = ~OFFSET_MASK;
  100. static const u32 MIN_ELEMENTS = 1024;
  101. static const u32 TABLE_FOOTER_BYTES = 16;
  102. static const u64 TABLE_CHECK_HASH_SALT = 0x74B1301234DEADBE;
  103. RWLock _lock;
  104. u64 *_table;
  105. u32 _table_raw_bytes;
  106. u32 _table_elements; // A power of 2; just subtract 1 to make a mask
  107. u32 _used_elements;
  108. char _file_path[MAX_PATH+1];
  109. bool AllocateTable();
  110. bool DoubleTable();
  111. void FreeTable();
  112. protected:
  113. void Save();
  114. public:
  115. TableIndex(Table *parent, const char *index_file_path,
  116. IHash *hash_function, ShutdownObserver *shutdown_observer);
  117. ~TableIndex();
  118. bool Initialize();
  119. protected:
  120. virtual bool OnRead(ThreadPoolLocalStorage *tls, int error, AsyncBuffer *buffer, u32 bytes);
  121. public:
  122. CAT_INLINE const char *GetFilePath() { return _file_path; }
  123. public:
  124. CAT_INLINE u64 VarField(const void *data, u32 bytes)
  125. {
  126. return _index_hash->HashVarField(data, bytes);
  127. }
  128. CAT_INLINE u64 Field(const void *data)
  129. {
  130. return _index_hash->HashField(data);
  131. }
  132. CAT_INLINE u64 Complete(const void *data)
  133. {
  134. return _index_hash->HashComplete(data);
  135. }
  136. public:
  137. // Hash value of 0 will be ignored
  138. u64 Lookup(u64 hash);
  139. void Insert(u64 hash, u64 offset);
  140. void Remove(u64 hash);
  141. public:
  142. CAT_INLINE u64 LookupVarField(const void *data, u32 bytes)
  143. {
  144. return Lookup(_index_hash->HashVarField(data, bytes));
  145. }
  146. CAT_INLINE u64 LookupField(const void *data)
  147. {
  148. return Lookup(_index_hash->HashField(data));
  149. }
  150. CAT_INLINE u64 LookupComplete(const void *data)
  151. {
  152. return Lookup(_index_hash->HashComplete(data));
  153. }
  154. public:
  155. CAT_INLINE void InsertVarField(const void *data, u32 bytes, u64 offset)
  156. {
  157. Insert(_index_hash->HashVarField(data, bytes), offset);
  158. }
  159. CAT_INLINE void InsertField(const void *data, u64 offset)
  160. {
  161. Insert(_index_hash->HashField(data), offset);
  162. }
  163. CAT_INLINE void InsertComplete(const void *data, u64 offset)
  164. {
  165. Insert(_index_hash->HashComplete(data), offset);
  166. }
  167. public:
  168. CAT_INLINE void RemoveVarField(const void *data, u32 bytes)
  169. {
  170. Remove(_index_hash->HashVarField(data, bytes));
  171. }
  172. CAT_INLINE void RemoveField(const void *data)
  173. {
  174. Remove(_index_hash->HashField(data));
  175. }
  176. CAT_INLINE void RemoveComplete(const void *data)
  177. {
  178. Remove(_index_hash->HashComplete(data));
  179. }
  180. };
  181. } // namespace bombay
  182. } // namespace cat
  183. #endif // CAT_BOMBAY_TABLE_INDEX_HPP
粤ICP备19079148号