-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhashtable_api.h
446 lines (391 loc) · 16.8 KB
/
hashtable_api.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
/*! \mainpage A lightweight separate-chaining, arena-backed hashtable in C
*
* \section intro_sec Introduction
*
* See \link hashtable_api.h API documentation for hashtable_api.h \endlink .
*
* This module implements a lightweight hashtable that uses separate chaining to resolve collisions.
*
* This hashtable is designed to be flexible enough for use on embedded systems that have
* no dynamic memory, and/or limited memory in general.
*
* \section gettingstarted_sec Getting started
*
* -# Compile <code>hashtable.c</code> along with the rest of the files in your project
* -# Include <code>hashtable_api.h</code> in files where you want to use hashtables
* -# Use the functions detailed in <code>hashtable_api.h</code> to create and interact
* with hashtables
*
* \section codesample_sec Example program
*
* The following sample program creates a hashtable instance, inserts a single key/value pair,
* and the retrieves the stored value with the same key, and prints it to stdout.
*
* See \link hashtable_api.h API documentation for hashtable_api.h \endlink for comprehensive details about
* all available functions.
*
* \include example_main.c
*
* \section perftest_sec Performance visualization
*
* The following graph shows the results from a test program which creates a hashtable
* instance with a 32MB buffer, and inserts items until the buffer is full (each key
* is a 32-bit unsigned integer, and all values are NULL / 0 bytes).
*
* After every 2,000 items inserted, the test program performs the following checks;
*
* - Divide the time taken for the last 2,000 item insertions, by 2,000, to get the
* average insertion time
* - Retrieve all items in the table, and divide the time taken by the number of items
* in the table, to get the average item retrieval time
* - Generate 1,000 keys that are not in the table, and check if they exist using
* <code>hashtable_has_key</code>. Divide the time taken for checking all keys by
* 1000 to get the average time to check for a bad key.
*
* Test was compiled with "-O2" optimization level and executed on a system with an
* Intel Core-i7 running Windows 10.
*
* \image html extras/performance_graph.png
*
* \section features_sec Features/limitations
*
* - Implemented in pure C99, and requires only `stdint.h` and `string.h`.
* - Uses <a href="https://en.wikipedia.org/wiki/Hash_table#Separate_chaining">separate chaining</a> to resolve collisions.
* - Keys and values are byte streams of arbitrary length/contents, so keys and values can
* be any data type.
* - No dynamic memory allocation, and no table re-sizing. All table data is stored
* in a buffer that must be provided by the caller on hashtable creation,
* and when there is not enough space remaining in that buffer, insertion of new
* items will fail.
* - Provide/write your own hash function (FNV-1a is used by default if you don't provide one).
*
* \section buildopts_sec Build/compile options
*
* There are a number of preprocessor symbols you can define to change certain things,
* here are the details about those symbols.
*
* \subsection ht_size_sec Datatype used for key/value sizes
*
* By default, `size_t` is used to hold the sizes of keys/values. If you
* know that all your keys/values are below a certain size, however, and you want to save
* some memory, then you can define one of the following options to set the datatype used
* to hold key/value sizes:
*
* Symbol name | Effect
* -------------------------------------|-------------------------------------------
* `HASHTABLE_SIZE_T_UINT16` | hashtable_size_t is `uint16_t`
* `HASHTABLE_SIZE_T_UINT32` | hashtable_size_t is `uint32_t`
* `HASHTABLE_SIZE_T_SYS` | hashtable_size_t is `size_t` <b>(default)</b>
*
* \subsection disable_paramval_sec Disable function parameter validation
*
* By default, all function parameters are checked for validity (e.g. no NULL pointers,
* no size values of 0). However, if you have a stable system and have worked most of
* those types of bugs out, then you may want to compile these checks out for a performance gain.
* Define the following option to compile out all function parameter validation checks:
*
* Symbol name | Effect
* -------------------------------------|---------------------------------------------------
* `HASHTABLE_DISABLE_PARAM_VALIDATION` | All function param. validation checks compiled out
*
* \subsection packed_struct_sec Enable packed struct option
*
* Use `__attribute__((packed))` for key/value pair struct definition, may save
* some extra space for stored key/value pair data:
*
* Symbol name | Effect
* --------------------------|-----------------------------------------------------
* `HASHTABLE_PACKED_STRUCT` | Key/value pair struct uses `__attribute__((packed))`
*
*/
/**
* @file hashtable_api.h
*
* @author Erik K. Nyquist
*
* @brief Implements a lightweight separate-chaining hashtable designed to be flexible
* enough for embedded systems.
*/
#ifndef HASHTABLE_API_H
#define HASHTABLE_API_H
#include <stdint.h>
#define HASHTABLE_LIB_VERSION "v0.1.1"
#if !defined(HASHTABLE_SIZE_T_UINT16) && !defined(HASHTABLE_SIZE_T_UINT32) && !defined(HASHTABLE_SIZE_T_SYS)
#define HASHTABLE_SIZE_T_SYS
#endif // if !defined..
/**
* @brief Defines the type used to represent the size of keys and values stored in the hashtable
*/
#if defined(HASHTABLE_SIZE_T_UINT16)
typedef uint16_t hashtable_size_t;
#elif defined(HASHTABLE_SIZE_T_UINT32)
typedef uint32_t hashtable_size_t;
#elif defined(HASHTABLE_SIZE_T_SYS)
typedef size_t hashtable_size_t;
#else
#error("HASHTABLE_SIZE_T option not defined")
#endif // if defined....
/**
* @brief Minimum number of slots in the table array
*/
#define HASHTABLE_MIN_ARRAY_COUNT (10u)
/**
* @brief Helper macro, gets the min. required buffer size for a specific array count.
* When creating a hashtable with a specific array count, this macro will tell
* you how much memory is required at a minimum to hold the 'housekeeping' data
* for that table. Any remaining space is used for key/value pair data storage.
*/
#define HASHTABLE_MIN_BUFFER_SIZE(array_count) \
(sizeof(_keyval_pair_table_data_t) + sizeof(_keyval_pair_list_table_t) + \
((array_count) * sizeof(_keyval_pair_list_t)) + \
sizeof(_keyval_pair_data_block_t))
/**
* Hash function used for hashing key data
*
* @param data Pointer to key data
* @param size Key data size in bytes
*
* @return Computed hash value
*/
typedef uint32_t (*hashtable_hashfunc_t)(const char *data, const hashtable_size_t size);
/**
* @brief Configuration data for a single hashtable instance
*/
typedef struct
{
hashtable_hashfunc_t hash; ///< Hash function to use, must not be NULL
uint32_t array_count; ///< Number of table array slots, must not be 0
} hashtable_config_t;
/**
* @brief All data for a single hashtable instance
*/
typedef struct
{
hashtable_config_t config; ///< Hashtable config data
uint32_t entry_count; ///< Number of entries in the table
size_t data_size; ///< Size of data section
void *table_data; ///< Pointer to buffer for data section
} hashtable_t;
/**
* Initialize a new hashtable instance
*
* @param table Pointer to hashtable instance
* @param config Pointer to hashtable configuration data. May be NULL.
* If NULL, a default general-purpose configuration will be used.
* @param buffer Pointer to buffer to use for hashtable data
* @param buffer_size Size of buffer in bytes
*
* @return 0 if successful, 1 if buffer size is not large enough, and -1 if an
* error occurred. Use #hashtable_error_message to get an error message
* if -1 is returned.
*/
int hashtable_create(hashtable_t *table, const hashtable_config_t *config,
void *buffer, size_t buffer_size);
/**
* Insert a new key/value pair into a table. If a key/value pair with the
* given key already exists, then it will be over-written with the new value.
*
* @param table Pointer to hashtable instance
* @param key Pointer to key data
* @param key_size Key data size in bytes
* @param value Pointer to value data, may be NULL
* @param value_size Value data size in bytes, may be 0
*
* @return 0 if successful, 1 if there is not enough space left in the buffer for
key/value pair data, and -1 if an error occurred. Use #hashtable_error_message
* to get an error message if -1 is returned.
*/
int hashtable_insert(hashtable_t *table, const char *key, const hashtable_size_t key_size,
const char *value, const hashtable_size_t value_size);
/**
* Remove a stored value from a table by key. If the given key does not exist in
* the table, then the return value will indicate success.
*
* @param table Pointer to hashtable instance
* @param key Pointer to key data
* @param key_size Key data size in bytes
*
* @return 0 if successful, 1 if the key does not exist, and -1 if an error occurred.
* Use #hashtable_error_message to get an error message if -1 is returned.
*/
int hashtable_remove(hashtable_t *table, const char *key, const hashtable_size_t key_size);
/**
* Retrieve a pointer to a value stored in a table by key.
*
* @param table Pointer to hashtable instance
* @param key Pointer to key data
* @param key_size Key data size in bytes
* @param value Pointer to location to store value pointer, may be NULL
* @param value_size Pointer to location to store value size, may be NULL
*
* @return 0 if successful, 1 if the key does not exist, and -1 if an error occurred.
* Use #hashtable_error_message to get an error message if -1 is returned.
*/
int hashtable_retrieve(hashtable_t *table, const char *key, const hashtable_size_t key_size,
char **value, hashtable_size_t *value_size);
/**
* Check if a key exists in a table.
*
* @param table Pointer to hashtable instance
* @param key Pointer to key data
* @param key_size Key data size in bytes
*
* @return 1 if key exists, 0 if key does not exist, and -1 if an error occurred.
* Use #hashtable_error_message to get an error message.
*/
int hashtable_has_key(hashtable_t *table, const char *key, const hashtable_size_t key_size);
/**
* Number of bytes remaining for key/value pair data storage
*
* @param table Pointer to hashtable instance
* @param bytes_remaining Pointer to location to store number of bytes remaining
*
* @return 0 if successful, -1 if an error occurred. Use #hashtable_error_message
* to get an error message.
*/
int hashtable_bytes_remaining(hashtable_t *table, size_t *bytes_remaining);
/**
* Retrieve pointers to the next key/value pair in the table. This function can
* be used to iterate over all key/value pairs stored in the table.
*
* @param table Pointer to hashtable instance
* @param key Pointer to location to store key pointer
* @param key_size Pointer to location to store key data size in bytes
* @param value Pointer to location to store value pointer, may be NULL
* @param value_size Pointer to location to store value data size in bytes, may be NULL
*
* @return 0 if next item was read successfully, 1 if no item was read because
* all items in the table have been iterated over (in this case you can
use hashtable_reset_cursor to reset for another iteration if you need to),
and -1 if an error occurred. Use #hashtable_error_message to get an error
message.
*/
int hashtable_next_item(hashtable_t *table, char **key, hashtable_size_t *key_size,
char **value, hashtable_size_t *value_size);
/**
* Reset the key/value pair cursor, which is used for iteration via #hashtable_next_item.
* This allows iterating through a table again after #hashtable_next_item has already iterated
* over the whole table and returned a value of 1.
*
* @param table Pointer to hashtable instance
*
* @return 0 if successful, -1 if an error occurred. Use #hashtable_error_message
* to get an error message.
*/
int hashtable_reset_cursor(hashtable_t *table);
/**
* Clear all stored data from a hashtable instance
*
* @param table Pointer to hashtable instance
*
* @return 0 if successful, -1 if an error occurred. Use #hashtable_error_message
* to get an error message.
*/
int hashtable_clear(hashtable_t *table);
/**
* Populate a configuration structure with the default hash function (FNV-1a), and
* an array count optimized for the given buffer size.
*
* @param config Pointer to configuration data structure to populate
* @param buffer_size Buffer size to generate configuration for
*
* @return 0 if successful, -1 if an error occurred. Use #hashtable_error_message
* to get an error message.
*/
int hashtable_default_config(hashtable_config_t *config, size_t buffer_size);
/**
* Return a pointer to the last stored error message. When any hashtable function
* returns -1 to indicate an error, you can call this function to get a pointer to
* the corresponding error message string. If no error has occurred then this
* function will return a pointer to an empty string.
*
* @return Pointer to error message string
*/
char *hashtable_error_message(void);
/**
* Private definitions-- not strictly needed in the public API, but required for
* the #HASHTABLE_MIN_BUFFER_SIZE macro definition.
*/
#ifdef HASHTABLE_PACKED_STRUCT
#define _HASHTABLE_PACKED __attribute__((packed))
#else
#define _HASHTABLE_PACKED
#endif // HASHTABLE_PACKED_STRUCT
/**
* Represents a single key/value pair stored in the data block area of a table instance.
* Also represents a single node in a singly-linked list of key/value pairs.
*/
typedef struct _keyval_pair
{
struct _keyval_pair *next; ///< Pointer to next key/val pair in the list
hashtable_size_t key_size; ///< Size of key data in bytes
hashtable_size_t value_size; ///< Size of value data in bytes
uint8_t data[]; ///< Start of key + value data packed together
} _HASHTABLE_PACKED _keyval_pair_t;
/**
* Represents a singly-linked list of key/value pairs
*/
typedef struct
{
_keyval_pair_t *head; ///< Head (first) item
_keyval_pair_t *tail; ///< Tail (last) item
} _keyval_pair_list_t;
/**
* Represents the area where key/val pair data is stored
*/
typedef struct
{
_keyval_pair_list_t freelist; ///< List of freed key/value pairs
size_t total_bytes; ///< Total bytes available for key/value pair data
size_t bytes_used; ///< Total bytes used (including freed) by key/value pair data
uint8_t data[]; ///< Pointer to key/value data section, size not known at compile time
} _keyval_pair_data_block_t;
/**
* Represents a table of singly-linked lists of key-value pair
*/
typedef struct
{
uint32_t array_count; ///< Number of _keyval_pair_list_t slots in the array
_keyval_pair_list_t table[]; ///< Pointer to first slot in table
} _keyval_pair_list_table_t;
/**
* First section in the table->table_data field, holds some misc. housekeeping data.
*
* table->table_data layout/format:
*
* The table->table_data field points to the buffer area passed to 'hashtable_create',
* and contains the following data:
*
* +-----------------------------+ <--- Lowest address of table->table_data
* | |
* | _keyval_pair_table_data_t |
* | |
* +-----------------------------+
* | |
* | _keyval_pair_list_table_t |
* | |
* +-----------------------------+
* | |
* | _keyval_pair_list_t table[] |
* | array items |
* | |
* +-----------------------------+
* | |
* | _keyval_pair_data_block_t |
* | |
* +-----------------------------+
* | |
* | data block data[] section |
* | |
* +-----------------------------+
*/
typedef struct
{
_keyval_pair_list_table_t *list_table; ///< Convenience pointer to table array
_keyval_pair_data_block_t *data_block; ///< Convenience pointer to key/val data block
uint32_t cursor_array_index; ///< Cursor current table index for iteration
uint32_t cursor_items_traversed; ///< Number of items traversed by cursor
_keyval_pair_t *cursor_item; ///< Cursor current item pointer for iteration
uint8_t cursor_limit; ///< Set to 1 when all items have been iterated through
} _keyval_pair_table_data_t;
#endif // HASHTABLE_API_H