Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use FNV1a for string hashing #1806

Merged
merged 1 commit into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ shlib-exports-*.txt
/test/test_index
/test/test_introspection
/test/test_kfunc
/test/test_khash
/test/test_kstring
/test/test_mod
/test/test_nibbles
Expand Down
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ BUILT_TEST_PROGRAMS = \
test/test_expr \
test/test_faidx \
test/test_kfunc \
test/test_khash \
test/test_kstring \
test/test_mod \
test/test_nibbles \
Expand Down Expand Up @@ -605,6 +606,7 @@ check test: all $(HTSCODECS_TEST_TARGETS)
test/hts_endian
test/test_expr
test/test_kfunc
test/test_khash
test/test_kstring
test/test_nibbles -v
test/test_str2int
Expand Down Expand Up @@ -669,6 +671,9 @@ test/test_faidx: test/test_faidx.o libhts.a
test/test_kfunc: test/test_kfunc.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_kfunc.o libhts.a -lz $(LIBS) -lpthread

test/test_khash: test/test_khash.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_khash.o libhts.a -lz $(LIBS) -lpthread

test/test_kstring: test/test_kstring.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_kstring.o libhts.a -lz $(LIBS) -lpthread

Expand Down Expand Up @@ -778,6 +783,7 @@ test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_fa
test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(hfile_internal_h)
test/test_expr.o: test/test_expr.c config.h $(htslib_hts_expr_h)
test/test_kfunc.o: test/test_kfunc.c config.h $(htslib_kfunc_h)
test/test_khash.o: test/test_khash.c config.h $(htslib_khash_h) $(htslib_kroundup_h)
test/test_kstring.o: test/test_kstring.c config.h $(htslib_kstring_h)
test/test_mod.o: test/test_mod.c config.h $(htslib_sam_h)
test/test_nibbles.o: test/test_nibbles.c config.h $(htslib_sam_h) $(sam_internal_h)
Expand Down
87 changes: 83 additions & 4 deletions htslib/khash.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* The MIT License

Copyright (c) 2008, 2009, 2011 by Attractive Chaos <[email protected]>
Copyright (C) 2014-2015, 2018 Genome Research Ltd.
Copyright (C) 2014-2015, 2018, 2024 Genome Research Ltd.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
Expand Down Expand Up @@ -356,7 +356,39 @@ static const double __ac_HASH_UPPER = 0.77;
__ac_set_isdel_true(h->flags, x); \
--h->size; \
} \
}
} \
/* kh_stats_<name>: gather occupancy statistics for hash table h. */ \
/*   empty     [out] number of buckets flagged empty. */ \
/*   deleted   [out] number of buckets flagged deleted. */ \
/*   hist_size [out] number of entries in the returned histogram. */ \
/*   hist_out  [out] heap-allocated histogram; entry d counts the live */ \
/*                   keys found d probe steps away from their home slot. */ \
/*                   Ownership passes to the caller, who must free() it. */ \
/* Returns 0 on success, or -1 if memory allocation fails (in which case */ \
/* *hist_out is not written). */ \
SCOPE int kh_stats_##name(kh_##name##_t *h, khint_t *empty, \
                          khint_t *deleted, khint_t *hist_size, \
                          khint_t **hist_out) \
{ \
    khint_t i, *hist = NULL, dist_max = 0, k, dist, step; \
    khint_t mask = h->n_buckets - 1; \
    *empty = *deleted = *hist_size = 0; \
    /* Start with one zeroed bin so that hist[0] always exists. */ \
    hist = (khint_t *) calloc(1, sizeof(*hist)); \
    if (!hist) { return -1; } \
    for (i = kh_begin(h); i < kh_end(h); ++i) { \
        if (__ac_isempty(h->flags, i)) { (*empty)++; continue; } \
        if (__ac_isdel(h->flags, i)) { (*deleted)++; continue; } \
        /* Replay the probe sequence from the home slot of this key */ \
        /* (increments of 1, 2, 3, ...) until it lands on bucket i. */ \
        k = __hash_func(h->keys[i]) & mask; \
        dist = 0; \
        step = 0; \
        while (k != i) { \
            dist++; \
            k = (k + (++step)) & mask; \
        } \
        /* Grow only when a new maximum distance is seen; bins up to */ \
        /* dist_max are already allocated and zero-initialised. */ \
        if (dist_max < dist) { \
            khint_t *new_hist = (khint_t *) realloc(hist, sizeof(*new_hist) * (dist + 1)); \
            if (!new_hist) { free(hist); return -1; } \
            for (k = dist_max + 1; k <= dist; k++) new_hist[k] = 0; \
            hist = new_hist; \
            dist_max = dist; \
        } \
        hist[dist]++; \
    } \
    *hist_out = hist; \
    *hist_size = dist_max + 1; \
    return 0; \
}

#define KHASH_DECLARE(name, khkey_t, khval_t) \
__KHASH_TYPE(name, khkey_t, khval_t) \
Expand Down Expand Up @@ -391,6 +423,7 @@ static const double __ac_HASH_UPPER = 0.77;
@abstract 64-bit integer comparison function
*/
#define kh_int64_hash_equal(a, b) ((a) == (b))

/*! @function
@abstract const char* hash function
@param s Pointer to a null terminated string
Expand All @@ -402,12 +435,28 @@ static kh_inline khint_t __ac_X31_hash_string(const char *s)
if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
return h;
}

/*! @function
  @abstract     32-bit FNV-1a hash of a C string
  @param  s     Pointer to a NUL-terminated string; the terminator itself
                is not hashed
  @return       The hash value (the FNV offset basis for an empty string)
 */
static kh_inline khint_t __ac_FNV1a_hash_string(const char *s)
{
    const khint_t fnv_basis = 2166136261;
    const khint_t fnv_prime = 16777619;
    const char *cursor = s;
    khint_t hash = fnv_basis;

    /* FNV-1a: fold each byte in with XOR first, then multiply by the prime. */
    while (*cursor) {
        hash ^= (uint8_t) *cursor;
        hash *= fnv_prime;
        ++cursor;
    }
    return hash;
}

/*! @function
@abstract Another interface to const char* hash function
@param key Pointer to a nul terminated string [const char*]
@return The hash value [khint_t]
*/
#define kh_str_hash_func(key) __ac_X31_hash_string(key)
#define kh_str_hash_func(key) __ac_FNV1a_hash_string(key)

/*! @function
@abstract Const char* comparison function
*/
Expand All @@ -426,12 +475,29 @@ static kh_inline khint_t __ac_X31_hash_kstring(const kstring_t ks)
h = (h << 5) - h + (khint_t)ks.s[i];
return h;
}

/*! @function
  @abstract     32-bit FNV-1a hash of a kstring
  @param  ks    The kstring to hash (passed by value); exactly ks.l bytes
                of ks.s are hashed, so no NUL terminator is required
  @return       The hash value (the FNV offset basis when ks.l is 0)
 */
static kh_inline khint_t __ac_FNV1a_hash_kstring(const kstring_t ks)
{
    const khint_t fnv_basis = 2166136261;
    const khint_t fnv_prime = 16777619;
    khint_t hash = fnv_basis;
    size_t pos = 0;

    /* FNV-1a: fold each byte in with XOR first, then multiply by the prime. */
    while (pos < ks.l) {
        hash ^= (uint8_t) ks.s[pos];
        hash *= fnv_prime;
        pos++;
    }
    return hash;
}

/*! @function
@abstract Interface to kstring hash function.
@param key The kstring to hash [kstring_t]; permits hashing of non-nul terminated strings.
@return The hash value [khint_t]
*/
#define kh_kstr_hash_func(key) __ac_X31_hash_kstring(key)
#define kh_kstr_hash_func(key) __ac_FNV1a_hash_kstring(key)
/*! @function
@abstract kstring comparison function
*/
Expand Down Expand Up @@ -604,6 +670,19 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key)
code; \
} }

/*! @function
  @abstract     Gather hash table statistics
  @discussion   Forwards to the kh_stats_<name> function generated for the
                hash table type. The histogram is allocated with
                calloc()/realloc() and handed to the caller through @p hist;
                the caller is responsible for releasing it with free().
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  empty[out]     Number of empty hash bins [khint_t*]
  @param  deleted[out]   Number of hash bins with the deleted flag [khint_t*]
  @param  hist_size[out] Size of @p hist array [khint_t*]
  @param  hist[out]      Probe count histogram: entry d counts the keys
                         stored d probe steps from their home bin [khint_t**]
  @return       0 on success; -1 on failure (memory allocation failed, in
                which case @p hist is not written)
 */
#define kh_stats(name, h, empty, deleted, hist_size, hist) \
kh_stats_##name(h, empty, deleted, hist_size, hist)

/* More convenient interfaces */

/*! @function
Expand Down
Loading