Skip to content

Commit

Permalink
feat: added hashtables
Browse files Browse the repository at this point in the history
  • Loading branch information
HotPotatoC committed Mar 22, 2022
1 parent 103c41f commit 3e49c46
Show file tree
Hide file tree
Showing 9 changed files with 772 additions and 1 deletion.
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ module github.com/HotPotatoC/sture

go 1.18

require golang.org/x/exp v0.0.0-20220318154914-8dddf5d87bd8 // indirect
require (
github.com/cespare/xxhash/v2 v2.1.2
golang.org/x/exp v0.0.0-20220318154914-8dddf5d87bd8
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE=
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
golang.org/x/exp v0.0.0-20220318154914-8dddf5d87bd8 h1:s/+U+w0teGzcoH2mdIlFQ6KfVKGaYpgyGdUefZrn9TU=
golang.org/x/exp v0.0.0-20220318154914-8dddf5d87bd8/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE=
210 changes: 210 additions & 0 deletions hashtable/chained_hashtable.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
package hashtable

const (
// chtLoadFactor is the load factor used to derive the resize thresholds:
// NewChainedHashTable sets growAt to buckets * chtLoadFactor and shrinkAt
// to buckets * (1 - chtLoadFactor). The "cht" prefix stands for chained
// hash table and avoids redefining a name in hashtable.go.
chtLoadFactor = 0.85
)

// bucket is a single slot of a chained hash table. It holds the doubly
// linked list of entries whose keys hash to the same index.
type bucket[K comparable, V any] struct {
head *Entry[K, V] // first entry of the doubly linked list
tail *Entry[K, V] // last entry of the doubly linked list; new entries are linked after it
}

// ChainedHashTable is a generic hash table that resolves collisions by
// chaining: every bucket holds a doubly linked list of entries.
type ChainedHashTable[K comparable, V any] struct {
// cap is the bucket count chosen when the table was constructed.
cap int
// size is the number of entries in the table.
size int
// growAt is the size threshold (bucket count * load factor). Once size
// reaches it, Set asks resize for twice the number of buckets before
// inserting.
growAt int
// shrinkAt is the size threshold (bucket count * (1 - load factor)). Once
// size is at or below it, Del asks resize for half the number of buckets
// before deleting.
shrinkAt int
// buckets is the array of buckets; a nil element is an empty bucket.
buckets []*bucket[K, V]
// hashFunc maps a key to an index into buckets.
hashFunc HashFunction[K]
}

// NewChainedHashTable returns a new, empty chained hash table.
//
// cap is the requested minimum number of buckets; it is rounded up to the
// next power of two and never goes below 8. hashFunc is optional: only the
// first element is used, and when it is missing or nil the table falls back
// to DefaultHashFunction.
func NewChainedHashTable[K comparable, V any](cap int, hashFunc ...HashFunction[K]) *ChainedHashTable[K, V] {
	// Checking the length (instead of comparing the slice with nil) also
	// covers an empty, non-nil variadic slice, and the nil check on the
	// element keeps a nil function from being stored and called later.
	var hf HashFunction[K] = DefaultHashFunction[K]
	if len(hashFunc) > 0 && hashFunc[0] != nil {
		hf = hashFunc[0]
	}

	// Round the requested capacity up to a power of two, starting at 8.
	c := 8
	for c < cap {
		c *= 2
	}

	cht := &ChainedHashTable[K, V]{
		buckets:  make([]*bucket[K, V], c),
		hashFunc: hf,
		cap:      c,
	}

	// Resize thresholds are derived from the bucket count and load factor.
	cht.growAt = int(float64(c) * chtLoadFactor)
	cht.shrinkAt = int(float64(c) * (1 - chtLoadFactor))

	return cht
}

// resize rebuilds the table with room for at least newCap buckets and
// re-inserts every existing entry into the new bucket array.
//
// The replacement table is created with the receiver's own hash function, so
// a custom HashFunction passed to NewChainedHashTable survives the resize
// instead of being silently replaced by DefaultHashFunction.
func (cht *ChainedHashTable[K, V]) resize(newCap int) {
	var ht *ChainedHashTable[K, V]
	if cht.hashFunc != nil {
		ht = NewChainedHashTable[K, V](newCap, cht.hashFunc)
	} else {
		// A zero-value table has no hash function yet; let the constructor
		// pick the default one.
		ht = NewChainedHashTable[K, V](newCap)
	}

	// Walk every chain without modifying it; the old buckets are dropped as
	// a whole once the receiver is overwritten below.
	for _, b := range cht.buckets {
		if b == nil {
			continue
		}
		for e := b.head; e != nil; e = e.next {
			ht.set(e.key, e.value)
		}
	}

	*cht = *ht
}

// Set stores value under key. When the number of entries has reached the
// grow threshold, the bucket array is doubled before the insertion.
func (cht *ChainedHashTable[K, V]) Set(key K, value V) {
	if loadReached := cht.size >= cht.growAt; loadReached {
		doubled := 2 * len(cht.buckets)
		cht.resize(doubled)
	}

	cht.set(key, value)
}

// set stores value under key without checking the resize thresholds.
//
// If key is already present in its bucket, the existing entry's value is
// overwritten and size stays unchanged. Otherwise a new entry is appended to
// the end of the bucket's list and size is incremented.
func (cht *ChainedHashTable[K, V]) set(key K, value V) {
	idx := cht.hashFunc(key, len(cht.buckets))
	b := cht.buckets[idx]

	// Empty bucket: the new entry becomes both head and tail.
	if b == nil || b.head == nil {
		e := NewEntry(key, value)
		cht.buckets[idx] = &bucket[K, V]{head: e, tail: e}
		cht.size++
		return
	}

	// Update in place when the key already exists, so a key never occupies
	// more than one entry.
	for curr := b.head; curr != nil; curr = curr.next {
		if curr.key == key {
			curr.value = value
			return
		}
	}

	// Key not found: link a new entry after the current tail.
	e := NewEntry(key, value)
	e.prev = b.tail
	b.tail.next = e
	b.tail = e
	cht.size++
}

// Get looks up key and returns its value together with true. When the key
// is not in the table it returns the zero value of V and false.
func (cht *ChainedHashTable[K, V]) Get(key K) (V, bool) {
	value, found := cht.get(key)
	return value, found
}

// get looks up key in its bucket. It returns the stored value and true on a
// hit, or the zero value of V and false on a miss.
func (cht *ChainedHashTable[K, V]) get(key K) (V, bool) {
	var zero V // returned whenever the key is absent

	if cht.size == 0 {
		return zero, false
	}

	b := cht.buckets[cht.hashFunc(key, len(cht.buckets))]
	if b == nil || b.head == nil || b.tail == nil {
		return zero, false
	}

	// The two ends of the list are checked first, then the whole chain.
	switch {
	case b.head.key == key:
		return b.head.value, true
	case b.tail.key == key:
		return b.tail.value, true
	}

	for e := b.head; e != nil; e = e.next {
		if e.key == key {
			return e.value, true
		}
	}

	return zero, false
}

// Del removes key from the table and reports whether an entry was removed.
//
// Before deleting, the bucket array is halved when the number of entries is
// at or below the shrink threshold. Tables that are already at the minimum
// bucket count are left alone.
func (cht *ChainedHashTable[K, V]) Del(key K) bool {
	// minBuckets mirrors the smallest bucket count NewChainedHashTable ever
	// allocates. Asking for fewer would rebuild a table of the same size and
	// rehash every entry for nothing.
	const minBuckets = 8

	if len(cht.buckets) > minBuckets && cht.size <= cht.shrinkAt {
		cht.resize(len(cht.buckets) / 2)
	}

	return cht.del(key)
}

// del unlinks the first entry matching key from its bucket and reports
// whether an entry was removed. size is decremented on every successful
// removal, whether the entry was the head, the tail, in the middle, or the
// only one in its bucket.
func (cht *ChainedHashTable[K, V]) del(key K) bool {
	if cht.size == 0 {
		return false
	}

	idx := cht.hashFunc(key, len(cht.buckets))
	b := cht.buckets[idx]
	if b == nil || b.head == nil || b.tail == nil {
		return false
	}

	for curr := b.head; curr != nil; curr = curr.next {
		if curr.key != key {
			continue
		}

		// Detach curr from its predecessor, or move the head forward when
		// curr is the first entry.
		if curr == b.head {
			b.head = curr.next
		} else {
			curr.prev.next = curr.next
		}

		// Detach curr from its successor, or move the tail back when curr
		// is the last entry.
		if curr == b.tail {
			b.tail = curr.prev
		} else {
			curr.next.prev = curr.prev
		}

		// Drop the bucket entirely once its list is empty.
		if b.head == nil {
			cht.buckets[idx] = nil
		}

		cht.size--
		return true
	}

	return false
}

// Exists reports whether key is present in the table.
func (cht *ChainedHashTable[K, V]) Exists(key K) bool {
	if _, found := cht.get(key); found {
		return true
	}
	return false
}

// Size reports how many entries the table currently tracks.
func (cht *ChainedHashTable[K, V]) Size() int {
	count := cht.size
	return count
}

// Cap reports the table's capacity, i.e. the current number of buckets.
func (cht *ChainedHashTable[K, V]) Cap() int {
	nBuckets := len(cht.buckets)
	return nBuckets
}
101 changes: 101 additions & 0 deletions hashtable/chained_hashtable_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package hashtable_test

import (
"fmt"
"testing"

"github.com/HotPotatoC/sture/hashtable"
)

// TestChainedHashTable_SetGet stores five key/value pairs, checks that each
// one can be read back, and checks that an unknown key is reported missing.
func TestChainedHashTable_SetGet(t *testing.T) {
	ht := hashtable.NewChainedHashTable[string, string](8)

	pairs := []struct{ key, value string }{
		{"a", "A"},
		{"b", "B"},
		{"c", "C"},
		{"d", "D"},
		{"e", "E"},
	}

	for _, p := range pairs {
		ht.Set(p.key, p.value)
	}

	for _, p := range pairs {
		got, ok := ht.Get(p.key)
		if !ok || got != p.value {
			t.Errorf("\nExpected: %s\nGot: %s", p.value, got)
		}
	}

	// "f" was never stored, so the lookup must miss.
	if got, ok := ht.Get("f"); ok {
		t.Errorf("\nExpected: %s\nGot: %s", "", got)
	}
}

// TestChainedHashTable_Del stores five key/value pairs, deletes all of them,
// and checks that none of the keys can be found afterwards.
func TestChainedHashTable_Del(t *testing.T) {
	ht := hashtable.NewChainedHashTable[string, string](8)

	pairs := [][2]string{
		{"a", "A"},
		{"b", "B"},
		{"c", "C"},
		{"d", "D"},
		{"e", "E"},
	}

	for _, p := range pairs {
		ht.Set(p[0], p[1])
	}

	for _, p := range pairs {
		ht.Del(p[0])
	}

	for _, p := range pairs {
		if got, ok := ht.Get(p[0]); ok {
			t.Errorf("\nExpected: %s\nGot: %s", "", got)
		}
	}
}

// TestChainedHashTable_Set10m inserts ten million distinct keys, then checks
// the reported size and that every key maps back to its own value.
func TestChainedHashTable_Set10m(t *testing.T) {
	const total = 10000000

	ht := hashtable.NewChainedHashTable[string, string](8)
	for i := 0; i < total; i++ {
		ht.Set(fmt.Sprintf("k%d", i), fmt.Sprintf("v%d", i))
	}

	if got := ht.Size(); got != total {
		t.Errorf("\nExpected size: %d\nGot: %d", total, got)
	}

	// Count the misses instead of reporting each one, to keep the output
	// readable.
	var failures int
	for i := 0; i < total; i++ {
		want := fmt.Sprintf("v%d", i)
		got, ok := ht.Get(fmt.Sprintf("k%d", i))
		if !ok || got != want {
			failures++
		}
	}

	if failures > 0 {
		t.Errorf("\nFailures: %d", failures)
	}
}
38 changes: 38 additions & 0 deletions hashtable/entry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package hashtable

// Entry is one key/value pair stored in a hash table. Entries that share a
// bucket are chained together through their next and prev links.
type Entry[K comparable, V any] struct {
	key   K
	value V

	next, prev *Entry[K, V]
}

// NewEntry allocates an unlinked entry holding the given key and value.
func NewEntry[K comparable, V any](key K, value V) *Entry[K, V] {
	ent := new(Entry[K, V])
	ent.key = key
	ent.value = value
	return ent
}

// Key reports the key stored in the entry.
func (ent *Entry[K, V]) Key() K { return ent.key }

// Value reports the value stored in the entry.
func (ent *Entry[K, V]) Value() V { return ent.value }

// Next reports the entry that follows this one in the list, or nil when
// there is none.
func (ent *Entry[K, V]) Next() *Entry[K, V] { return ent.next }

// Prev reports the entry that precedes this one in the list, or nil when
// there is none.
func (ent *Entry[K, V]) Prev() *Entry[K, V] { return ent.prev }
26 changes: 26 additions & 0 deletions hashtable/hash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package hashtable

import (
"unsafe"

"github.com/cespare/xxhash/v2"
)

// HashFunction maps a key to a bucket index. It receives the key and the
// current number of buckets (size). ChainedHashTable uses the result
// directly as an index into its bucket slice, so the result must be smaller
// than size.
type HashFunction[K comparable] func(key K, size int) uint64

// DefaultHashFunction is the default hash function (using xxhash). It hashes
// key with xxhash.Sum64String and reduces the result modulo size, so the
// returned value is always in [0, size). A size of 0 panics with an integer
// division by zero.
//
// Keys whose type is exactly string are hashed by their contents. Every
// other key type (including named types whose underlying type is string) is
// hashed over the unsafe.Sizeof(key) bytes of the key value as laid out in
// memory.
//
// NOTE(review): because non-string keys are hashed by their raw bytes, key
// types that contain pointers, strings, interfaces or padding may produce
// different hashes for values that compare equal — confirm which key types
// are meant to be supported.
func DefaultHashFunction[K comparable](key K, size int) uint64 {
// reference: https://github.com/tidwall/hashmap/blob/master/map.go#L46
var sKey string
switch any(key).(type) {
case string:
// key is a string here: reinterpret it as one without copying.
sKey = *(*string)(unsafe.Pointer(&key))
default:
// Build a string header by hand (data pointer + length) that views the
// key's own memory, then reinterpret that header as a string.
sKey = *(*string)(unsafe.Pointer(&struct {
data unsafe.Pointer
len int
}{unsafe.Pointer(&key), int(unsafe.Sizeof(key))}))
}
return xxhash.Sum64String(sKey) % uint64(size)
}
Loading

0 comments on commit 3e49c46

Please sign in to comment.