In the last post I described how linear hashing works. If you haven't read it yet, you should go read it first. I can wait, this post presents a new implementation which is a lot simpler than the disk based version referenced in the last post.

In the demonstration version (that would be this one) I represent the buckets as binary search trees. Find the exact code for this article in my goplay repository. An updated and slightly improved version is available in my data-structures repository along with several other interesting algorithms.

## Profiling Results

``````              iterations, ns per operation
BenchmarkGoMap    100000,      26292 ns/op -- native map
BenchmarkHash      20000,      79526 ns/op -- classical separate chaining
BenchmarkMLHash    20000,      80820 ns/op -- in memory LH
BenchmarkLHash       500,    5733882 ns/op -- disk based LH, LRU cached in memory
``````

## Structs

``````type bst struct {
hash int
key Hashable
value interface{}
left *bst
right *bst
}

type linearhash struct {
table []*bst
n uint
r uint
i uint
}
``````

## Bucket

``````func (self *linearhash) bucket(key Hashable) uint {
m := uint(key.Hash() & ((1<<self.i)-1))
if m < self.n {
return m
} else {
return m ^ (1<<(self.i-1))
}
}
``````

## Has, Put, Get and Remove

``````func (self *linearhash) Put(key Hashable, value interface{}) (err error) {
var updated bool
bkt_idx := self.bucket(key)
self.table[bkt_idx], updated = self.table[bkt_idx].Put(key, value)
if !updated {
self.r += 1
}
if float64(self.r) > UTILIZATION * float64(self.n) * float64(RECORDS_PER_BLOCK) {
return self.split()
}
return nil
}

func (self *linearhash) Get(key Hashable) (value interface{}, err error) {
bkt_idx := self.bucket(key)
return self.table[bkt_idx].Get(key)
}

func (self *linearhash) Has(key Hashable) (bool) {
bkt_idx := self.bucket(key)
return self.table[bkt_idx].Has(key)
}

func (self *linearhash) Remove(key Hashable) (value interface{}, err error) {
bkt_idx := self.bucket(key)
self.table[bkt_idx], value, err = self.table[bkt_idx].Remove(key)
if err == nil {
self.r -= 1
}
return
}
``````

## Split

``````func (self *linearhash) split() (err error) {
bkt_idx := self.n % (1 << (self.i - 1))
old_bkt := self.table[bkt_idx]
var bkt_a, bkt_b *bst
self.n += 1
if self.n > (1 << self.i) {
self.i += 1
}
for k, v, next := old_bkt.Iterate()(); next != nil; k, v, next = next() {
if self.bucket(k) == bkt_idx {
bkt_a, _ = bkt_a.Put(k, v)
} else {
bkt_b, _ = bkt_b.Put(k, v)
}
}
self.table[bkt_idx] = bkt_a
self.table = append(self.table, bkt_b)
return nil
}
``````

## Buckets

One cool thing about my BST implementation is it has a functional iterator. As you may know you can't make a generic generator function in Go without using a separate goroutine, which isn't really appropriate here. The `Iterate` function makes a `BSTIterator` which is a function which yields the current key, value pair and a function which provides the next item. You iterate by calling `next` over and over again until the function pointer is nil. I hadn't written an iterator like this before but I like the way it works so I will probably re-use this pattern for future Go iterators.

``````type BSTIterator func()(key Hashable, value interface{}, next BSTIterator)
func (self *bst) Iterate() BSTIterator {
pop := func(stack []*bst) ([]*bst, *bst) {
if len(stack) <= 0 {
return stack, nil
} else {
return stack[0:len(stack)-1], stack[len(stack)-1]
}
}
procnode := func(stack []*bst, node *bst) []*bst {
if node == nil {
return stack
}
if node.right != nil {
stack = append(stack, node.right)
}
if node.left != nil {
stack = append(stack, node.left)
}
return stack
}
var make_iterator func(stack []*bst) BSTIterator
make_iterator = func(stack []*bst) BSTIterator {
return func()(Hashable, interface{}, BSTIterator){

var node *bst
stack, node = pop(stack)
if node == nil {
return nil, nil, nil
}
stack = procnode(stack, node)
return node.key, node.value, make_iterator(stack)
}
}
return make_iterator([]*bst{self})
}
``````

### Standard Has, Get, Put, Remove, Size

``````func (self *bst) Has(key Hashable) (has bool) {
if self == nil {
return false
}
if self.key.Equals(key) {
return true
} else if key.Less(self.key) {
return self.left.Has(key)
} else {
return self.right.Has(key)
}
}

func (self *bst) Get(key Hashable) (value interface{}, err error) {
if self == nil {
return nil, Errors["not-found"]
}
if self.key.Equals(key) {
return self.value, nil
} else if key.Less(self.key) {
return self.left.Get(key)
} else {
return self.right.Get(key)
}
}

func (self *bst) Put(key Hashable, value interface{}) (_ *bst, updated bool) {
if self == nil {
return &bst{hash: key.Hash(), key: key, value: value}, false
}
if self.key.Equals(key) {
self.value = value
return self, true
}

if key.Less(self.key) {
self.left, updated = self.left.Put(key, value)
} else {
self.right, updated = self.right.Put(key, value)
}
return self, updated
}

func (self *bst) Remove(key Hashable) (_ *bst, value interface{}, err error) {
if self == nil {
return nil, nil, Errors["not-found"]
}

if self.key.Equals(key) {
if self.left != nil && self.right != nil {
if self.left.Size() < self.right.Size() {
lmd := self.right.lmd()
lmd.left = self.left
return self.right, self.value, nil
} else {
rmd := self.left.rmd()
rmd.right = self.right
return self.left, self.value, nil
}
} else if self.left == nil {
return self.right, self.value, nil
} else if self.right == nil {
return self.left, self.value, nil
} else {
return nil, self.value, nil
}
}
if key.Less(self.key) {
self.left, value, err = self.left.Remove(key)
} else {
self.right, value, err = self.right.Remove(key)
}
return self, value, err
}

func (self *bst) Size() int {
if self == nil {
return 0
}
return 1 + self.left.Size() + self.right.Size()
}
``````

### The _md, lmr, rmd

These implement "left most descendent" and "right most descendent". Used by remove to hook up the nodes correctly.

``````func (self *bst) _md(side func(*bst)*bst) (*bst) {
if self == nil {
return nil
} else if side(self) != nil {
return side(self)._md(side)
} else {
return self
}
}

func (self *bst) lmd() (*bst) {
return self._md(func(node *bst)*bst { return node.left })
}

func (self *bst) rmd() (*bst) {
return self._md(func(node *bst)*bst { return node.right })
}
``````