Skip to content

Commit

Permalink
gcs: add package for building and using Golomb-coded set filters
Browse files Browse the repository at this point in the history
  • Loading branch information
aakselrod authored and Roasbeef committed May 16, 2018
1 parent 06f32ab commit 3069dcf
Show file tree
Hide file tree
Showing 7 changed files with 616 additions and 1 deletion.
3 changes: 2 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
ISC License

Copyright (c) 2013-2016 The btcsuite developers
Copyright (c) 2013-2017 The btcsuite developers
Copyright (c) 2016-2017 The Lightning Network Developers

Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
Expand Down
24 changes: 24 additions & 0 deletions gcs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
base58
==========

[![Build Status](http://img.shields.io/travis/btcsuite/btcutil.svg)]
(https://travis-ci.org/btcsuite/btcutil) [![ISC License]
(http://img.shields.io/badge/license-ISC-blue.svg)](http://copyfree.org)
[![GoDoc](https://godoc.org/github.com/btcsuite/btcutil/gcs?status.png)]
(http://godoc.org/github.com/btcsuite/btcutil/gcs)

Package gcs provides an API for building and using a Golomb-coded set filter
similar to that described [here].(http://giovanni.bajo.it/post/47119962313/golomb-coded-sets-smaller-than-bloom-filters)

A comprehensive suite of tests is provided to ensure proper functionality.

## Installation and Updating

```bash
$ go get -u github.com/btcsuite/btcutil/gcs
```

## License

Package gcs is licensed under the [copyfree](http://copyfree.org) ISC
License.
24 changes: 24 additions & 0 deletions gcs/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (c) 2016-2017 The btcsuite developers
// Copyright (c) 2016-2017 The Lightning Network Developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.

/*
Package gcs provides an API for building and using a Golomb-coded set filter.
Golomb-Coded Set
A Golomb-coded set is a probabilistic data structure used similarly to a Bloom
filter. A filter uses constant-size overhead plus on average n+2 bits per
item added to the filter, where 2^-1 is the desired false positive (collision)
probability.
GCS use in Bitcoin
GCS filters are a proposed mechanism for storing and transmitting per-block
filters in Bitcoin. The usage is intended to be the inverse of Bloom filters:
a full node would send an SPV node the GCS filter for a block, which the SPV
node would check against its list of relevant items. The suggested collision
probability for Bitcoin use is 2^-20.
*/
package gcs
280 changes: 280 additions & 0 deletions gcs/gcs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
// Copyright (c) 2016-2017 The btcsuite developers
// Copyright (c) 2016-2017 The Lightning Network Developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.

package gcs

import (
"fmt"
"io"
"sort"

"github.com/aead/siphash"
"github.com/kkdai/bstream"
)

// Inspired by https://github.com/rasky/gcs

var (
// ErrNTooBig signifies that the filter can't handle N items.
ErrNTooBig = fmt.Errorf("N is too big to fit in uint32")

// ErrPTooBig signifies that the filter can't handle `1/2**P`
// collision probability.
ErrPTooBig = fmt.Errorf("P is too big to fit in uint32")

// ErrNoData signifies that an empty slice was passed.
ErrNoData = fmt.Errorf("No data provided")
)

const (
//KeySize is the size of the byte array required for key material for
// the SipHash keyed hash function.
KeySize = 16
)

// gcsFilter describes an immutable filter that can be built from
// a set of data elements, serialized, deserialized, and queried
// in a thread-safe manner. The serialized form is compressed as
// a Golomb Coded Set (GCS), but does not include N or P to allow
// the user to encode the metadata separately if necessary. The
// hash function used is SipHash, a keyed function; the key used
// in building the filter is required in order to match filter
// values and is not included in the serialized form.
type gcsFilter struct {
n uint32
p uint8
modulusP uint64
modulusNP uint64
filterData []byte
}

// BuildGCSFilter builds a new GCS filter with the collision probability of
// `1/(2**P)`, key `key`, and including every `[]byte` in `data` as a member of
// the set.
func BuildGCSFilter(P uint8, key [KeySize]byte,
data [][]byte) (*gcsFilter, error) {

// Some initial parameter checks: make sure we have data from which to
// build the filter, and make sure our parameters will fit the hash
// function we're using.
if len(data) == 0 {
return nil, ErrNoData
}
if len(data) > ((1 << 32) - 1) {
return nil, ErrNTooBig
}
if P > 32 {
return nil, ErrPTooBig
}

// Create the filter object and insert metadata.
f := gcsFilter{
n: uint32(len(data)),
p: P,
}
f.modulusP = uint64(1 << f.p)
f.modulusNP = uint64(f.n) * f.modulusP

// Build the filter.
var values uint64Slice
b := bstream.NewBStreamWriter(0)

// Insert the hash (modulo N*P) of each data element into a slice and
// sort the slice.
for _, d := range data {
v := siphash.Sum64(d, &key) % f.modulusNP
values = append(values, v)
}
sort.Sort(values)

// Write the sorted list of values into the filter bitstream,
// compressing it using Golomb coding.
var value, lastValue, remainder uint64
for _, v := range values {
// Calculate the difference between this value and the last,
// modulo P.
remainder = (v - lastValue) % f.modulusP
// Calculate the difference between this value and the last,
// divided by P.
value = (v - lastValue - remainder) / f.modulusP
lastValue = v
// Write the P multiple into the bitstream in unary; the
// average should be around 1 (2 bits - 0b10).
for value > 0 {
b.WriteBit(true)
value--
}
b.WriteBit(false)
// Write the remainder as a big-endian integer with enough bits
// to represent the appropriate collision probability.
b.WriteBits(remainder, int(f.p))
}

// Copy the bitstream into the filter object and return the object.
f.filterData = b.Bytes()
return &f, nil
}

// FromBytes deserializes a GCS filter from a known N, P, and serialized
// filter as returned by Bytes().
func FromBytes(N uint32, P uint8, d []byte) (*gcsFilter, error) {

// Basic sanity check.
if P > 32 {
return nil, ErrPTooBig
}

// Create the filter object and insert metadata.
f := &gcsFilter{
n: N,
p: P,
}
f.modulusP = uint64(1 << f.p)
f.modulusNP = uint64(f.n) * f.modulusP

// Copy the filter.
f.filterData = make([]byte, len(d))
copy(f.filterData, d)
return f, nil
}

// Bytes returns the serialized format of the GCS filter, which does not
// include N or P (returned by separate methods) or the key used by SipHash.
func (f *gcsFilter) Bytes() []byte {
filterData := make([]byte, len(f.filterData))
copy(filterData, f.filterData)
return filterData
}

// P returns the filter's collision probability as a negative power of 2 (that
// is, a collision probability of `1/2**20` is represented as 20).
func (f *gcsFilter) P() uint8 {
return f.p
}

// N returns the size of the data set used to build the filter.
func (f *gcsFilter) N() uint32 {
return f.n
}

// Match checks whether a []byte value is likely (within collision
// probability) to be a member of the set represented by the filter.
func (f *gcsFilter) Match(key [KeySize]byte, data []byte) (bool, error) {

// Create a filter bitstream.
filterData := f.Bytes()
b := bstream.NewBStreamReader(filterData)

// Hash our search term with the same parameters as the filter.
term := siphash.Sum64(data, &key) % f.modulusNP

// Go through the search filter and look for the desired value.
var lastValue uint64
for lastValue < term {
// Read the difference between previous and new value
// from bitstream.
value, err := f.readFullUint64(b)
if err != nil {
if err == io.EOF {
return false, nil
}
return false, err
}
// Add the previous value to it.
value += lastValue
if value == term {
return true, nil
}
lastValue = value
}
return false, nil
}

// MatchAny returns checks whether any []byte value is likely (within
// collision probability) to be a member of the set represented by the
// filter faster than calling Match() for each value individually.
func (f *gcsFilter) MatchAny(key [KeySize]byte, data [][]byte) (bool, error) {

// Basic sanity check.
if len(data) == 0 {
return false, ErrNoData
}

// Create a filter bitstream.
filterData := f.Bytes()
b := bstream.NewBStreamReader(filterData)

// Create an uncompressed filter of the search values.
var values uint64Slice
for _, d := range data {
v := siphash.Sum64(d, &key) % f.modulusNP
values = append(values, v)
}
sort.Sort(values)

// Zip down the filters, comparing values until we either run out of
// values to compare in one of the filters or we reach a matching value.
var lastValue1, lastValue2 uint64
lastValue2 = values[0]
i := 1
for lastValue1 != lastValue2 {
// Check which filter to advance to make sure we're comparing
// the right values.
switch {
case lastValue1 > lastValue2:
// Advance filter created from search terms or return
// false if we're at the end because nothing matched.
if i < len(values) {
lastValue2 = values[i]
i++
} else {
return false, nil
}
case lastValue2 > lastValue1:
// Advance filter we're searching or return false if
// we're at the end because nothing matched.
value, err := f.readFullUint64(b)
if err != nil {
if err == io.EOF {
return false, nil
}
return false, err
}
lastValue1 += value
}
}
// If we've made it this far, an element matched between filters so
// we return true.
return true, nil
}

// readFullUint64 reads a value represented by the sum of a unary multiple
// of the filter's P modulus (`2**P`) and a big-endian P-bit remainder.
func (f *gcsFilter) readFullUint64(b *bstream.BStream) (uint64, error) {
var v uint64

// Count the 1s until we reach a 0.
c, err := b.ReadBit()
if err != nil {
return 0, err
}
for c == true {
v++
c, err = b.ReadBit()
if err != nil {
return 0, err
}
}

// Read P bits.
remainder, err := b.ReadBits(int(f.p))
if err != nil {
return 0, err
}

// Add the multiple and the remainder.
v = v*f.modulusP + remainder
return v, nil
}
Loading

0 comments on commit 3069dcf

Please sign in to comment.