This implements new version 2 filters which have 4 changes as compared to version 1 filters: - Support for independently specifying the false positive rate and Golomb coding bin size which allows minimizing the filter size - A faster (incompatible with version 1) reduction function - A more compact serialization for the number of members in the set - Deduplication of all hash collisions prior to reducing and serializing the deltas In addition, it adds a full set of tests and updates the benchmarks to use the new version 2 filters. The primary motivating factor for these changes is the ability to minimize the size of the filters, however, the following is a before and after comparison of version 1 and 2 filters in terms of performance and allocations. It is interesting to note the results for attempting to match a single item is not very representative due to the fact the actual hash value itself dominates to the point it can significantly vary due to the very low ns timings involved. Those differences average out when matching multiple items, which is the much more realistic scenario, and the performance increase is in line with the expected values. It is also worth nothing that filter construction now takes a bit longer due to the additional deduplication step. While the performance numbers for filter construction are about 25% larger in relative terms, it is only a few ms difference in practice and therefore is an acceptable trade off for the size savings provided. benchmark old ns/op new ns/op delta ----------------------------------------------------------------- BenchmarkFilterBuild50000 16194920 20279043 +25.22% BenchmarkFilterBuild100000 32609930 41629998 +27.66% BenchmarkFilterMatch 620 593 -4.35% BenchmarkFilterMatchAny 2687 2302 -14.33% benchmark old allocs new allocs delta ----------------------------------------------------------------- BenchmarkFilterBuild50000 6 17 +183.33% BenchmarkFilterBuild100000 6 18 +200.00% BenchmarkFilterMatch 0 0 +0.00% BenchmarkFilterMatchAny 0 0 +0.00% benchmark old bytes new bytes delta ----------------------------------------------------------------- BenchmarkFilterBuild50000 688366 2074653 +201.39% BenchmarkFilterBuild100000 1360064 4132627 +203.86% BenchmarkFilterMatch 0 0 +0.00% BenchmarkFilterMatchAny 0 0 +0.00%
162 lines
4.2 KiB
Go
162 lines
4.2 KiB
Go
// Copyright (c) 2018-2019 The Decred developers
|
|
// Use of this source code is governed by an ISC
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package gcs
|
|
|
|
import (
|
|
"math/rand"
|
|
"testing"
|
|
|
|
"github.com/decred/dcrd/chaincfg/chainhash"
|
|
)
|
|
|
|
var (
|
|
// globalMatch is used to ensure the benchmarks do not elide code.
|
|
globalMatch bool
|
|
|
|
// globalHashResult is used to ensure the benchmarks do not elide code.
|
|
globalHashResult chainhash.Hash
|
|
)
|
|
|
|
const (
|
|
// benchB is used as the B parameter when creating new filters throughout
|
|
// the benchmarks.
|
|
benchB = uint8(19)
|
|
|
|
// benchM is used as the M parameter when creating new filters throughout
|
|
// the benchmarks.
|
|
benchM = 784931
|
|
)
|
|
|
|
// genFilterElements generates the given number of elements using the provided
|
|
// prng. This allows a prng with a fixed seed to be provided so the same values
|
|
// are produced for each benchmark iteration.
|
|
func genFilterElements(numElements uint, prng *rand.Rand) ([][]byte, error) {
|
|
result := make([][]byte, numElements)
|
|
for i := uint(0); i < numElements; i++ {
|
|
randElem := make([]byte, 32)
|
|
if _, err := prng.Read(randElem); err != nil {
|
|
return nil, err
|
|
}
|
|
result[i] = randElem
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// BenchmarkFilterBuild benchmarks building a filter with 50000 elements.
|
|
func BenchmarkFilterBuild50000(b *testing.B) {
|
|
// Use a fixed prng seed for stable benchmarks.
|
|
prng := rand.New(rand.NewSource(0))
|
|
contents, err := genFilterElements(50000, prng)
|
|
if err != nil {
|
|
b.Fatalf("unable to generate random item: %v", err)
|
|
}
|
|
|
|
b.ReportAllocs()
|
|
b.ResetTimer()
|
|
var key [KeySize]byte
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := NewFilterV2(benchB, benchM, key, contents)
|
|
if err != nil {
|
|
b.Fatalf("unable to generate filter: %v", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// BenchmarkFilterBuild benchmarks building a filter with 100000 elements.
|
|
func BenchmarkFilterBuild100000(b *testing.B) {
|
|
// Use a fixed prng seed for stable benchmarks.
|
|
prng := rand.New(rand.NewSource(0))
|
|
contents, err := genFilterElements(100000, prng)
|
|
if err != nil {
|
|
b.Fatalf("unable to generate random item: %v", err)
|
|
}
|
|
|
|
b.ReportAllocs()
|
|
b.ResetTimer()
|
|
var key [KeySize]byte
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := NewFilterV2(benchB, benchM, key, contents)
|
|
if err != nil {
|
|
b.Fatalf("unable to generate filter: %v", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// BenchmarkFilterMatch benchmarks querying a filter for a single value.
|
|
func BenchmarkFilterMatch(b *testing.B) {
|
|
// Use a fixed prng seed for stable benchmarks.
|
|
prng := rand.New(rand.NewSource(0))
|
|
contents, err := genFilterElements(20, prng)
|
|
if err != nil {
|
|
b.Fatalf("unable to generate random item: %v", err)
|
|
}
|
|
|
|
var key [KeySize]byte
|
|
filter, err := NewFilterV2(benchB, benchM, key, contents)
|
|
if err != nil {
|
|
b.Fatalf("Failed to build filter")
|
|
}
|
|
|
|
b.ReportAllocs()
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
globalMatch = filter.Match(key, []byte("Something"))
|
|
globalMatch = filter.Match(key, []byte("Nates"))
|
|
}
|
|
}
|
|
|
|
// BenchmarkFilterMatchAny benchmarks querying a filter for a list of values.
|
|
func BenchmarkFilterMatchAny(b *testing.B) {
|
|
// Generate elements for filter.
|
|
prng1 := rand.New(rand.NewSource(0))
|
|
contents, err := genFilterElements(20, prng1)
|
|
if err != nil {
|
|
b.Fatalf("unable to generate random item: %v", err)
|
|
}
|
|
|
|
// Generate matches using a separate prng seed so they're very likely all
|
|
// misses.
|
|
prng2 := rand.New(rand.NewSource(1))
|
|
matchList, err := genFilterElements(20, prng2)
|
|
if err != nil {
|
|
b.Fatalf("unable to generate random item: %v", err)
|
|
}
|
|
|
|
var key [KeySize]byte
|
|
filter, err := NewFilterV2(benchB, benchM, key, contents)
|
|
if err != nil {
|
|
b.Fatalf("Failed to build filter")
|
|
}
|
|
|
|
b.ReportAllocs()
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
globalMatch = filter.MatchAny(key, matchList)
|
|
}
|
|
}
|
|
|
|
// BenchmarkHash benchmarks performance of hashing a filter.
|
|
func BenchmarkHash(b *testing.B) {
|
|
// Use a fixed prng seed for stable benchmarks.
|
|
prng := rand.New(rand.NewSource(0))
|
|
contents, err := genFilterElements(100, prng)
|
|
if err != nil {
|
|
b.Fatalf("unable to generate random item: %v", err)
|
|
}
|
|
|
|
var key [KeySize]byte
|
|
filter, err := NewFilterV2(benchB, benchM, key, contents)
|
|
if err != nil {
|
|
b.Fatalf("Failed to build filter")
|
|
}
|
|
|
|
b.ReportAllocs()
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
globalHashResult = filter.Hash()
|
|
}
|
|
}
|