Skip to content

Commit 91421d7

Browse files
committed
gopls/internal/cache: share type checking with analysis
Consolidate the logic of type checking and analysis, for the following benefits: - Simplify by eliminating redundant logic to type check and import packages in the analysis driver. - Reduce work by reusing type checked packages in analysis. By the time we run analysis on open packages, we likely have type checked the packages already, so the work to type check inside the analysis driver is very much redundant. - Reduce work by reusing the package key from packageHandle (which we have gone to great pains to optimize). - Reduce work (and file cache space) by avoiding the need to store export data alongside analysis facts. - Leverage the precision of our reachability analysis by using a bloom filter of reachable packages to better filter facts. - This indirectly fixes golang/go#64227 and golang/go#64236, since the crashing logic is deleted. For golang/go#53275 Fixes golang/go#64227 Fixes golang/go#64236 Change-Id: I431b8da35b2dce7c63f56ec1a3727e0747b79740 Reviewed-on: https://go-review.googlesource.com/c/tools/+/622038 Reviewed-by: Alan Donovan <adonovan@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
1 parent 36684df commit 91421d7

File tree

5 files changed

+444
-523
lines changed

5 files changed

+444
-523
lines changed

gopls/internal/bloom/filter.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package bloom
6+
7+
import (
8+
"hash/maphash"
9+
"math"
10+
)
11+
12+
// block is the element type of the filter bitfield.
13+
type block = uint8
14+
15+
// blockBits is the number of bits in a block.
const blockBits = 8
16+
17+
// Filter is a bloom filter for a set of strings.
//
// MayContain may report false positives, but it never reports false for a
// string that was passed to NewFilter.
18+
type Filter struct {
19+
// seeds holds one hash seed per hash function applied to each string.
seeds []maphash.Seed
20+
// blocks is the bitfield; NewFilter sets bits in it and MayContain tests them.
blocks []block
21+
}
22+
23+
// NewFilter constructs a new Filter with the given elements.
24+
func NewFilter(elems []string) *Filter {
25+
// Tolerate a 5% false positive rate.
26+
nblocks, nseeds := calibrate(0.05, len(elems))
27+
f := &Filter{
28+
blocks: make([]block, nblocks),
29+
seeds: make([]maphash.Seed, nseeds),
30+
}
31+
for i := range nseeds {
32+
f.seeds[i] = maphash.MakeSeed()
33+
}
34+
for _, elem := range elems {
35+
for _, seed := range f.seeds {
36+
index, bit := f.locate(seed, elem)
37+
f.blocks[index] |= bit
38+
}
39+
}
40+
return f
41+
}
42+
43+
// locate returns the block index and bit corresponding to the given hash seed and
44+
// string.
45+
func (f *Filter) locate(seed maphash.Seed, s string) (index int, bit block) {
46+
h := uint(maphash.String(seed, s))
47+
blk := h / blockBits % uint(len(f.blocks))
48+
bit = block(1 << (h % blockBits))
49+
return int(blk), bit
50+
}
51+
52+
// assert panics with msg unless cond holds.
func assert(cond bool, msg string) {
	if cond {
		return
	}
	panic(msg)
}
57+
58+
// calibrate approximates the number of blocks and seeds to use for a bloom
59+
// filter with desired false positive rate fpRate, given n elements.
60+
func calibrate(fpRate float64, n int) (blocks, seeds int) {
61+
// We following the terms of https://en.wikipedia.org/wiki/Bloom_filter:
62+
// - k is the number of hash functions,
63+
// - m is the size of the bit field;
64+
// - n is the number of set bits.
65+
66+
assert(0 < fpRate && fpRate < 1, "invalid false positive rate")
67+
assert(n >= 0, "invalid set size")
68+
69+
if n == 0 {
70+
// degenerate case; use the simplest filter
71+
return 1, 1
72+
}
73+
74+
// Calibrate the number of blocks based on the optimal number of bits per
75+
// element. In this case we round up, as more bits leads to fewer false
76+
// positives.
77+
logFpRate := math.Log(fpRate) // reused for k below
78+
m := -(float64(n) * logFpRate) / (math.Ln2 * math.Ln2)
79+
blocks = int(m) / blockBits
80+
if float64(blocks*blockBits) < m {
81+
blocks += 1
82+
}
83+
84+
// Estimate the number of hash functions (=seeds). This is imprecise, not
85+
// least since the formula in the article above assumes that the number of
86+
// bits per element is not rounded.
87+
//
88+
// Here we round to the nearest integer (not unconditionally round up), since
89+
// more hash functions do not always lead to better results.
90+
k := -logFpRate / math.Ln2
91+
seeds = max(int(math.Round(k)), 1)
92+
93+
return blocks, seeds
94+
}
95+
96+
// MayContain reports whether the filter may contain s.
97+
func (f *Filter) MayContain(s string) bool {
98+
for _, seed := range f.seeds {
99+
index, bit := f.locate(seed, s)
100+
if f.blocks[index]&bit == 0 {
101+
return false
102+
}
103+
}
104+
return true
105+
}

gopls/internal/bloom/filter_test.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package bloom
6+
7+
import (
8+
"math"
9+
"math/rand/v2"
10+
"testing"
11+
)
12+
13+
func TestFilter(t *testing.T) {
14+
elems := []string{
15+
"a", "apple", "b", "banana", "an arbitrarily long string", "", "世界",
16+
}
17+
18+
// First, sanity check that the filter contains all the given elements.
19+
f := NewFilter(elems)
20+
for _, elem := range elems {
21+
if got := f.MayContain(elem); !got {
22+
t.Errorf("MayContain(%q) = %t, want true", elem, got)
23+
}
24+
}
25+
26+
// Measure the false positives rate.
27+
//
28+
// Of course, we can't assert on the results, since they are probabilistic,
29+
// but this can be useful for interactive use.
30+
31+
fpRate := falsePositiveRate(len(f.blocks), len(f.seeds), len(elems))
32+
t.Logf("%d blocks, %d seeds, %.2g%% expected false positives", len(f.blocks), len(f.seeds), 100*fpRate)
33+
34+
// In practice, all positives below will be false, but be precise anyway.
35+
truePositive := make(map[string]bool)
36+
for _, e := range elems {
37+
truePositive[e] = true
38+
}
39+
40+
// Generate a large number of random strings to measure the false positive
41+
// rate.
42+
g := newStringGenerator()
43+
const samples = 1000
44+
falsePositives := 0
45+
for range samples {
46+
s := g.next()
47+
got := f.MayContain(s)
48+
if false {
49+
t.Logf("MayContain(%q) = %t", s, got)
50+
}
51+
if got && !truePositive[s] {
52+
falsePositives++
53+
}
54+
}
55+
t.Logf("false positives: %.1f%% (%d/%d)", 100*float64(falsePositives)/float64(samples), falsePositives, samples)
56+
}
57+
58+
// falsePositiveRate estimates the expected false positive rate for a filter
59+
// with the given number of blocks, seeds, and elements.
60+
func falsePositiveRate(block, seeds, elems int) float64 {
61+
k, m, n := float64(seeds), float64(block*blockBits), float64(elems)
62+
return math.Pow(1-math.Exp(-k*n/m), k)
63+
}
64+
65+
type stringGenerator struct {
66+
r *rand.Rand
67+
}
68+
69+
func newStringGenerator() *stringGenerator {
70+
return &stringGenerator{rand.New(rand.NewPCG(1, 2))}
71+
}
72+
73+
func (g *stringGenerator) next() string {
74+
l := g.r.IntN(50) // length
75+
var runes []rune
76+
for range l {
77+
runes = append(runes, rune(' '+rand.IntN('~'-' ')))
78+
}
79+
return string(runes)
80+
}
81+
82+
// TestDegenerateFilter checks that the degenerate filter with no elements
83+
// results in no false positives.
84+
func TestDegenerateFilter(t *testing.T) {
85+
f := NewFilter(nil)
86+
g := newStringGenerator()
87+
for range 100 {
88+
s := g.next()
89+
if f.MayContain(s) {
90+
t.Errorf("MayContain(%q) = true, want false", s)
91+
}
92+
}
93+
}

0 commit comments

Comments
 (0)