Skip to content

Commit b3932ba

Browse files
committed
runtime: fix sudog leak
The SudoG used to sit on the stack, so it was cheap to allocate and didn't need to be cleaned up when finished. For the conversion to Go, we had to move sudog off the stack for a few reasons, so we added a cache of recently used sudogs to keep allocation cheap. But we didn't add any of the necessary cleanup before adding a SudoG to the new cache, and so the cached SudoGs had stale pointers inside them that have caused all sorts of awful, hard-to-debug problems. CL 155760043 made sure SudoG.elem is cleaned up. CL 150520043 made sure SudoG.selectdone is cleaned up. This CL makes sure SudoG.next, SudoG.prev, and SudoG.waitlink are cleaned up. I should have done this when I did the other two fields; instead I wasted a week tracking down a leak they caused. A dangling SudoG.waitlink can point into a sudogcache list that has been "forgotten" in order to let the GC collect it, but that dangling .waitlink keeps the list from being collected. And then the list holding the SudoG with the dangling waitlink can find itself in the same situation, and so on. We end up with lists of lists of unusable SudoGs that are still linked into the object graph and never collected (given the right mix of non-trivial selects and non-channel synchronization). More details in golang.org/issue/9110. Fixes #9110. LGTM=r R=r CC=dvyukov, golang-codereviews, iant, khr https://golang.org/cl/177870043
1 parent 6150414 commit b3932ba

File tree

6 files changed

+121
-0
lines changed

6 files changed

+121
-0
lines changed

src/runtime/chan.go

+1
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,7 @@ func (q *waitq) dequeue() *sudog {
630630
return nil
631631
}
632632
q.first = sgp.next
633+
sgp.next = nil
633634
if q.last == sgp {
634635
q.last = nil
635636
}

src/runtime/mgc0.go

+16
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,26 @@ func clearpools() {
5151
if c := p.mcache; c != nil {
5252
c.tiny = nil
5353
c.tinysize = 0
54+
55+
// disconnect cached list before dropping it on the floor,
56+
// so that a dangling ref to one entry does not pin all of them.
57+
var sg, sgnext *sudog
58+
for sg = c.sudogcache; sg != nil; sg = sgnext {
59+
sgnext = sg.next
60+
sg.next = nil
61+
}
5462
c.sudogcache = nil
5563
}
64+
5665
// clear defer pools
5766
for i := range p.deferpool {
67+
// disconnect cached list before dropping it on the floor,
68+
// so that a dangling ref to one entry does not pin all of them.
69+
var d, dlink *_defer
70+
for d = p.deferpool[i]; d != nil; d = dlink {
71+
dlink = d.link
72+
d.link = nil
73+
}
5874
p.deferpool[i] = nil
5975
}
6076
}

src/runtime/proc.go

+10
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ func acquireSudog() *sudog {
152152
gothrow("acquireSudog: found s.elem != nil in cache")
153153
}
154154
c.sudogcache = s.next
155+
s.next = nil
155156
return s
156157
}
157158

@@ -177,6 +178,15 @@ func releaseSudog(s *sudog) {
177178
if s.selectdone != nil {
178179
gothrow("runtime: sudog with non-nil selectdone")
179180
}
181+
if s.next != nil {
182+
gothrow("runtime: sudog with non-nil next")
183+
}
184+
if s.prev != nil {
185+
gothrow("runtime: sudog with non-nil prev")
186+
}
187+
if s.waitlink != nil {
188+
gothrow("runtime: sudog with non-nil waitlink")
189+
}
180190
gp := getg()
181191
if gp.param != nil {
182192
gothrow("runtime: releaseSudog with non-nil gp.param")

src/runtime/select.go

+2
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,7 @@ loop:
404404
}
405405
}
406406
sgnext = sglist.waitlink
407+
sglist.waitlink = nil
407408
releaseSudog(sglist)
408409
sglist = sgnext
409410
}
@@ -641,6 +642,7 @@ func (q *waitq) dequeueSudoG(s *sudog) {
641642
if q.last == sgp {
642643
q.last = prevsgp
643644
}
645+
s.next = nil
644646
return
645647
}
646648
l = &sgp.next

src/runtime/sema.go

+2
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ func syncsemacquire(s *syncSema) {
201201
}
202202
unlock(&s.lock)
203203
if wake != nil {
204+
wake.next = nil
204205
goready(wake.g)
205206
}
206207
} else {
@@ -242,6 +243,7 @@ func syncsemrelease(s *syncSema, n uint32) {
242243
if wake.releasetime != 0 {
243244
wake.releasetime = cputicks()
244245
}
246+
wake.next = nil
245247
goready(wake.g)
246248
n--
247249
}

test/fixedbugs/issue9110.go

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// run
2+
3+
// Copyright 2014 The Go Authors. All rights reserved.
4+
// Use of this source code is governed by a BSD-style
5+
// license that can be found in the LICENSE file.
6+
7+
// Scenario that used to leak arbitrarily many SudoG structs.
8+
// See golang.org/issue/9110.
9+
10+
package main
11+
12+
import (
13+
"runtime"
14+
"runtime/debug"
15+
"sync"
16+
"time"
17+
)
18+
19+
// main replays the scenario from golang.org/issue/9110 that used to leak
// arbitrarily many SudoG structs, then compares heap object counts taken
// after warm-up and at the end. It prints a "BUG" line if the count grew
// by more than 20 objects between those two samples.
func main() {
	debug.SetGCPercent(1000000) // only GC when we ask for GC

	var stats, stats1, stats2 runtime.MemStats

	// Record a baseline before doing any work so that the diagnostic
	// below reports a real starting value rather than a zero.
	runtime.ReadMemStats(&stats)

	release := func() {}
	for i := 0; i < 20; i++ {
		if i == 10 {
			// Should be warmed up by now.
			runtime.ReadMemStats(&stats1)
		}

		c := make(chan int)
		for j := 0; j < 10; j++ {
			go func() {
				select {
				case <-c:
				case <-c:
				case <-c:
				}
			}()
		}
		time.Sleep(1 * time.Millisecond)
		// Runs the closure installed at the end of the previous
		// iteration (a no-op on the first pass).
		release()

		close(c) // let select put its sudogs into the cache
		time.Sleep(1 * time.Millisecond)

		// pick up top sudog
		var cond1 sync.Cond
		var mu1 sync.Mutex
		cond1.L = &mu1
		go func() {
			mu1.Lock()
			cond1.Wait()
			mu1.Unlock()
		}()
		time.Sleep(1 * time.Millisecond)

		// pick up next sudog
		var cond2 sync.Cond
		var mu2 sync.Mutex
		cond2.L = &mu2
		go func() {
			mu2.Lock()
			cond2.Wait()
			mu2.Unlock()
		}()
		time.Sleep(1 * time.Millisecond)

		// put top sudog back
		cond1.Broadcast()
		time.Sleep(1 * time.Millisecond)

		// drop cache on floor
		runtime.GC()

		// release cond2 after select has gotten to run
		release = func() {
			cond2.Broadcast()
			time.Sleep(1 * time.Millisecond)
		}
	}

	runtime.GC()

	runtime.ReadMemStats(&stats2)

	if int(stats2.HeapObjects)-int(stats1.HeapObjects) > 20 { // normally at most 1 or 2; was 300 with leak
		print("BUG: object leak: ", stats.HeapObjects, " -> ", stats1.HeapObjects, " -> ", stats2.HeapObjects, "\n")
	}
}

0 commit comments

Comments
 (0)