Skip to content

Commit 8e0ec5e

Browse files
beneschdvyukov
authored andcommitted
runtime: ensure m.p is never stale
When a goroutine enters a syscall, its M unwires from its P to allow the P to be retaken by another M if the syscall is slow. The M retains a reference to its old P, however, so that if its old P has not been retaken when the syscall returns, it can quickly reacquire that P. The implementation, however, was confusing, as it left the reference to the potentially-retaken P in m.p, which implied that the P was still wired. Make the code clearer by enforcing the invariant that m.p is never stale. entersyscall now moves m.p to m.oldp and sets m.p to 0; exitsyscall does the reverse, provided m.oldp has not been retaken. With this scheme in place, the issue described in #27660 (assertion failures in the race detector) would have resulted in a clean segfault instead of silently corrupting memory. Change-Id: Ib3e03623ebed4f410e852a716919fe4538858f0a Reviewed-on: https://go-review.googlesource.com/c/148899 Run-TryBot: Dmitry Vyukov <dvyukov@google.com> Reviewed-by: Dmitry Vyukov <dvyukov@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
1 parent e4c1fee commit 8e0ec5e

File tree

2 files changed

+24
-29
lines changed

2 files changed

+24
-29
lines changed

src/runtime/proc.go

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2828,8 +2828,11 @@ func reentersyscall(pc, sp uintptr) {
28282828
_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
28292829
_g_.sysblocktraced = true
28302830
_g_.m.mcache = nil
2831-
_g_.m.p.ptr().m = 0
2832-
atomic.Store(&_g_.m.p.ptr().status, _Psyscall)
2831+
pp := _g_.m.p.ptr()
2832+
pp.m = 0
2833+
_g_.m.oldp.set(pp)
2834+
_g_.m.p = 0
2835+
atomic.Store(&pp.status, _Psyscall)
28332836
if sched.gcwaiting != 0 {
28342837
systemstack(entersyscall_gcwait)
28352838
save(pc, sp)
@@ -2855,7 +2858,7 @@ func entersyscall_sysmon() {
28552858

28562859
func entersyscall_gcwait() {
28572860
_g_ := getg()
2858-
_p_ := _g_.m.p.ptr()
2861+
_p_ := _g_.m.oldp.ptr()
28592862

28602863
lock(&sched.lock)
28612864
if sched.stopwait > 0 && atomic.Cas(&_p_.status, _Psyscall, _Pgcstop) {
@@ -2940,8 +2943,9 @@ func exitsyscall() {
29402943
}
29412944

29422945
_g_.waitsince = 0
2943-
oldp := _g_.m.p.ptr()
2944-
if exitsyscallfast() {
2946+
oldp := _g_.m.oldp.ptr()
2947+
_g_.m.oldp = 0
2948+
if exitsyscallfast(oldp) {
29452949
if _g_.m.mcache == nil {
29462950
throw("lost mcache")
29472951
}
@@ -3011,27 +3015,23 @@ func exitsyscall() {
30113015
}
30123016

30133017
//go:nosplit
3014-
func exitsyscallfast() bool {
3018+
func exitsyscallfast(oldp *p) bool {
30153019
_g_ := getg()
30163020

30173021
// Freezetheworld sets stopwait but does not retake P's.
30183022
if sched.stopwait == freezeStopWait {
3019-
_g_.m.mcache = nil
3020-
_g_.m.p = 0
30213023
return false
30223024
}
30233025

30243026
// Try to re-acquire the last P.
3025-
if _g_.m.p != 0 && _g_.m.p.ptr().status == _Psyscall && atomic.Cas(&_g_.m.p.ptr().status, _Psyscall, _Prunning) {
3027+
if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
30263028
// There's a cpu for us, so we can run.
3029+
wirep(oldp)
30273030
exitsyscallfast_reacquired()
30283031
return true
30293032
}
30303033

30313034
// Try to get any other idle P.
3032-
oldp := _g_.m.p.ptr()
3033-
_g_.m.mcache = nil
3034-
_g_.m.p = 0
30353035
if sched.pidle != 0 {
30363036
var ok bool
30373037
systemstack(func() {
@@ -3058,15 +3058,9 @@ func exitsyscallfast() bool {
30583058
// has successfully reacquired the P it was running on before the
30593059
// syscall.
30603060
//
3061-
// This function is allowed to have write barriers because exitsyscall
3062-
// has acquired a P at this point.
3063-
//
3064-
//go:yeswritebarrierrec
30653061
//go:nosplit
30663062
func exitsyscallfast_reacquired() {
30673063
_g_ := getg()
3068-
_g_.m.mcache = _g_.m.p.ptr().mcache
3069-
_g_.m.p.ptr().m.set(_g_.m)
30703064
if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
30713065
if trace.enabled {
30723066
// The p was retaken and then enter into syscall again (since _g_.m.syscalltick has changed).
@@ -4081,11 +4075,9 @@ func procresize(nprocs int32) *p {
40814075
//go:yeswritebarrierrec
40824076
func acquirep(_p_ *p) {
40834077
// Do the part that isn't allowed to have write barriers.
4084-
acquirep1(_p_)
4078+
wirep(_p_)
40854079

4086-
// have p; write barriers now allowed
4087-
_g_ := getg()
4088-
_g_.m.mcache = _p_.mcache
4080+
// Have p; write barriers now allowed.
40894081

40904082
// Perform deferred mcache flush before this P can allocate
40914083
// from a potentially stale mcache.
@@ -4096,25 +4088,27 @@ func acquirep(_p_ *p) {
40964088
}
40974089
}
40984090

4099-
// acquirep1 is the first step of acquirep, which actually acquires
4100-
// _p_. This is broken out so we can disallow write barriers for this
4101-
// part, since we don't yet have a P.
4091+
// wirep is the first step of acquirep, which actually associates the
4092+
// current M to _p_. This is broken out so we can disallow write
4093+
// barriers for this part, since we don't yet have a P.
41024094
//
41034095
//go:nowritebarrierrec
4104-
func acquirep1(_p_ *p) {
4096+
//go:nosplit
4097+
func wirep(_p_ *p) {
41054098
_g_ := getg()
41064099

41074100
if _g_.m.p != 0 || _g_.m.mcache != nil {
4108-
throw("acquirep: already in go")
4101+
throw("wirep: already in go")
41094102
}
41104103
if _p_.m != 0 || _p_.status != _Pidle {
41114104
id := int64(0)
41124105
if _p_.m != 0 {
41134106
id = _p_.m.ptr().id
41144107
}
4115-
print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
4116-
throw("acquirep: invalid p state")
4108+
print("wirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
4109+
throw("wirep: invalid p state")
41174110
}
4111+
_g_.m.mcache = _p_.mcache
41184112
_g_.m.p.set(_p_)
41194113
_p_.m.set(_g_.m)
41204114
_p_.status = _Prunning

src/runtime/runtime2.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,7 @@ type m struct {
417417
caughtsig guintptr // goroutine running during fatal signal
418418
p puintptr // attached p for executing go code (nil if not executing go code)
419419
nextp puintptr
420+
oldp puintptr // the p that was attached before executing a syscall
420421
id int64
421422
mallocing int32
422423
throwing int32

0 commit comments

Comments
 (0)