Commit 07e738e
runtime: use only treaps for tracking spans
Currently, mheap tracks spans in both mSpanLists and mTreaps, but
mSpanLists, while they tend to be smaller, complicate the implementation.
Here we simplify the implementation by removing free and busy from
mheap and renaming freelarge -> free and busylarge -> busy.

This change also slightly changes the reclamation policy. Previously,
for allocations under 1MB we would attempt to find a small span of the
right size. Now, we just try to find any number of spans totaling the
right size. This may increase heap fragmentation, but that will be
dealt with using virtual memory tricks in follow-up CLs.

For #14045.

Garbage-heavy benchmarks show very little change, except what appears
to be a decrease in STW times and peak RSS.

name                      old STW-ns/GC  new STW-ns/GC  delta
Garbage/benchmem-MB=64-8    263k ±64%      217k ±24%   -17.66%  (p=0.028 n=25+23)

name                      old STW-ns/op  new STW-ns/op  delta
Garbage/benchmem-MB=64-8   9.39k ±65%     7.80k ±24%   -16.88%  (p=0.037 n=25+23)

name                      old peak-RSS-bytes  new peak-RSS-bytes  delta
Garbage/benchmem-MB=64-8    281M ± 0%           249M ± 4%        -11.40%  (p=0.000 n=19+18)

https://perf.golang.org/search?q=upload:20181005.1

Go1 benchmarks perform roughly the same, the most notable regression
being the JSON encode/decode benchmark, which worsens by ~2%.

name                     old time/op    new time/op    delta
BinaryTree17-8             3.02s ± 2%     2.99s ± 2%   -1.18%  (p=0.000 n=25+24)
Fannkuch11-8               3.05s ± 1%     3.02s ± 2%   -1.20%  (p=0.000 n=25+25)
FmtFprintfEmpty-8         43.6ns ± 5%    43.4ns ± 3%     ~     (p=0.528 n=25+25)
FmtFprintfString-8        74.9ns ± 3%    73.4ns ± 1%   -2.03%  (p=0.001 n=25+24)
FmtFprintfInt-8           79.3ns ± 3%    77.9ns ± 1%   -1.73%  (p=0.003 n=25+25)
FmtFprintfIntInt-8         119ns ± 6%     116ns ± 0%   -2.68%  (p=0.000 n=25+18)
FmtFprintfPrefixedInt-8    134ns ± 4%     132ns ± 1%   -1.52%  (p=0.004 n=25+25)
FmtFprintfFloat-8          240ns ± 1%     241ns ± 1%     ~     (p=0.403 n=24+23)
FmtManyArgs-8              543ns ± 1%     537ns ± 1%   -1.00%  (p=0.000 n=25+25)
GobDecode-8               6.88ms ± 1%    6.92ms ± 4%     ~     (p=0.088 n=24+22)
GobEncode-8               5.92ms ± 1%    5.93ms ± 1%     ~     (p=0.898 n=25+24)
Gzip-8                     267ms ± 2%     266ms ± 2%     ~     (p=0.213 n=25+24)
Gunzip-8                  35.4ms ± 1%    35.6ms ± 1%   +0.70%  (p=0.000 n=25+25)
HTTPClientServer-8         104µs ± 2%     104µs ± 2%     ~     (p=0.686 n=25+25)
JSONEncode-8              9.67ms ± 1%    9.80ms ± 4%   +1.32%  (p=0.000 n=25+25)
JSONDecode-8              47.7ms ± 1%    48.8ms ± 5%   +2.33%  (p=0.000 n=25+25)
Mandelbrot200-8           4.87ms ± 1%    4.91ms ± 1%   +0.79%  (p=0.000 n=25+25)
GoParse-8                 3.59ms ± 4%    3.55ms ± 1%     ~     (p=0.199 n=25+24)
RegexpMatchEasy0_32-8     90.3ns ± 1%    89.9ns ± 1%   -0.47%  (p=0.000 n=25+21)
RegexpMatchEasy0_1K-8      204ns ± 1%     204ns ± 1%     ~     (p=0.914 n=25+24)
RegexpMatchEasy1_32-8     84.9ns ± 0%    84.6ns ± 1%   -0.36%  (p=0.000 n=24+25)
RegexpMatchEasy1_1K-8      350ns ± 1%     348ns ± 3%   -0.59%  (p=0.007 n=25+25)
RegexpMatchMedium_32-8     122ns ± 1%     121ns ± 0%   -1.08%  (p=0.000 n=25+18)
RegexpMatchMedium_1K-8    36.1µs ± 1%    34.6µs ± 1%   -4.02%  (p=0.000 n=25+25)
RegexpMatchHard_32-8      1.69µs ± 2%    1.65µs ± 1%   -2.38%  (p=0.000 n=25+25)
RegexpMatchHard_1K-8      50.8µs ± 1%    49.4µs ± 1%   -2.69%  (p=0.000 n=25+24)
Revcomp-8                  453ms ± 2%     449ms ± 3%   -0.74%  (p=0.022 n=25+24)
Template-8                63.2ms ± 2%    63.4ms ± 1%     ~     (p=0.127 n=25+24)
TimeParse-8                313ns ± 1%     315ns ± 3%     ~     (p=0.924 n=24+25)
TimeFormat-8               294ns ± 1%     292ns ± 2%   -0.65%  (p=0.004 n=23+24)
[Geo mean]                49.9µs         49.6µs        -0.65%

name                     old speed      new speed      delta
GobDecode-8              112MB/s ± 1%   110MB/s ± 4%   -1.00%  (p=0.036 n=24+24)
GobEncode-8              130MB/s ± 1%   129MB/s ± 1%     ~     (p=0.894 n=25+24)
Gzip-8                  72.7MB/s ± 2%  73.0MB/s ± 2%     ~     (p=0.208 n=25+24)
Gunzip-8                 549MB/s ± 1%   545MB/s ± 1%   -0.70%  (p=0.000 n=25+25)
JSONEncode-8             201MB/s ± 1%   198MB/s ± 3%   -1.29%  (p=0.000 n=25+25)
JSONDecode-8            40.7MB/s ± 1%  39.8MB/s ± 5%   -2.23%  (p=0.000 n=25+25)
GoParse-8               16.2MB/s ± 4%  16.3MB/s ± 1%     ~     (p=0.211 n=25+24)
RegexpMatchEasy0_32-8    354MB/s ± 1%   356MB/s ± 1%   +0.47%  (p=0.000 n=25+21)
RegexpMatchEasy0_1K-8   5.00GB/s ± 0%  4.99GB/s ± 1%     ~     (p=0.588 n=24+24)
RegexpMatchEasy1_32-8    377MB/s ± 1%   378MB/s ± 1%   +0.39%  (p=0.000 n=25+25)
RegexpMatchEasy1_1K-8   2.92GB/s ± 1%  2.94GB/s ± 3%   +0.65%  (p=0.008 n=25+25)
RegexpMatchMedium_32-8  8.14MB/s ± 1%  8.22MB/s ± 1%   +0.98%  (p=0.000 n=25+24)
RegexpMatchMedium_1K-8  28.4MB/s ± 1%  29.6MB/s ± 1%   +4.19%  (p=0.000 n=25+25)
RegexpMatchHard_32-8    18.9MB/s ± 2%  19.4MB/s ± 1%   +2.43%  (p=0.000 n=25+25)
RegexpMatchHard_1K-8    20.2MB/s ± 1%  20.7MB/s ± 1%   +2.76%  (p=0.000 n=25+24)
Revcomp-8                561MB/s ± 2%   566MB/s ± 3%   +0.75%  (p=0.021 n=25+24)
Template-8              30.7MB/s ± 2%  30.6MB/s ± 1%     ~     (p=0.131 n=25+24)
[Geo mean]               120MB/s        121MB/s        +0.48%

https://perf.golang.org/search?q=upload:20181004.6

Change-Id: I97f9fee34577961a116a8ddd445c6272253f0f95
Reviewed-on: https://go-review.googlesource.com/c/139837
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
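For readers unfamiliar with the structure this commit standardizes on: a treap is a binary search tree whose nodes also satisfy a heap property on random priorities, which keeps the tree balanced in expectation. The operation mheap leans on is best-fit removal: h.free.remove(npage) returns the smallest free span with at least npage pages. The sketch below illustrates that operation in isolation; it is not the runtime's mTreap (which breaks size ties by base address and allocates nodes from a fixAlloc). The span, node, removeBest, and merge names are invented for this example.

package main

import (
	"fmt"
	"math/rand"
)

// span stands in for the runtime's mspan; only the page count matters here.
type span struct {
	npages uintptr
}

// node is a treap node: a BST on npages that is simultaneously a
// min-heap on the random priority.
type node struct {
	s           *span
	priority    uint32
	left, right *node
}

func rotateRight(t *node) *node {
	l := t.left
	t.left, l.right = l.right, t
	return l
}

func rotateLeft(t *node) *node {
	r := t.right
	t.right, r.left = r.left, t
	return r
}

// insert adds s in BST order on npages, then rotates to restore the
// heap property on priorities.
func insert(t *node, s *span) *node {
	if t == nil {
		return &node{s: s, priority: rand.Uint32()}
	}
	if s.npages < t.s.npages {
		t.left = insert(t.left, s)
		if t.left.priority < t.priority {
			t = rotateRight(t)
		}
	} else {
		t.right = insert(t.right, s)
		if t.right.priority < t.priority {
			t = rotateLeft(t)
		}
	}
	return t
}

// merge joins two treaps where every key in a is <= every key in b.
func merge(a, b *node) *node {
	switch {
	case a == nil:
		return b
	case b == nil:
		return a
	case a.priority < b.priority:
		a.right = merge(a.right, b)
		return a
	default:
		b.left = merge(a, b.left)
		return b
	}
}

// removeBest deletes and returns the best-fit span: the smallest span
// with at least npages pages, or nil if no span is big enough.
func removeBest(t *node, npages uintptr) (*node, *span) {
	if t == nil {
		return nil, nil
	}
	if t.s.npages < npages {
		// t and its entire left subtree are too small.
		var s *span
		t.right, s = removeBest(t.right, npages)
		return t, s
	}
	// t fits, but a tighter fit may sit in the left subtree.
	if left, s := removeBest(t.left, npages); s != nil {
		t.left = left
		return t, s
	}
	// t itself is the best fit; splice it out by merging its children.
	return merge(t.left, t.right), t.s
}

func main() {
	var root *node
	for _, n := range []uintptr{8, 1, 32, 4, 16} {
		root = insert(root, &span{npages: n})
	}
	var s *span
	root, s = removeBest(root, 5) // best fit for 5 pages is the 8-page span
	fmt.Println("got span of", s.npages, "pages")
}

Because balance comes from the random priorities alone, the tree needs no size-class buckets, which is what lets the commit drop the fixed-size mSpanList arrays entirely.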

File tree: 2 files changed, +20 -119 lines changed

src/runtime/malloc.go (+1 -2)

@@ -136,8 +136,7 @@ const (
 	_TinySize      = 16
 	_TinySizeClass = int8(2)
 
-	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
-	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
+	_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
 
 	// Per-P, per order stack segment cache size.
 	_StackCacheSize = 32 * 1024

src/runtime/mheap.go (+19 -117)
@@ -30,13 +30,11 @@ const minPhysPageSize = 4096
 //go:notinheap
 type mheap struct {
 	lock      mutex
-	free      [_MaxMHeapList]mSpanList // free lists of given length up to _MaxMHeapList
-	freelarge mTreap                   // free treap of length >= _MaxMHeapList
-	busy      [_MaxMHeapList]mSpanList // busy lists of large spans of given length
-	busylarge mSpanList                // busy lists of large spans length >= _MaxMHeapList
-	sweepgen  uint32                   // sweep generation, see comment in mspan
-	sweepdone uint32                   // all spans are swept
-	sweepers  uint32                   // number of active sweepone calls
+	free      mTreap    // free treap of spans
+	busy      mSpanList // busy list of spans
+	sweepgen  uint32    // sweep generation, see comment in mspan
+	sweepdone uint32    // all spans are swept
+	sweepers  uint32    // number of active sweepone calls
 
 	// allspans is a slice of all mspans ever created. Each mspan
 	// appears exactly once.
@@ -599,12 +597,7 @@ func (h *mheap) init() {
 	h.spanalloc.zero = false
 
 	// h->mapcache needs no init
-	for i := range h.free {
-		h.free[i].init()
-		h.busy[i].init()
-	}
-
-	h.busylarge.init()
+	h.busy.init()
 	for i := range h.central {
 		h.central[i].mcentral.init(spanClass(i))
 	}
@@ -647,30 +640,12 @@ retry:
 // Sweeps and reclaims at least npage pages into heap.
 // Called before allocating npage pages.
 func (h *mheap) reclaim(npage uintptr) {
-	// First try to sweep busy spans with large objects of size >= npage,
-	// this has good chances of reclaiming the necessary space.
-	for i := int(npage); i < len(h.busy); i++ {
-		if h.reclaimList(&h.busy[i], npage) != 0 {
-			return // Bingo!
-		}
-	}
-
-	// Then -- even larger objects.
-	if h.reclaimList(&h.busylarge, npage) != 0 {
+	if h.reclaimList(&h.busy, npage) != 0 {
 		return // Bingo!
 	}
 
-	// Now try smaller objects.
-	// One such object is not enough, so we need to reclaim several of them.
-	reclaimed := uintptr(0)
-	for i := 0; i < int(npage) && i < len(h.busy); i++ {
-		reclaimed += h.reclaimList(&h.busy[i], npage-reclaimed)
-		if reclaimed >= npage {
-			return
-		}
-	}
-
 	// Now sweep everything that is not yet swept.
+	var reclaimed uintptr
 	unlock(&h.lock)
 	for {
 		n := sweepone()
@@ -752,11 +727,7 @@ func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
 			mheap_.nlargealloc++
 			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
 			// Swept spans are at the end of lists.
-			if s.npages < uintptr(len(h.busy)) {
-				h.busy[s.npages].insertBack(s)
-			} else {
-				h.busylarge.insertBack(s)
-			}
+			h.busy.insertBack(s)
 		}
 	}
 	// heap_scan and heap_live were updated.
@@ -867,31 +838,20 @@ func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
 // The returned span has been removed from the
 // free list, but its state is still mSpanFree.
 func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
-	var list *mSpanList
 	var s *mspan
 
-	// Try in fixed-size lists up to max.
-	for i := int(npage); i < len(h.free); i++ {
-		list = &h.free[i]
-		if !list.isEmpty() {
-			s = list.first
-			list.remove(s)
-			goto HaveSpan
-		}
-	}
-	// Best fit in list of large spans.
-	s = h.allocLarge(npage) // allocLarge removed s from h.freelarge for us
+	// Best fit in the treap of spans.
+	s = h.free.remove(npage)
 	if s == nil {
 		if !h.grow(npage) {
 			return nil
 		}
-		s = h.allocLarge(npage)
+		s = h.free.remove(npage)
 		if s == nil {
 			return nil
 		}
 	}
 
-HaveSpan:
 	// Mark span in use.
 	if s.state != mSpanFree {
 		throw("MHeap_AllocLocked - MSpan not free")
@@ -933,21 +893,6 @@ HaveSpan:
 	return s
 }
 
-// Large spans have a minimum size of 1MByte. The maximum number of large spans to support
-// 1TBytes is 1 million, experimentation using random sizes indicates that the depth of
-// the tree is less that 2x that of a perfectly balanced tree. For 1TByte can be referenced
-// by a perfectly balanced tree with a depth of 20. Twice that is an acceptable 40.
-func (h *mheap) isLargeSpan(npages uintptr) bool {
-	return npages >= uintptr(len(h.free))
-}
-
-// allocLarge allocates a span of at least npage pages from the treap of large spans.
-// Returns nil if no such span currently exists.
-func (h *mheap) allocLarge(npage uintptr) *mspan {
-	// Search treap for smallest span with >= npage pages.
-	return h.freelarge.remove(npage)
-}
-
 // Try to add at least npage pages of memory to the heap,
 // returning whether it worked.
 //
@@ -1023,7 +968,7 @@ func (h *mheap) freeManual(s *mspan, stat *uint64) {
 	unlock(&h.lock)
 }
 
-// s must be on a busy list (h.busy or h.busylarge) or unlinked.
+// s must be on the busy list or unlinked.
 func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
 	switch s.state {
 	case mSpanManual:
@@ -1048,7 +993,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 	}
 	s.state = mSpanFree
 	if s.inList() {
-		h.busyList(s.npages).remove(s)
+		h.busy.remove(s)
 	}
 
 	// Stamp newly unused spans. The scavenger will use that
@@ -1069,12 +1014,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 		h.setSpan(before.base(), s)
 		// The size is potentially changing so the treap needs to delete adjacent nodes and
 		// insert back as a combined node.
-		if h.isLargeSpan(before.npages) {
-			// We have a t, it is large so it has to be in the treap so we can remove it.
-			h.freelarge.removeSpan(before)
-		} else {
-			h.freeList(before.npages).remove(before)
-		}
+		h.free.removeSpan(before)
 		before.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(before))
 	}
@@ -1085,32 +1025,13 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 		s.npreleased += after.npreleased
 		s.needzero |= after.needzero
 		h.setSpan(s.base()+s.npages*pageSize-1, s)
-		if h.isLargeSpan(after.npages) {
-			h.freelarge.removeSpan(after)
-		} else {
-			h.freeList(after.npages).remove(after)
-		}
+		h.free.removeSpan(after)
 		after.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(after))
 	}
 
-	// Insert s into appropriate list or treap.
-	if h.isLargeSpan(s.npages) {
-		h.freelarge.insert(s)
-	} else {
-		h.freeList(s.npages).insert(s)
-	}
-}
-
-func (h *mheap) freeList(npages uintptr) *mSpanList {
-	return &h.free[npages]
-}
-
-func (h *mheap) busyList(npages uintptr) *mSpanList {
-	if npages < uintptr(len(h.busy)) {
-		return &h.busy[npages]
-	}
-	return &h.busylarge
+	// Insert s into the free treap.
+	h.free.insert(s)
 }
 
 func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
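The coalescing path in the hunk above is the subtle part of the new scheme: when a span is freed, free neighbors must be deleted from the treap before the merged span is inserted, because their sizes, which are the treap's key, change. Below is a minimal, self-contained sketch of that flow; sliceTreap is an invented stand-in for mTreap (a slice keeps the example short, since only insert and removeSpan matter here), and the span fields are simplified.

package main

import "fmt"

// span is a simplified stand-in for the runtime's mspan.
type span struct {
	base, npages uintptr
	free         bool
}

// sliceTreap stands in for mTreap; any structure supporting insert
// and removeSpan works for this sketch.
type sliceTreap struct{ spans []*span }

func (t *sliceTreap) insert(s *span) { t.spans = append(t.spans, s) }

func (t *sliceTreap) removeSpan(s *span) {
	for i, x := range t.spans {
		if x == s {
			t.spans = append(t.spans[:i], t.spans[i+1:]...)
			return
		}
	}
}

// coalesce mirrors the merging step of freeSpanLocked: absorb free
// neighbors into s, removing each from the free structure, then
// insert the combined span exactly once.
func coalesce(free *sliceTreap, s, before, after *span) {
	if before != nil && before.free {
		s.base = before.base
		s.npages += before.npages
		// The neighbor's pages are part of s now, so its node must be
		// deleted rather than left in the treap with a stale size.
		free.removeSpan(before)
	}
	if after != nil && after.free {
		s.npages += after.npages
		free.removeSpan(after)
	}
	s.free = true
	free.insert(s)
}

func main() {
	const pageSize = 8192
	free := &sliceTreap{}
	before := &span{base: 0, npages: 2, free: true}
	after := &span{base: 3 * pageSize, npages: 1, free: true}
	free.insert(before)
	free.insert(after)

	s := &span{base: 2 * pageSize, npages: 1}
	coalesce(free, s, before, after)
	fmt.Printf("merged span: base=%d npages=%d, free spans=%d\n",
		s.base, s.npages, len(free.spans))
	// Prints: merged span: base=0 npages=4, free spans=1
}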
@@ -1123,33 +1044,14 @@ func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
 	return 0
 }
 
-func scavengelist(list *mSpanList, now, limit uint64) uintptr {
-	if list.isEmpty() {
-		return 0
-	}
-
-	var sumreleased uintptr
-	for s := list.first; s != nil; s = s.next {
-		if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages {
-			continue
-		}
-		sumreleased += s.scavenge()
-	}
-	return sumreleased
-}
-
 func (h *mheap) scavenge(k int32, now, limit uint64) {
 	// Disallow malloc or panic while holding the heap lock. We do
 	// this here because this is an non-mallocgc entry-point to
 	// the mheap API.
 	gp := getg()
 	gp.m.mallocing++
 	lock(&h.lock)
-	var sumreleased uintptr
-	for i := 0; i < len(h.free); i++ {
-		sumreleased += scavengelist(&h.free[i], now, limit)
-	}
-	sumreleased += scavengetreap(h.freelarge.treap, now, limit)
+	sumreleased := scavengetreap(h.free.treap, now, limit)
 	unlock(&h.lock)
 	gp.m.mallocing--
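After this change the scavenger has a single structure to walk instead of the per-size-class scavengelist loop. Here is a self-contained sketch of such a walk, in the spirit of scavengetreap and scavengeTreapNode; the node and span fields and the release policy are simplified assumptions for illustration, not the runtime's.

package main

import "fmt"

// span is a simplified stand-in for mspan's scavenging state.
type span struct {
	npages      uintptr
	npreleased  uintptr // pages already returned to the OS
	unusedsince uint64  // time when the span became unused
}

// scavenge pretends to release the span's remaining pages to the OS
// and reports how many pages it released.
func (s *span) scavenge() uintptr {
	released := s.npages - s.npreleased
	s.npreleased = s.npages
	return released
}

type treapNode struct {
	s           *span
	left, right *treapNode
}

// scavengeTreap visits every node in the free treap and scavenges
// spans that have sat unused longer than limit and still have
// unreleased pages.
func scavengeTreap(t *treapNode, now, limit uint64) uintptr {
	if t == nil {
		return 0
	}
	var sumreleased uintptr
	if s := t.s; now-s.unusedsince > limit && s.npreleased != s.npages {
		sumreleased += s.scavenge()
	}
	sumreleased += scavengeTreap(t.left, now, limit)
	sumreleased += scavengeTreap(t.right, now, limit)
	return sumreleased
}

func main() {
	// Three free spans: two long-idle, one freed recently.
	root := &treapNode{
		s:     &span{npages: 4, unusedsince: 10},
		left:  &treapNode{s: &span{npages: 2, unusedsince: 90}},
		right: &treapNode{s: &span{npages: 8, unusedsince: 5}},
	}
	// Only the spans idle for more than 50 ticks are scavenged: 4 + 8.
	fmt.Println("pages released:", scavengeTreap(root, 100, 50))
}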