Skip to content

Commit 95e6a9f

Browse files
committed
cmd/trace: pprof computation for span types
/spanio, /spanblock, /spansched, /spansyscall provide the pprof-style summary of span execution's io, block, scheduling, syscall latency distributions respectively. The computation logic for /io, /block, /sched, /syscall analysis was refactored and extended for reuse in these new types of analysis. Upon the analysis query, we create a map of goroutine id to time intervals based on the query parameter, that represents the interesting time intervals of matching goroutines. Only the events from the matching goroutines that fall into the intervals are considered in the pprof computation. The new endpoints are not yet hooked into other span analysis page (e.g. /userspan) yet. Change-Id: I80c3396e45a2d6631758710de67d132e5832c7ce Reviewed-on: https://go-review.googlesource.com/105822 Reviewed-by: Heschi Kreinick <heschi@google.com> Run-TryBot: Heschi Kreinick <heschi@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
1 parent 8bb8eaf commit 95e6a9f

File tree

2 files changed

+166
-88
lines changed

2 files changed

+166
-88
lines changed

src/cmd/trace/main.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,19 +83,19 @@ func main() {
8383
flag.Usage()
8484
}
8585

86-
var pprofFunc func(io.Writer, string) error
86+
var pprofFunc func(io.Writer, *http.Request) error
8787
switch *pprofFlag {
8888
case "net":
89-
pprofFunc = pprofIO
89+
pprofFunc = pprofByGoroutine(computePprofIO)
9090
case "sync":
91-
pprofFunc = pprofBlock
91+
pprofFunc = pprofByGoroutine(computePprofBlock)
9292
case "syscall":
93-
pprofFunc = pprofSyscall
93+
pprofFunc = pprofByGoroutine(computePprofSyscall)
9494
case "sched":
95-
pprofFunc = pprofSched
95+
pprofFunc = pprofByGoroutine(computePprofSched)
9696
}
9797
if pprofFunc != nil {
98-
if err := pprofFunc(os.Stdout, ""); err != nil {
98+
if err := pprofFunc(os.Stdout, &http.Request{}); err != nil {
9999
dief("failed to generate pprof: %v\n", err)
100100
}
101101
os.Exit(0)

src/cmd/trace/pprof.go

Lines changed: 160 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ import (
1717
"os/exec"
1818
"path/filepath"
1919
"runtime"
20+
"sort"
2021
"strconv"
22+
"time"
2123

2224
"github.com/google/pprof/profile"
2325
)
@@ -35,10 +37,15 @@ func goCmd() string {
3537
}
3638

3739
func init() {
38-
http.HandleFunc("/io", serveSVGProfile(pprofIO))
39-
http.HandleFunc("/block", serveSVGProfile(pprofBlock))
40-
http.HandleFunc("/syscall", serveSVGProfile(pprofSyscall))
41-
http.HandleFunc("/sched", serveSVGProfile(pprofSched))
40+
http.HandleFunc("/io", serveSVGProfile(pprofByGoroutine(computePprofIO)))
41+
http.HandleFunc("/block", serveSVGProfile(pprofByGoroutine(computePprofBlock)))
42+
http.HandleFunc("/syscall", serveSVGProfile(pprofByGoroutine(computePprofSyscall)))
43+
http.HandleFunc("/sched", serveSVGProfile(pprofByGoroutine(computePprofSched)))
44+
45+
http.HandleFunc("/spanio", serveSVGProfile(pprofBySpan(computePprofIO)))
46+
http.HandleFunc("/spanblock", serveSVGProfile(pprofBySpan(computePprofBlock)))
47+
http.HandleFunc("/spansyscall", serveSVGProfile(pprofBySpan(computePprofSyscall)))
48+
http.HandleFunc("/spansched", serveSVGProfile(pprofBySpan(computePprofSched)))
4249
}
4350

4451
// Record represents one entry in pprof-like profiles.
@@ -48,10 +55,46 @@ type Record struct {
4855
time int64
4956
}
5057

58+
// interval represents a time interval in the trace.
59+
type interval struct {
60+
begin, end int64 // nanoseconds.
61+
}
62+
63+
func pprofByGoroutine(compute func(io.Writer, map[uint64][]interval, []*trace.Event) error) func(w io.Writer, r *http.Request) error {
64+
return func(w io.Writer, r *http.Request) error {
65+
id := r.FormValue("id")
66+
events, err := parseEvents()
67+
if err != nil {
68+
return err
69+
}
70+
gToIntervals, err := pprofMatchingGoroutines(id, events)
71+
if err != nil {
72+
return err
73+
}
74+
return compute(w, gToIntervals, events)
75+
}
76+
}
77+
78+
func pprofBySpan(compute func(io.Writer, map[uint64][]interval, []*trace.Event) error) func(w io.Writer, r *http.Request) error {
79+
return func(w io.Writer, r *http.Request) error {
80+
filter, err := newSpanFilter(r)
81+
if err != nil {
82+
return err
83+
}
84+
gToIntervals, err := pprofMatchingSpans(filter)
85+
if err != nil {
86+
return err
87+
}
88+
events, _ := parseEvents()
89+
90+
return compute(w, gToIntervals, events)
91+
}
92+
}
93+
5194
// pprofMatchingGoroutines parses the goroutine type id string (i.e. pc)
52-
// and returns the ids of goroutines of the matching type.
95+
// and returns the ids of goroutines of the matching type and its interval.
5396
// If the id string is empty, returns nil without an error.
54-
func pprofMatchingGoroutines(id string, events []*trace.Event) (map[uint64]bool, error) {
97+
func pprofMatchingGoroutines(id string, events []*trace.Event) (map[uint64][]interval, error) {
5598
if id == "" {
5699
return nil, nil
57100
}
@@ -60,62 +103,95 @@ func pprofMatchingGoroutines(id string, events []*trace.Event) (map[uint64]bool,
60103
return nil, fmt.Errorf("invalid goroutine type: %v", id)
61104
}
62105
analyzeGoroutines(events)
63-
var res map[uint64]bool
106+
var res map[uint64][]interval
64107
for _, g := range gs {
65108
if g.PC != pc {
66109
continue
67110
}
68111
if res == nil {
69-
res = make(map[uint64]bool)
112+
res = make(map[uint64][]interval)
113+
}
114+
endTime := g.EndTime
115+
if g.EndTime == 0 {
116+
endTime = lastTimestamp() // the trace doesn't include the goroutine end event. Use the trace end time.
70117
}
71-
res[g.ID] = true
118+
res[g.ID] = []interval{{begin: g.StartTime, end: endTime}}
72119
}
73120
if len(res) == 0 && id != "" {
74121
return nil, fmt.Errorf("failed to find matching goroutines for id: %s", id)
75122
}
76123
return res, nil
77124
}
78125

79-
// pprofIO generates IO pprof-like profile (time spent in IO wait,
80-
// currently only network blocking event).
81-
func pprofIO(w io.Writer, id string) error {
82-
events, err := parseEvents()
126+
// pprofMatchingSpans returns the time intervals of matching spans
127+
// grouped by the goroutine id. If the filter is nil, returns nil without an error.
128+
func pprofMatchingSpans(filter *spanFilter) (map[uint64][]interval, error) {
129+
res, err := analyzeAnnotations()
83130
if err != nil {
84-
return err
131+
return nil, err
85132
}
86-
goroutines, err := pprofMatchingGoroutines(id, events)
87-
if err != nil {
88-
return err
133+
if filter == nil {
134+
return nil, nil
135+
}
136+
137+
gToIntervals := make(map[uint64][]interval)
138+
for id, spans := range res.spans {
139+
for _, s := range spans {
140+
if filter.match(id, s) {
141+
gToIntervals[s.G] = append(gToIntervals[s.G], interval{begin: s.firstTimestamp(), end: s.lastTimestamp()})
142+
}
143+
}
144+
}
145+
146+
for g, intervals := range gToIntervals {
147+
// in order to remove nested spans and
148+
// consider only the outermost spans,
149+
// first, we sort based on the start time
150+
// and then scan through to select only the outermost spans.
151+
sort.Slice(intervals, func(i, j int) bool {
152+
x := intervals[i].begin
153+
y := intervals[j].begin
154+
if x == y {
155+
return intervals[i].end < intervals[j].end
156+
}
157+
return x < y
158+
})
159+
var lastTimestamp int64
160+
var n int
161+
// select only the outermost spans.
162+
for _, i := range intervals {
163+
if lastTimestamp <= i.begin {
164+
intervals[n] = i // new non-overlapping span starts.
165+
lastTimestamp = i.end
166+
n++
167+
} // otherwise, skip because this span overlaps with a previous span.
168+
}
169+
gToIntervals[g] = intervals[:n]
89170
}
171+
return gToIntervals, nil
172+
}
90173

174+
// computePprofIO generates IO pprof-like profile (time spent in IO wait, currently only network blocking event).
175+
func computePprofIO(w io.Writer, gToIntervals map[uint64][]interval, events []*trace.Event) error {
91176
prof := make(map[uint64]Record)
92177
for _, ev := range events {
93178
if ev.Type != trace.EvGoBlockNet || ev.Link == nil || ev.StkID == 0 || len(ev.Stk) == 0 {
94179
continue
95180
}
96-
if goroutines != nil && !goroutines[ev.G] {
97-
continue
181+
overlapping := pprofOverlappingDuration(gToIntervals, ev)
182+
if overlapping > 0 {
183+
rec := prof[ev.StkID]
184+
rec.stk = ev.Stk
185+
rec.n++
186+
rec.time += overlapping.Nanoseconds()
187+
prof[ev.StkID] = rec
98188
}
99-
rec := prof[ev.StkID]
100-
rec.stk = ev.Stk
101-
rec.n++
102-
rec.time += ev.Link.Ts - ev.Ts
103-
prof[ev.StkID] = rec
104189
}
105190
return buildProfile(prof).Write(w)
106191
}
107192

108-
// pprofBlock generates blocking pprof-like profile (time spent blocked on synchronization primitives).
109-
func pprofBlock(w io.Writer, id string) error {
110-
events, err := parseEvents()
111-
if err != nil {
112-
return err
113-
}
114-
goroutines, err := pprofMatchingGoroutines(id, events)
115-
if err != nil {
116-
return err
117-
}
118-
193+
// computePprofBlock generates blocking pprof-like profile (time spent blocked on synchronization primitives).
194+
func computePprofBlock(w io.Writer, gToIntervals map[uint64][]interval, events []*trace.Event) error {
119195
prof := make(map[uint64]Record)
120196
for _, ev := range events {
121197
switch ev.Type {
@@ -130,84 +206,86 @@ func pprofBlock(w io.Writer, id string) error {
130206
if ev.Link == nil || ev.StkID == 0 || len(ev.Stk) == 0 {
131207
continue
132208
}
133-
if goroutines != nil && !goroutines[ev.G] {
134-
continue
209+
overlapping := pprofOverlappingDuration(gToIntervals, ev)
210+
if overlapping > 0 {
211+
rec := prof[ev.StkID]
212+
rec.stk = ev.Stk
213+
rec.n++
214+
rec.time += overlapping.Nanoseconds()
215+
prof[ev.StkID] = rec
135216
}
136-
rec := prof[ev.StkID]
137-
rec.stk = ev.Stk
138-
rec.n++
139-
rec.time += ev.Link.Ts - ev.Ts
140-
prof[ev.StkID] = rec
141217
}
142218
return buildProfile(prof).Write(w)
143219
}
144220

145-
// pprofSyscall generates syscall pprof-like profile (time spent blocked in syscalls).
146-
func pprofSyscall(w io.Writer, id string) error {
147-
148-
events, err := parseEvents()
149-
if err != nil {
150-
return err
151-
}
152-
goroutines, err := pprofMatchingGoroutines(id, events)
153-
if err != nil {
154-
return err
155-
}
156-
221+
// computePprofSyscall generates syscall pprof-like profile (time spent blocked in syscalls).
222+
func computePprofSyscall(w io.Writer, gToIntervals map[uint64][]interval, events []*trace.Event) error {
157223
prof := make(map[uint64]Record)
158224
for _, ev := range events {
159225
if ev.Type != trace.EvGoSysCall || ev.Link == nil || ev.StkID == 0 || len(ev.Stk) == 0 {
160226
continue
161227
}
162-
if goroutines != nil && !goroutines[ev.G] {
163-
continue
228+
overlapping := pprofOverlappingDuration(gToIntervals, ev)
229+
if overlapping > 0 {
230+
rec := prof[ev.StkID]
231+
rec.stk = ev.Stk
232+
rec.n++
233+
rec.time += overlapping.Nanoseconds()
234+
prof[ev.StkID] = rec
164235
}
165-
rec := prof[ev.StkID]
166-
rec.stk = ev.Stk
167-
rec.n++
168-
rec.time += ev.Link.Ts - ev.Ts
169-
prof[ev.StkID] = rec
170236
}
171237
return buildProfile(prof).Write(w)
172238
}
173239

174-
// pprofSched generates scheduler latency pprof-like profile
240+
// computePprofSched generates scheduler latency pprof-like profile
175241
// (time between a goroutine become runnable and actually scheduled for execution).
176-
func pprofSched(w io.Writer, id string) error {
177-
events, err := parseEvents()
178-
if err != nil {
179-
return err
180-
}
181-
goroutines, err := pprofMatchingGoroutines(id, events)
182-
if err != nil {
183-
return err
184-
}
185-
242+
func computePprofSched(w io.Writer, gToIntervals map[uint64][]interval, events []*trace.Event) error {
186243
prof := make(map[uint64]Record)
187244
for _, ev := range events {
188245
if (ev.Type != trace.EvGoUnblock && ev.Type != trace.EvGoCreate) ||
189246
ev.Link == nil || ev.StkID == 0 || len(ev.Stk) == 0 {
190247
continue
191248
}
192-
if goroutines != nil && !goroutines[ev.G] {
193-
continue
249+
overlapping := pprofOverlappingDuration(gToIntervals, ev)
250+
if overlapping > 0 {
251+
rec := prof[ev.StkID]
252+
rec.stk = ev.Stk
253+
rec.n++
254+
rec.time += overlapping.Nanoseconds()
255+
prof[ev.StkID] = rec
194256
}
195-
rec := prof[ev.StkID]
196-
rec.stk = ev.Stk
197-
rec.n++
198-
rec.time += ev.Link.Ts - ev.Ts
199-
prof[ev.StkID] = rec
200257
}
201258
return buildProfile(prof).Write(w)
202259
}
203260

261+
// pprofOverlappingDuration returns the overlapping duration between
262+
// the time intervals in gToIntervals and the specified event.
263+
// If gToIntervals is nil, this simply returns the event's duration.
264+
func pprofOverlappingDuration(gToIntervals map[uint64][]interval, ev *trace.Event) time.Duration {
265+
if gToIntervals == nil { // No filtering.
266+
return time.Duration(ev.Link.Ts-ev.Ts) * time.Nanosecond
267+
}
268+
intervals := gToIntervals[ev.G]
269+
if len(intervals) == 0 {
270+
return 0
271+
}
272+
273+
var overlapping time.Duration
274+
for _, i := range intervals {
275+
if o := overlappingDuration(i.begin, i.end, ev.Ts, ev.Link.Ts); o > 0 {
276+
overlapping += o
277+
}
278+
}
279+
return overlapping
280+
}
281+
204282
// serveSVGProfile serves pprof-like profile generated by prof as svg.
205-
func serveSVGProfile(prof func(w io.Writer, id string) error) http.HandlerFunc {
283+
func serveSVGProfile(prof func(w io.Writer, r *http.Request) error) http.HandlerFunc {
206284
return func(w http.ResponseWriter, r *http.Request) {
207285

208286
if r.FormValue("raw") != "" {
209287
w.Header().Set("Content-Type", "application/octet-stream")
210-
if err := prof(w, r.FormValue("id")); err != nil {
288+
if err := prof(w, r); err != nil {
211289
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
212290
w.Header().Set("X-Go-Pprof", "1")
213291
http.Error(w, fmt.Sprintf("failed to get profile: %v", err), http.StatusInternalServerError)
@@ -226,7 +304,7 @@ func serveSVGProfile(prof func(w io.Writer, id string) error) http.HandlerFunc {
226304
os.Remove(blockf.Name())
227305
}()
228306
blockb := bufio.NewWriter(blockf)
229-
if err := prof(blockb, r.FormValue("id")); err != nil {
307+
if err := prof(blockb, r); err != nil {
230308
http.Error(w, fmt.Sprintf("failed to generate profile: %v", err), http.StatusInternalServerError)
231309
return
232310
}

0 commit comments

Comments
 (0)