wip: testwrapper trace work in progress

Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
Tom Proctor 2026-05-21 08:52:21 +00:00
parent 5295e3e119
commit 59695d555e
3 changed files with 514 additions and 0 deletions

View File

@ -61,6 +61,10 @@ func splitArgs(args []string) (pre, pkgs, post []string, _ error) {
return nil, nil, nil, err
}
fs.Visit(func(f *flag.Flag) {
if f.Name == "testtrace" {
// Consumed by testwrapper itself; not a go test flag.
return
}
if f.Name == "cachelink" && !cacheLink.enabled {
return
}
@ -114,6 +118,16 @@ func (c *cacheLinkVal) Set(s string) error {
func (*cacheLinkVal) IsBoolFlag() bool { return true }
// testTraceFile is the path passed via -testtrace=PATH. If non-empty,
// testwrapper writes a Chrome Trace Event Format file (viewable in
// chrome://tracing or https://ui.perfetto.dev) capturing per-test and
// per-package spans for the entire run, including retries.
//
// The flag is deliberately named -testtrace (rather than -trace or -tracefile)
// to avoid colliding with "go test -trace=FILE", which writes a Go runtime
// execution trace.
var testTraceFile string
func newTestFlagSet() *flag.FlagSet {
fs := flag.NewFlagSet("testwrapper", flag.ContinueOnError)
fs.SetOutput(io.Discard)
@ -127,6 +141,7 @@ func newTestFlagSet() *flag.FlagSet {
fs.String("vet", "", "vet checks to run, or 'off' or 'all'")
fs.Var(&cacheLink, "cachelink", "Go -cachelink value (bool); or 'auto' to enable if GOCACHEPROG is set")
fs.StringVar(&testTraceFile, "testtrace", "", "if non-empty, write a Chrome Trace Event Format file of test/package timings to this path (distinct from go test -trace, which writes a runtime execution trace)")
return fs
}

View File

@ -286,6 +286,17 @@ func main() {
packages = strings.Split(strings.TrimSpace(string(out)), "\n")
}
var tw *traceWriter
if testTraceFile != "" {
var err error
tw, err = newTraceWriter(testTraceFile)
if err != nil {
log.Fatalf("opening trace file: %v", err)
}
tw.SetRunArgs(os.Args[1:])
defer tw.Close()
}
ctx := context.Background()
type nextRun struct {
tests []*packageTests
@ -331,6 +342,7 @@ type nextRun struct {
if thisRun.attempt > maxAttempts {
fmt.Println("max attempts reached")
tw.Close()
os.Exit(1)
}
if thisRun.attempt > 1 {
@ -358,6 +370,7 @@ type nextRun struct {
tr.pkg = packages[0]
}
if tr.pkgFinished {
tw.emitPackage(tr, thisRun.attempt)
if tr.raceDetected {
// A data race is never something we want to
// paper over by retrying flaky tests in the
@ -382,6 +395,7 @@ type nextRun struct {
printPkgOutcome(tr.pkg, tr.outcome, tr.cached, thisRun.attempt, tr.end.Sub(tr.start))
continue
}
tw.emitTest(tr, thisRun.attempt)
if testingVerbose || tr.outcome == "fail" {
io.Copy(os.Stdout, &tr.logs)
}
@ -409,6 +423,7 @@ type nextRun struct {
fmt.Printf("non-flakytest failures: %s\n", j)
}
fmt.Println()
tw.Close()
os.Exit(1)
}
@ -417,10 +432,12 @@ type nextRun struct {
if err := <-runErr; len(toRetry) == 0 && err != nil {
if exit, ok := errors.AsType[*exec.ExitError](err); ok {
if code := exit.ExitCode(); code > -1 {
tw.Close()
os.Exit(exit.ExitCode())
}
}
log.Printf("testwrapper: %s", err)
tw.Close()
os.Exit(1)
}
}

482
cmd/testwrapper/trace.go Normal file
View File

@ -0,0 +1,482 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package main
import (
"encoding/json"
"os"
"sort"
"time"
)
// traceWriter emits the Chrome Trace Event Format ("JSON Object Format") to a
// file viewable in https://ui.perfetto.dev or chrome://tracing. Format spec:
// https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU
//
// Each Go package becomes a process ("pid") in the trace. Within a package,
// tid 0 carries the package-level summary span; tids >= 1 are lanes for
// concurrently-running tests, packed greedily so that t.Parallel tests render
// as parallel swim lanes instead of overlapping bars.
//
// Events are buffered in memory and written when Close is called. A whole-
// monorepo trace is well under 1 MiB so this is fine in practice.
type traceWriter struct {
f *os.File
events []traceEvent
runPid int // pid reserved for the orchestration process
runArgs []string // testwrapper command-line args, recorded on the run span
pids map[string]int // package → pid (allocated >= runPid+1)
lanes map[int][]time.Time // pid → lane index → lane's last end time
}
// runPidValue is reserved as the pid of the synthetic "test run" process
// that holds the orchestration span and the global counters. Package pids
// are allocated starting at runPidValue+1.
const runPidValue = 1
func newTraceWriter(path string) (*traceWriter, error) {
f, err := os.Create(path)
if err != nil {
return nil, err
}
return &traceWriter{
f: f,
runPid: runPidValue,
pids: map[string]int{},
lanes: map[int][]time.Time{},
}, nil
}
// SetRunArgs records the command-line args used to invoke testwrapper. They
// are surfaced as the "command_line" arg on the orchestration span so a
// reader can drill into how the run was kicked off.
func (w *traceWriter) SetRunArgs(args []string) {
if w == nil {
return
}
w.runArgs = args
}
type traceEvent struct {
Name string `json:"name"` // display name of the event (spec: Common Event Fields)
Cat string `json:"cat,omitempty"` // comma-separated categories used by the UI for filtering (spec: Common Event Fields)
Ph string `json:"ph"` // phase / event type: "X" complete, "M" metadata, "C" counter (spec: Event Descriptions)
Ts int64 `json:"ts,omitempty"` // tracing-clock timestamp in microseconds (spec: Common Event Fields)
Dur int64 `json:"dur,omitempty"` // duration in microseconds, for "X" complete events only (spec: Complete Events)
Pid int `json:"pid"` // process ID the event belongs to; rendered as a top-level row (spec: Common Event Fields)
Tid int `json:"tid"` // thread ID within Pid; rendered as a sub-lane under the process (spec: Common Event Fields)
Cname string `json:"cname,omitempty"` // optional color from the reserved Catapult/Perfetto palette (spec: Specifying Colors)
Args map[string]any `json:"args,omitempty"` // arbitrary key/value metadata shown when the event is selected (spec: Common Event Fields)
}
// cnameForOutcome maps a test outcome to a Perfetto/Catapult palette color so
// the trace renders pass/fail/skip in a fixed three-tone scheme instead of
// hashing the test name into a different color per slice.
func cnameForOutcome(outcome string) string {
switch outcome {
case "pass":
return "good"
case "fail":
return "terrible"
case "skip":
return "grey"
}
return ""
}
func (w *traceWriter) write(ev traceEvent) {
if w == nil || w.f == nil {
return
}
w.events = append(w.events, ev)
}
// pidFor returns a stable pid for pkg, emitting metadata events for the
// process name and tid 0 (the package summary lane) the first time pkg is
// seen.
//
// Thread names are chosen so that Perfetto's "<thread_name> <tid>" display
// formula reads naturally: tid 0 (the package's go test invocation) renders
// as "go test 0 (main thread)", and parallel test lanes render as "lane 1",
// "lane 2", and so on.
func (w *traceWriter) pidFor(pkg string) int {
if pid, ok := w.pids[pkg]; ok {
return pid
}
pid := w.runPid + 1 + len(w.pids)
w.pids[pkg] = pid
w.write(traceEvent{
Name: "process_name", Ph: "M", Pid: pid, Tid: 0,
Args: map[string]any{"name": pkg},
})
// Sort packages by their pid, which equals their order of first
// emission, which (since testwrapper runs packages sequentially) equals
// their chronological start order. The result is a top-down waterfall.
w.write(traceEvent{
Name: "process_sort_index", Ph: "M", Pid: pid, Tid: 0,
Args: map[string]any{"sort_index": pid},
})
w.write(traceEvent{
Name: "thread_name", Ph: "M", Pid: pid, Tid: 0,
Args: map[string]any{"name": "go test"},
})
return pid
}
// laneFor finds (or allocates) a lane within pid whose most-recent test
// finished at or before start, then updates that lane's end time. The
// returned tid is >= 1; tid 0 is reserved for the package summary span.
func (w *traceWriter) laneFor(pid int, start, end time.Time) int {
lanes := w.lanes[pid]
for i, laneEnd := range lanes {
if !laneEnd.After(start) {
lanes[i] = end
w.lanes[pid] = lanes
return i + 1
}
}
tid := len(lanes) + 1
w.lanes[pid] = append(lanes, end)
w.write(traceEvent{
Name: "thread_name", Ph: "M", Pid: pid, Tid: tid,
Args: map[string]any{"name": "lane"},
})
return tid
}
// emitTest records a span for a single test attempt.
func (w *traceWriter) emitTest(tr *testAttempt, attempt int) {
if w == nil || tr.start.IsZero() || tr.end.IsZero() {
return
}
pid := w.pidFor(tr.pkg)
tid := w.laneFor(pid, tr.start, tr.end)
args := map[string]any{
"outcome": tr.outcome,
"attempt": attempt,
}
if tr.isMarkedFlaky {
args["flaky"] = true
if tr.issueURL != "" {
args["issueURL"] = tr.issueURL
}
}
w.write(traceEvent{
Name: tr.testName,
Cat: tr.outcome,
Ph: "X",
Ts: tr.start.UnixMicro(),
Dur: tr.end.Sub(tr.start).Microseconds(),
Pid: pid,
Tid: tid,
Cname: cnameForOutcome(tr.outcome),
Args: args,
})
}
// emitPackage records the package-level summary span on tid 0.
func (w *traceWriter) emitPackage(tr *testAttempt, attempt int) {
if w == nil || tr.pkg == "" || tr.start.IsZero() || tr.end.IsZero() {
return
}
pid := w.pidFor(tr.pkg)
w.write(traceEvent{
Name: tr.pkg,
Cat: tr.outcome,
Ph: "X",
Ts: tr.start.UnixMicro(),
Dur: tr.end.Sub(tr.start).Microseconds(),
Pid: pid,
Tid: 0,
Cname: cnameForOutcome(tr.outcome),
Args: map[string]any{
"outcome": tr.outcome,
"attempt": attempt,
"cached": tr.cached,
},
})
}
// Close marshals all buffered events to the file. It is safe to call on a
// nil receiver and idempotent.
func (w *traceWriter) Close() error {
if w == nil || w.f == nil {
return nil
}
f := w.f
w.f = nil
defer f.Close()
w.appendBuildSpans()
w.appendOrchestration()
return json.NewEncoder(f).Encode(struct {
TraceEvents []traceEvent `json:"traceEvents"`
}{w.events})
}
// appendBuildSpans synthesizes a "build" span on tid 0 of each package,
// covering the interval between when go test starts (the package summary
// span's start) and when the first test in that package begins running.
//
// testwrapper invokes "go test" which compiles and runs in one shot, so we
// cannot observe the compile step directly; this difference is the closest
// proxy and includes compile time, test binary startup, and any TestMain
// setup before the first t.Run.
func (w *traceWriter) appendBuildSpans() {
type pkgTimes struct {
pkgStart int64
firstTest int64
havePkg bool
haveTest bool
}
by := map[int]*pkgTimes{}
for _, ev := range w.events {
if ev.Ph != "X" {
continue
}
pt := by[ev.Pid]
if pt == nil {
pt = &pkgTimes{}
by[ev.Pid] = pt
}
if ev.Tid == 0 {
pt.pkgStart = ev.Ts
pt.havePkg = true
continue
}
if !pt.haveTest || ev.Ts < pt.firstTest {
pt.firstTest = ev.Ts
pt.haveTest = true
}
}
for pid, pt := range by {
if !pt.havePkg || !pt.haveTest || pt.firstTest <= pt.pkgStart {
continue
}
w.events = append(w.events, traceEvent{
Name: "build",
Cat: "build",
Ph: "X",
Ts: pt.pkgStart,
Dur: pt.firstTest - pt.pkgStart,
Pid: pid,
Tid: 0,
Cname: "olive",
})
}
}
// appendOrchestration synthesizes the top-level "test run" process: an
// overall span covering the full duration of the run (with the testwrapper
// command line in args), a running_tests counter showing live concurrency,
// and a test_results counter showing cumulative final-outcome counts. All
// of these live on a single process pinned to the top of the trace.
//
// Final outcome per test is the highest-attempt span; a final pass that
// took more than one attempt is reclassified as "flaky" so the same test
// never contributes to two counters.
func (w *traceWriter) appendOrchestration() {
type testKey struct {
pid int
name string
}
type spanInfo struct {
start, end int64
outcome string
attempt int
}
byTest := map[testKey][]spanInfo{}
var minTs, maxTs int64
have := false
for _, ev := range w.events {
if ev.Ph != "X" {
continue
}
end := ev.Ts + ev.Dur
if !have || ev.Ts < minTs {
minTs = ev.Ts
}
if !have || end > maxTs {
maxTs = end
}
have = true
if ev.Tid < 1 || ev.Cat == "build" {
continue // counters only track test spans
}
outcome, _ := ev.Args["outcome"].(string)
attempt, _ := ev.Args["attempt"].(int)
k := testKey{ev.Pid, ev.Name}
byTest[k] = append(byTest[k], spanInfo{ev.Ts, end, outcome, attempt})
}
if !have {
return
}
// running transitions (every test span contributes ±1).
type runT struct {
t int64
delta int
}
var runs []runT
for _, spans := range byTest {
for _, s := range spans {
runs = append(runs, runT{s.start, +1}, runT{s.end, -1})
}
}
sort.Slice(runs, func(i, j int) bool { return runs[i].t < runs[j].t })
// Per-package final-state events. Caching is a package-level concept in
// go test, so the counter answers "of the packages we asked for, how
// many ran, how many were cache hits, and how many had no test files?"
// "no_tests" matches go test's "? pkg [no test files]" output and is
// distinct from the test-level "skipped" (a test that called t.Skip()).
type pkgEnd struct {
t int64
kind string // "tested" | "cached" | "no_tests"
}
var pkgEnds []pkgEnd
var pkgsTested, pkgsCached, pkgsNoTests int
for _, ev := range w.events {
if ev.Ph != "X" || ev.Tid != 0 || ev.Cat == "build" {
continue // only package summary spans
}
cached, _ := ev.Args["cached"].(bool)
outcome, _ := ev.Args["outcome"].(string)
var kind string
switch {
case cached:
kind = "cached"
pkgsCached++
case outcome == "skip":
kind = "no_tests"
pkgsNoTests++
default:
kind = "tested"
pkgsTested++
}
pkgEnds = append(pkgEnds, pkgEnd{ev.Ts + ev.Dur, kind})
}
sort.Slice(pkgEnds, func(i, j int) bool { return pkgEnds[i].t < pkgEnds[j].t })
// Final-outcome events (one per test, at its final attempt's end).
type finalT struct {
t int64
kind string // "pass" | "fail" | "skip" | "flaky"
}
var finals []finalT
var passed, failed, flaky, skipped int
for _, spans := range byTest {
final := spans[0]
for _, s := range spans[1:] {
if s.attempt > final.attempt {
final = s
}
}
kind := final.outcome
if kind == "pass" && len(spans) > 1 {
kind = "flaky"
}
switch kind {
case "pass":
passed++
case "fail":
failed++
case "flaky":
flaky++
case "skip":
skipped++
}
finals = append(finals, finalT{final.end, kind})
}
sort.Slice(finals, func(i, j int) bool { return finals[i].t < finals[j].t })
pid := w.runPid
w.events = append(w.events,
traceEvent{Name: "process_name", Ph: "M", Pid: pid, Tid: 0, Args: map[string]any{"name": "test run"}},
traceEvent{Name: "process_sort_index", Ph: "M", Pid: pid, Tid: 0, Args: map[string]any{"sort_index": -1}},
traceEvent{Name: "thread_name", Ph: "M", Pid: pid, Tid: 0, Args: map[string]any{"name": "run"}},
)
runArgs := map[string]any{
"tests_passed": passed,
"tests_failed": failed,
"tests_flaky": flaky,
"tests_skipped": skipped,
"packages_tested": pkgsTested,
"packages_cached": pkgsCached,
"packages_no_tests": pkgsNoTests,
}
if len(w.runArgs) > 0 {
runArgs["command_line"] = w.runArgs
}
w.events = append(w.events, traceEvent{
Name: "test run",
Cat: "run",
Ph: "X",
Ts: minTs,
Dur: maxTs - minTs,
Pid: pid,
Tid: 0,
Args: runArgs,
})
// Merge the three timelines and emit a counter sample at every
// timestamp where any counter changed.
running, p, f, fl, sk := 0, 0, 0, 0, 0
pt, pc, pn := 0, 0, 0
ri, fi, pi := 0, 0, 0
for ri < len(runs) || fi < len(finals) || pi < len(pkgEnds) {
t := int64(0)
first := true
if ri < len(runs) {
t = runs[ri].t
first = false
}
if fi < len(finals) {
if first || finals[fi].t < t {
t = finals[fi].t
}
first = false
}
if pi < len(pkgEnds) {
if first || pkgEnds[pi].t < t {
t = pkgEnds[pi].t
}
}
for ri < len(runs) && runs[ri].t == t {
running += runs[ri].delta
ri++
}
for fi < len(finals) && finals[fi].t == t {
switch finals[fi].kind {
case "pass":
p++
case "fail":
f++
case "flaky":
fl++
case "skip":
sk++
}
fi++
}
for pi < len(pkgEnds) && pkgEnds[pi].t == t {
switch pkgEnds[pi].kind {
case "tested":
pt++
case "cached":
pc++
case "no_tests":
pn++
}
pi++
}
w.events = append(w.events,
traceEvent{Name: "concurrency", Ph: "C", Ts: t, Pid: pid, Args: map[string]any{"running_tests": running}},
traceEvent{Name: "test results", Ph: "C", Ts: t, Pid: pid, Args: map[string]any{"passed": p, "failed": f, "flaky": fl, "skipped": sk}},
traceEvent{Name: "packages", Ph: "C", Ts: t, Pid: pid, Args: map[string]any{"done": pt + pc + pn, "cached": pc, "no_tests": pn}},
)
}
}