control/controlclient: fix map context race

Capture Auto.mapCtx while holding Auto.mu before using it for
incremental map update forwarding. Pause and restart paths can replace
the context under the same mutex, so using it after unlocking races
with those writers.

Add a race regression test for the UserProfiles path that repeatedly
cancels the map context while incremental profile updates are
forwarded.

Fixes #19953

Change-Id: Icc55c4a0dffbc16d6507a2b446b3909d4d0a0278
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2026-06-01 20:03:44 +00:00 committed by Brad Fitzpatrick
parent c234dcc2ef
commit d64aaffc06
2 changed files with 74 additions and 4 deletions

View File

@ -472,13 +472,14 @@ func (mrs mapRoutineState) UpdateNetmapDelta(muts []netmap.NodeMutation) bool {
c.mu.Lock()
goodState := c.loggedIn && c.inMapPoll
ndu, canDelta := c.observer.(NetmapDeltaUpdater)
mapCtx := c.mapCtx
c.mu.Unlock()
if !goodState || !canDelta {
return false
}
ctx, cancel := context.WithTimeout(c.mapCtx, 2*time.Second)
ctx, cancel := context.WithTimeout(mapCtx, 2*time.Second)
defer cancel()
ch := make(chan bool, 1)
@ -508,11 +509,12 @@ func (mrs mapRoutineState) UpdatePacketFilter(rules views.Slice[tailcfg.FilterRu
c.mu.Lock()
goodState := c.loggedIn && c.inMapPoll
pfu, ok := c.observer.(PacketFilterUpdater)
mapCtx := c.mapCtx
c.mu.Unlock()
if !goodState || !ok {
return false
}
ctx, cancel := context.WithTimeout(c.mapCtx, 2*time.Second)
ctx, cancel := context.WithTimeout(mapCtx, 2*time.Second)
defer cancel()
ch := make(chan bool, 1)
c.observerQueue.Add(func() {
@ -536,11 +538,12 @@ func (mrs mapRoutineState) UpdateUserProfiles(profiles map[tailcfg.UserID]tailcf
c.mu.Lock()
goodState := c.loggedIn && c.inMapPoll
upu, ok := c.observer.(UserProfileUpdater)
mapCtx := c.mapCtx
c.mu.Unlock()
if !goodState || !ok {
return false
}
ctx, cancel := context.WithTimeout(c.mapCtx, 2*time.Second)
ctx, cancel := context.WithTimeout(mapCtx, 2*time.Second)
defer cancel()
ch := make(chan bool, 1)
c.observerQueue.Add(func() {
@ -561,13 +564,14 @@ func (mrs mapRoutineState) PatchDiscoKey(pub key.NodePublic, disco key.DiscoPubl
c.mu.Lock()
goodState := c.loggedIn && c.inMapPoll
dun, ok := c.observer.(patchDiscoKeyer)
mapCtx := c.mapCtx
c.mu.Unlock()
if !goodState || !ok {
return
}
ctx, cancel := context.WithTimeout(c.mapCtx, 2*time.Second)
ctx, cancel := context.WithTimeout(mapCtx, 2*time.Second)
defer cancel()
c.observerQueue.RunSync(ctx, func() {

View File

@ -0,0 +1,66 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package controlclient
import (
"context"
"sync"
"testing"
"time"
"tailscale.com/tailcfg"
)
type userProfileUpdateObserver struct{}
func (userProfileUpdateObserver) SetControlClientStatus(Client, Status) {}
func (userProfileUpdateObserver) UpdateUserProfiles(map[tailcfg.UserID]tailcfg.UserProfileView) bool {
return true
}
func TestMapRoutineStateUpdateUserProfilesConcurrentCancelMapCtx(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
c := &Auto{
logf: func(string, ...any) {},
observer: userProfileUpdateObserver{},
mapCtx: ctx,
mapCancel: cancel,
loggedIn: true,
inMapPoll: true,
}
mrs := mapRoutineState{c: c}
start := make(chan struct{})
var wg sync.WaitGroup
for range 4 {
wg.Go(func() {
<-start
for range 2000 {
c.mu.Lock()
c.cancelMapCtxLocked()
c.mu.Unlock()
}
})
}
for range 4 {
wg.Go(func() {
<-start
for range 2000 {
mrs.UpdateUserProfiles(nil)
}
})
}
close(start)
wg.Wait()
waitCtx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
defer cancel()
if err := c.observerQueue.Wait(waitCtx); err != nil {
t.Fatal(err)
}
c.observerQueue.Shutdown()
c.mapCancel()
}