feat!: add summary to coder ping (#14762)

This commit is contained in:
Ethan
2024-09-25 13:24:23 +10:00
committed by GitHub
parent e086d7813b
commit b7c574f679
8 changed files with 258 additions and 93 deletions
+4 -6
View File
@@ -370,6 +370,9 @@ func (d ConnDiags) Write(w io.Writer) {
for _, msg := range general { for _, msg := range general {
_, _ = fmt.Fprintln(w, msg) _, _ = fmt.Fprintln(w, msg)
} }
if len(general) > 0 {
_, _ = fmt.Fprintln(w, "")
}
if len(client) > 0 { if len(client) > 0 {
_, _ = fmt.Fprint(w, "Possible client-side issues with direct connection:\n\n") _, _ = fmt.Fprint(w, "Possible client-side issues with direct connection:\n\n")
for _, msg := range client { for _, msg := range client {
@@ -385,12 +388,6 @@ func (d ConnDiags) Write(w io.Writer) {
} }
func (d ConnDiags) splitDiagnostics() (general, client, agent []string) { func (d ConnDiags) splitDiagnostics() (general, client, agent []string) {
if d.PingP2P {
general = append(general, "✔ You are connected directly (p2p)")
} else {
general = append(general, "❗ You are connected via a DERP relay, not directly (p2p)")
}
if d.AgentNetcheck != nil { if d.AgentNetcheck != nil {
for _, msg := range d.AgentNetcheck.Interfaces.Warnings { for _, msg := range d.AgentNetcheck.Interfaces.Warnings {
agent = append(agent, msg.Message) agent = append(agent, msg.Message)
@@ -461,5 +458,6 @@ func (d ConnDiags) splitDiagnostics() (general, client, agent []string) {
agent = append(agent, agent = append(agent,
fmt.Sprintf("Agent IP address is within an AWS range (AWS uses hard NAT)\n %s#endpoint-dependent-nat-hard-nat", d.TroubleshootingURL)) fmt.Sprintf("Agent IP address is within an AWS range (AWS uses hard NAT)\n %s#endpoint-dependent-nat-hard-nat", d.TroubleshootingURL))
} }
return general, client, agent return general, client, agent
} }
-26
View File
@@ -683,19 +683,6 @@ func TestConnDiagnostics(t *testing.T) {
diags cliui.ConnDiags diags cliui.ConnDiags
want []string want []string
}{ }{
{
name: "Direct",
diags: cliui.ConnDiags{
ConnInfo: workspacesdk.AgentConnectionInfo{
DERPMap: &tailcfg.DERPMap{},
},
PingP2P: true,
LocalNetInfo: &tailcfg.NetInfo{},
},
want: []string{
`✔ You are connected directly (p2p)`,
},
},
{ {
name: "DirectBlocked", name: "DirectBlocked",
diags: cliui.ConnDiags{ diags: cliui.ConnDiags{
@@ -705,7 +692,6 @@ func TestConnDiagnostics(t *testing.T) {
}, },
}, },
want: []string{ want: []string{
`❗ You are connected via a DERP relay, not directly (p2p)`,
`❗ Your Coder administrator has blocked direct connections`, `❗ Your Coder administrator has blocked direct connections`,
}, },
}, },
@@ -718,7 +704,6 @@ func TestConnDiagnostics(t *testing.T) {
LocalNetInfo: &tailcfg.NetInfo{}, LocalNetInfo: &tailcfg.NetInfo{},
}, },
want: []string{ want: []string{
`❗ You are connected via a DERP relay, not directly (p2p)`,
`The DERP map is not configured to use STUN`, `The DERP map is not configured to use STUN`,
}, },
}, },
@@ -743,7 +728,6 @@ func TestConnDiagnostics(t *testing.T) {
}, },
}, },
want: []string{ want: []string{
`❗ You are connected via a DERP relay, not directly (p2p)`,
`Client could not connect to STUN over UDP`, `Client could not connect to STUN over UDP`,
}, },
}, },
@@ -770,7 +754,6 @@ func TestConnDiagnostics(t *testing.T) {
}, },
}, },
want: []string{ want: []string{
`❗ You are connected via a DERP relay, not directly (p2p)`,
`Agent could not connect to STUN over UDP`, `Agent could not connect to STUN over UDP`,
}, },
}, },
@@ -785,7 +768,6 @@ func TestConnDiagnostics(t *testing.T) {
}, },
}, },
want: []string{ want: []string{
`❗ You are connected via a DERP relay, not directly (p2p)`,
`Client is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers`, `Client is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers`,
}, },
}, },
@@ -795,14 +777,12 @@ func TestConnDiagnostics(t *testing.T) {
ConnInfo: workspacesdk.AgentConnectionInfo{ ConnInfo: workspacesdk.AgentConnectionInfo{
DERPMap: &tailcfg.DERPMap{}, DERPMap: &tailcfg.DERPMap{},
}, },
PingP2P: false,
LocalNetInfo: &tailcfg.NetInfo{}, LocalNetInfo: &tailcfg.NetInfo{},
AgentNetcheck: &healthsdk.AgentNetcheckReport{ AgentNetcheck: &healthsdk.AgentNetcheckReport{
NetInfo: &tailcfg.NetInfo{MappingVariesByDestIP: "true"}, NetInfo: &tailcfg.NetInfo{MappingVariesByDestIP: "true"},
}, },
}, },
want: []string{ want: []string{
`❗ You are connected via a DERP relay, not directly (p2p)`,
`Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers`, `Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers`,
}, },
}, },
@@ -812,7 +792,6 @@ func TestConnDiagnostics(t *testing.T) {
ConnInfo: workspacesdk.AgentConnectionInfo{ ConnInfo: workspacesdk.AgentConnectionInfo{
DERPMap: &tailcfg.DERPMap{}, DERPMap: &tailcfg.DERPMap{},
}, },
PingP2P: true,
AgentNetcheck: &healthsdk.AgentNetcheckReport{ AgentNetcheck: &healthsdk.AgentNetcheckReport{
Interfaces: healthsdk.InterfacesReport{ Interfaces: healthsdk.InterfacesReport{
BaseReport: healthsdk.BaseReport{ BaseReport: healthsdk.BaseReport{
@@ -824,7 +803,6 @@ func TestConnDiagnostics(t *testing.T) {
}, },
}, },
want: []string{ want: []string{
`✔ You are connected directly (p2p)`,
`Network interface eth0 has MTU 1280, (less than 1378), which may degrade the quality of direct connections`, `Network interface eth0 has MTU 1280, (less than 1378), which may degrade the quality of direct connections`,
}, },
}, },
@@ -834,7 +812,6 @@ func TestConnDiagnostics(t *testing.T) {
ConnInfo: workspacesdk.AgentConnectionInfo{ ConnInfo: workspacesdk.AgentConnectionInfo{
DERPMap: &tailcfg.DERPMap{}, DERPMap: &tailcfg.DERPMap{},
}, },
PingP2P: true,
LocalInterfaces: &healthsdk.InterfacesReport{ LocalInterfaces: &healthsdk.InterfacesReport{
BaseReport: healthsdk.BaseReport{ BaseReport: healthsdk.BaseReport{
Warnings: []health.Message{ Warnings: []health.Message{
@@ -844,7 +821,6 @@ func TestConnDiagnostics(t *testing.T) {
}, },
}, },
want: []string{ want: []string{
`✔ You are connected directly (p2p)`,
`Network interface eth1 has MTU 1310, (less than 1378), which may degrade the quality of direct connections`, `Network interface eth1 has MTU 1310, (less than 1378), which may degrade the quality of direct connections`,
}, },
}, },
@@ -858,7 +834,6 @@ func TestConnDiagnostics(t *testing.T) {
AgentIPIsAWS: false, AgentIPIsAWS: false,
}, },
want: []string{ want: []string{
`❗ You are connected via a DERP relay, not directly (p2p)`,
`Client IP address is within an AWS range (AWS uses hard NAT)`, `Client IP address is within an AWS range (AWS uses hard NAT)`,
}, },
}, },
@@ -872,7 +847,6 @@ func TestConnDiagnostics(t *testing.T) {
AgentIPIsAWS: true, AgentIPIsAWS: true,
}, },
want: []string{ want: []string{
`❗ You are connected via a DERP relay, not directly (p2p)`,
`Agent IP address is within an AWS range (AWS uses hard NAT)`, `Agent IP address is within an AWS range (AWS uses hard NAT)`,
}, },
}, },
+4
View File
@@ -199,6 +199,10 @@ func renderTable(out any, sort string, headers table.Row, filterColumns []string
if val != nil { if val != nil {
v = *val v = *val
} }
case *time.Duration:
if val != nil {
v = val.String()
}
case fmt.Stringer: case fmt.Stringer:
if val != nil { if val != nil {
v = val.String() v = val.String()
+137 -52
View File
@@ -4,26 +4,83 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"io"
"net/http" "net/http"
"net/netip" "net/netip"
"strings"
"time" "time"
"golang.org/x/xerrors" "golang.org/x/xerrors"
"tailscale.com/ipn/ipnstate"
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
"cdr.dev/slog" "cdr.dev/slog"
"cdr.dev/slog/sloggers/sloghuman" "cdr.dev/slog/sloggers/sloghuman"
"github.com/briandowns/spinner"
"github.com/coder/pretty" "github.com/coder/pretty"
"github.com/coder/coder/v2/cli/cliui" "github.com/coder/coder/v2/cli/cliui"
"github.com/coder/coder/v2/cli/cliutil" "github.com/coder/coder/v2/cli/cliutil"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/healthsdk" "github.com/coder/coder/v2/codersdk/healthsdk"
"github.com/coder/coder/v2/codersdk/workspacesdk" "github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/serpent" "github.com/coder/serpent"
) )
type pingSummary struct {
Workspace string `table:"workspace,nosort"`
Total int `table:"total"`
Successful int `table:"successful"`
Min *time.Duration `table:"min"`
Avg *time.Duration `table:"avg"`
Max *time.Duration `table:"max"`
Variance *time.Duration `table:"variance"`
latencySum float64
runningAvg float64
m2 float64
}
func (s *pingSummary) addResult(r *ipnstate.PingResult) {
s.Total++
if r == nil || r.Err != "" {
return
}
s.Successful++
if s.Min == nil || r.LatencySeconds < s.Min.Seconds() {
s.Min = ptr.Ref(time.Duration(r.LatencySeconds * float64(time.Second)))
}
if s.Max == nil || r.LatencySeconds > s.Min.Seconds() {
s.Max = ptr.Ref(time.Duration(r.LatencySeconds * float64(time.Second)))
}
s.latencySum += r.LatencySeconds
d := r.LatencySeconds - s.runningAvg
s.runningAvg += d / float64(s.Successful)
d2 := r.LatencySeconds - s.runningAvg
s.m2 += d * d2
}
// Write finalizes the summary and writes it
func (s *pingSummary) Write(w io.Writer) {
if s.Successful > 0 {
s.Avg = ptr.Ref(time.Duration(s.latencySum / float64(s.Successful) * float64(time.Second)))
}
if s.Successful > 1 {
s.Variance = ptr.Ref(time.Duration((s.m2 / float64(s.Successful-1)) * float64(time.Second)))
}
out, err := cliui.DisplayTable([]*pingSummary{s}, "", nil)
if err != nil {
_, _ = fmt.Fprintf(w, "Failed to display ping summary: %v\n", err)
return
}
width := len(strings.Split(out, "\n")[0])
_, _ = fmt.Println(strings.Repeat("-", width))
_, _ = fmt.Fprint(w, out)
}
func (r *RootCmd) ping() *serpent.Command { func (r *RootCmd) ping() *serpent.Command {
var ( var (
pingNum int64 pingNum int64
@@ -46,6 +103,14 @@ func (r *RootCmd) ping() *serpent.Command {
ctx, cancel := context.WithCancel(inv.Context()) ctx, cancel := context.WithCancel(inv.Context())
defer cancel() defer cancel()
spin := spinner.New(spinner.CharSets[5], 100*time.Millisecond)
spin.Writer = inv.Stderr
spin.Suffix = pretty.Sprint(cliui.DefaultStyles.Keyword, " Collecting diagnostics...")
spin.Start()
notifyCtx, notifyCancel := inv.SignalNotifyContext(ctx, StopSignals...)
defer notifyCancel()
workspaceName := inv.Args[0] workspaceName := inv.Args[0]
_, workspaceAgent, err := getWorkspaceAndAgent( _, workspaceAgent, err := getWorkspaceAndAgent(
ctx, inv, client, ctx, inv, client,
@@ -77,11 +142,64 @@ func (r *RootCmd) ping() *serpent.Command {
defer conn.Close() defer conn.Close()
derpMap := conn.DERPMap() derpMap := conn.DERPMap()
_ = derpMap
diagCtx, diagCancel := context.WithTimeout(inv.Context(), 30*time.Second)
defer diagCancel()
diags := conn.GetPeerDiagnostics()
// Silent ping to determine whether we should show diags
_, didP2p, _, _ := conn.Ping(ctx)
ni := conn.GetNetInfo()
connDiags := cliui.ConnDiags{
DisableDirect: r.disableDirect,
LocalNetInfo: ni,
Verbose: r.verbose,
PingP2P: didP2p,
TroubleshootingURL: appearanceConfig.DocsURL + "/networking/troubleshooting",
}
awsRanges, err := cliutil.FetchAWSIPRanges(diagCtx, cliutil.AWSIPRangesURL)
if err != nil {
opts.Logger.Debug(inv.Context(), "failed to retrieve AWS IP ranges", slog.Error(err))
}
connDiags.ClientIPIsAWS = isAWSIP(awsRanges, ni)
connInfo, err := wsClient.AgentConnectionInfoGeneric(diagCtx)
if err != nil || connInfo.DERPMap == nil {
return xerrors.Errorf("Failed to retrieve connection info from server: %w\n", err)
}
connDiags.ConnInfo = connInfo
ifReport, err := healthsdk.RunInterfacesReport()
if err == nil {
connDiags.LocalInterfaces = &ifReport
} else {
_, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve local interfaces report: %v\n", err)
}
agentNetcheck, err := conn.Netcheck(diagCtx)
if err == nil {
connDiags.AgentNetcheck = &agentNetcheck
connDiags.AgentIPIsAWS = isAWSIP(awsRanges, agentNetcheck.NetInfo)
} else {
var sdkErr *codersdk.Error
if errors.As(err, &sdkErr) && sdkErr.StatusCode() == http.StatusNotFound {
_, _ = fmt.Fprint(inv.Stdout, "Could not generate full connection report as the workspace agent is outdated\n")
} else {
_, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve connection report from agent: %v\n", err)
}
}
spin.Stop()
cliui.PeerDiagnostics(inv.Stderr, diags)
connDiags.Write(inv.Stderr)
results := &pingSummary{
Workspace: workspaceName,
}
n := 0 n := 0
didP2p := false
start := time.Now() start := time.Now()
pingLoop:
for { for {
if n > 0 { if n > 0 {
time.Sleep(pingWait) time.Sleep(pingWait)
@@ -91,6 +209,7 @@ func (r *RootCmd) ping() *serpent.Command {
ctx, cancel := context.WithTimeout(ctx, pingTimeout) ctx, cancel := context.WithTimeout(ctx, pingTimeout)
dur, p2p, pong, err := conn.Ping(ctx) dur, p2p, pong, err := conn.Ping(ctx)
cancel() cancel()
results.addResult(pong)
if err != nil { if err != nil {
if xerrors.Is(err, context.DeadlineExceeded) { if xerrors.Is(err, context.DeadlineExceeded) {
_, _ = fmt.Fprintf(inv.Stdout, "ping to %q timed out \n", workspaceName) _, _ = fmt.Fprintf(inv.Stdout, "ping to %q timed out \n", workspaceName)
@@ -146,57 +265,24 @@ func (r *RootCmd) ping() *serpent.Command {
pretty.Sprint(cliui.DefaultStyles.DateTimeStamp, dur.String()), pretty.Sprint(cliui.DefaultStyles.DateTimeStamp, dur.String()),
) )
if n == int(pingNum) { select {
break case <-notifyCtx.Done():
} break pingLoop
} default:
diagCtx, diagCancel := context.WithTimeout(inv.Context(), 30*time.Second) if n == int(pingNum) {
defer diagCancel() break pingLoop
diags := conn.GetPeerDiagnostics() }
cliui.PeerDiagnostics(inv.Stdout, diags)
ni := conn.GetNetInfo()
connDiags := cliui.ConnDiags{
PingP2P: didP2p,
DisableDirect: r.disableDirect,
LocalNetInfo: ni,
Verbose: r.verbose,
TroubleshootingURL: appearanceConfig.DocsURL + "/networking/troubleshooting",
}
awsRanges, err := cliutil.FetchAWSIPRanges(diagCtx, cliutil.AWSIPRangesURL)
if err != nil {
opts.Logger.Debug(inv.Context(), "failed to retrieve AWS IP ranges", slog.Error(err))
}
connDiags.ClientIPIsAWS = isAWSIP(awsRanges, ni)
connInfo, err := wsClient.AgentConnectionInfoGeneric(diagCtx)
if err != nil || connInfo.DERPMap == nil {
return xerrors.Errorf("Failed to retrieve connection info from server: %w\n", err)
}
connDiags.ConnInfo = connInfo
ifReport, err := healthsdk.RunInterfacesReport()
if err == nil {
connDiags.LocalInterfaces = &ifReport
} else {
_, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve local interfaces report: %v\n", err)
}
agentNetcheck, err := conn.Netcheck(diagCtx)
if err == nil {
connDiags.AgentNetcheck = &agentNetcheck
connDiags.AgentIPIsAWS = isAWSIP(awsRanges, agentNetcheck.NetInfo)
} else {
var sdkErr *codersdk.Error
if errors.As(err, &sdkErr) && sdkErr.StatusCode() == http.StatusNotFound {
_, _ = fmt.Fprint(inv.Stdout, "Could not generate full connection report as the workspace agent is outdated\n")
} else {
_, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve connection report from agent: %v\n", err)
} }
} }
connDiags.Write(inv.Stdout) if didP2p {
_, _ = fmt.Fprintf(inv.Stderr, "✔ You are connected directly (p2p)\n")
} else {
_, _ = fmt.Fprintf(inv.Stderr, "❗ You are connected via a DERP relay, not directly (p2p)\n%s#common-problems-with-direct-connections\n", connDiags.TroubleshootingURL)
}
results.Write(inv.Stdout)
return nil return nil
}, },
} }
@@ -218,8 +304,7 @@ func (r *RootCmd) ping() *serpent.Command {
{ {
Flag: "num", Flag: "num",
FlagShorthand: "n", FlagShorthand: "n",
Default: "10", Description: "Specifies the number of pings to perform. By default, pings will continue until interrupted.",
Description: "Specifies the number of pings to perform.",
Value: serpent.Int64Of(&pingNum), Value: serpent.Int64Of(&pingNum),
}, },
} }
+106
View File
@@ -0,0 +1,106 @@
package cli
import (
"io"
"testing"
"time"
"github.com/stretchr/testify/require"
"tailscale.com/ipn/ipnstate"
)
func TestBuildSummary(t *testing.T) {
t.Parallel()
t.Run("Ok", func(t *testing.T) {
t.Parallel()
input := []*ipnstate.PingResult{
{
Err: "",
LatencySeconds: 0.1,
},
{
Err: "",
LatencySeconds: 0.2,
},
{
Err: "",
LatencySeconds: 0.3,
},
{
Err: "ping error",
LatencySeconds: 0.4,
},
}
actual := pingSummary{
Workspace: "test",
}
for _, r := range input {
actual.addResult(r)
}
actual.Write(io.Discard)
require.Equal(t, time.Duration(0.1*float64(time.Second)), *actual.Min)
require.Equal(t, time.Duration(0.2*float64(time.Second)), *actual.Avg)
require.Equal(t, time.Duration(0.3*float64(time.Second)), *actual.Max)
require.Equal(t, time.Duration(0.009999999*float64(time.Second)), *actual.Variance)
require.Equal(t, actual.Successful, 3)
})
t.Run("One", func(t *testing.T) {
t.Parallel()
input := []*ipnstate.PingResult{
{
LatencySeconds: 0.2,
},
}
actual := &pingSummary{
Workspace: "test",
}
for _, r := range input {
actual.addResult(r)
}
actual.Write(io.Discard)
require.Equal(t, actual.Successful, 1)
require.Equal(t, time.Duration(0.2*float64(time.Second)), *actual.Min)
require.Equal(t, time.Duration(0.2*float64(time.Second)), *actual.Avg)
require.Equal(t, time.Duration(0.2*float64(time.Second)), *actual.Max)
require.Nil(t, actual.Variance)
})
t.Run("NoLatency", func(t *testing.T) {
t.Parallel()
input := []*ipnstate.PingResult{
{
Err: "ping error",
},
{
Err: "ping error",
LatencySeconds: 0.2,
},
}
expected := &pingSummary{
Workspace: "test",
Total: 2,
Successful: 0,
Min: nil,
Avg: nil,
Max: nil,
Variance: nil,
latencySum: 0,
runningAvg: 0,
m2: 0,
}
actual := &pingSummary{
Workspace: "test",
}
for _, r := range input {
actual.addResult(r)
}
actual.Write(io.Discard)
require.Equal(t, expected, actual)
})
}
-2
View File
@@ -66,8 +66,6 @@ func TestPing(t *testing.T) {
}) })
pty.ExpectMatch("pong from " + workspace.Name) pty.ExpectMatch("pong from " + workspace.Name)
pty.ExpectMatch("✔ received remote agent data from Coder networking coordinator")
pty.ExpectMatch("You are connected")
cancel() cancel()
<-cmdDone <-cmdDone
}) })
+3 -2
View File
@@ -6,8 +6,9 @@ USAGE:
Ping a workspace Ping a workspace
OPTIONS: OPTIONS:
-n, --num int (default: 10) -n, --num int
Specifies the number of pings to perform. Specifies the number of pings to perform. By default, pings will
continue until interrupted.
-t, --timeout duration (default: 5s) -t, --timeout duration (default: 5s)
Specifies how long to wait for a ping to complete. Specifies how long to wait for a ping to complete.
+4 -5
View File
@@ -32,9 +32,8 @@ Specifies how long to wait for a ping to complete.
### -n, --num ### -n, --num
| | | | | |
| ------- | ---------------- | | ---- | ---------------- |
| Type | <code>int</code> | | Type | <code>int</code> |
| Default | <code>10</code> |
Specifies the number of pings to perform. Specifies the number of pings to perform. By default, pings will continue until interrupted.