Files
coder/agent/agentscripts/agentscripts_test.go
T
Danielle Maywood ae522c558d feat: add agent timings (#14713)
* feat: begin impl of agent script timings

* feat: add job_id and display_name to script timings

* fix: increment migration number

* fix: rename migrations from 251 to 254

* test: get tests compiling

* fix: appease the linter

* fix: get tests passing again

* fix: drop column from correct table

* test: add fixture for agent script timings

* fix: typo

* fix: use job id used in provisioner job timings

* fix: increment migration number

* test: behaviour of script runner

* test: rewrite test

* test: does exit 1 script break things?

* test: rewrite test again

* fix: revert change

Not sure how this came to be, I do not recall manually changing
these files.

* fix: let code breathe

* fix: wrap errors

* fix: justify nolint

* fix: swap require.Equal argument order

* fix: add mutex operations

* feat: add 'ran_on_start' and 'blocked_login' fields

* fix: update testdata fixture

* fix: refer to agent_id instead of job_id in timings

* fix: JobID -> AgentID in dbauthz_test

* fix: add 'id' to scripts, make timing refer to script id

* fix: fix broken tests and convert bug

* fix: update testdata fixtures

* fix: update testdata fixtures again

* feat: capture stage and if script timed out

* fix: update migration number

* test: add test for script api

* fix: fake db query

* fix: use UTC time

* fix: ensure r.scriptComplete is not nil

* fix: move err check to right after call

* fix: uppercase sql

* fix: use dbtime.Now()

* fix: debug log on r.scriptCompleted being nil

* fix: ensure correct rbac permissions

* chore: remove DisplayName

* fix: get tests passing

* fix: remove space in sql up

* docs: document ExecuteOption

* fix: drop 'RETURNING' from sql

* chore: remove 'display_name' from timing table

* fix: testdata fixture

* fix: put r.scriptCompleted call in goroutine

* fix: track goroutine for test + use separate context for reporting

* fix: appease linter, handle trackCommandGoroutine error

* fix: resolve race condition

* feat: replace timed_out column with status column

* test: update testdata fixture

* fix: apply suggestions from review

* revert: linter changes
2024-09-24 10:51:49 +01:00

212 lines
5.5 KiB
Go

package agentscripts_test
import (
"context"
"path/filepath"
"runtime"
"testing"
"time"
"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/agent/agentscripts"
"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/agent/agenttest"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/testutil"
)
func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
}
func TestExecuteBasic(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitShort)
fLogger := newFakeScriptLogger()
runner := setup(t, func(uuid2 uuid.UUID) agentscripts.ScriptLogger {
return fLogger
})
defer runner.Close()
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
LogSourceID: uuid.New(),
Script: "echo hello",
}}, aAPI.ScriptCompleted)
require.NoError(t, err)
require.NoError(t, runner.Execute(context.Background(), agentscripts.ExecuteAllScripts))
log := testutil.RequireRecvCtx(ctx, t, fLogger.logs)
require.Equal(t, "hello", log.Output)
}
func TestEnv(t *testing.T) {
t.Parallel()
fLogger := newFakeScriptLogger()
runner := setup(t, func(uuid2 uuid.UUID) agentscripts.ScriptLogger {
return fLogger
})
defer runner.Close()
id := uuid.New()
script := "echo $CODER_SCRIPT_DATA_DIR\necho $CODER_SCRIPT_BIN_DIR\n"
if runtime.GOOS == "windows" {
script = `
cmd.exe /c echo %CODER_SCRIPT_DATA_DIR%
cmd.exe /c echo %CODER_SCRIPT_BIN_DIR%
`
}
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
LogSourceID: id,
Script: script,
}}, aAPI.ScriptCompleted)
require.NoError(t, err)
ctx := testutil.Context(t, testutil.WaitLong)
done := testutil.Go(t, func() {
err := runner.Execute(ctx, agentscripts.ExecuteAllScripts)
assert.NoError(t, err)
})
defer func() {
select {
case <-ctx.Done():
case <-done:
}
}()
var log []agentsdk.Log
for {
select {
case <-ctx.Done():
require.Fail(t, "timed out waiting for logs")
case l := <-fLogger.logs:
t.Logf("log: %s", l.Output)
log = append(log, l)
}
if len(log) >= 2 {
break
}
}
require.Contains(t, log[0].Output, filepath.Join(runner.DataDir(), id.String()))
require.Contains(t, log[1].Output, runner.ScriptBinDir())
}
func TestTimeout(t *testing.T) {
t.Parallel()
runner := setup(t, nil)
defer runner.Close()
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
LogSourceID: uuid.New(),
Script: "sleep infinity",
Timeout: time.Millisecond,
}}, aAPI.ScriptCompleted)
require.NoError(t, err)
require.ErrorIs(t, runner.Execute(context.Background(), agentscripts.ExecuteAllScripts), agentscripts.ErrTimeout)
}
func TestScriptReportsTiming(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitShort)
fLogger := newFakeScriptLogger()
runner := setup(t, func(uuid2 uuid.UUID) agentscripts.ScriptLogger {
return fLogger
})
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
DisplayName: "say-hello",
LogSourceID: uuid.New(),
Script: "echo hello",
}}, aAPI.ScriptCompleted)
require.NoError(t, err)
require.NoError(t, runner.Execute(ctx, agentscripts.ExecuteAllScripts))
runner.Close()
log := testutil.RequireRecvCtx(ctx, t, fLogger.logs)
require.Equal(t, "hello", log.Output)
timings := aAPI.GetTimings()
require.Equal(t, 1, len(timings))
timing := timings[0]
require.Equal(t, int32(0), timing.ExitCode)
require.GreaterOrEqual(t, timing.End.AsTime(), timing.Start.AsTime())
}
// TestCronClose exists because cron.Run() can happen after cron.Close().
// If this happens, there used to be a deadlock.
func TestCronClose(t *testing.T) {
t.Parallel()
runner := agentscripts.New(agentscripts.Options{})
runner.StartCron()
require.NoError(t, runner.Close(), "close runner")
}
func setup(t *testing.T, getScriptLogger func(logSourceID uuid.UUID) agentscripts.ScriptLogger) *agentscripts.Runner {
t.Helper()
if getScriptLogger == nil {
// noop
getScriptLogger = func(uuid uuid.UUID) agentscripts.ScriptLogger {
return noopScriptLogger{}
}
}
fs := afero.NewMemMapFs()
logger := slogtest.Make(t, nil)
s, err := agentssh.NewServer(context.Background(), logger, prometheus.NewRegistry(), fs, nil)
require.NoError(t, err)
t.Cleanup(func() {
_ = s.Close()
})
return agentscripts.New(agentscripts.Options{
LogDir: t.TempDir(),
DataDirBase: t.TempDir(),
Logger: logger,
SSHServer: s,
Filesystem: fs,
GetScriptLogger: getScriptLogger,
})
}
type noopScriptLogger struct{}
func (noopScriptLogger) Send(context.Context, ...agentsdk.Log) error {
return nil
}
func (noopScriptLogger) Flush(context.Context) error {
return nil
}
type fakeScriptLogger struct {
logs chan agentsdk.Log
}
func (f *fakeScriptLogger) Send(ctx context.Context, logs ...agentsdk.Log) error {
for _, log := range logs {
select {
case <-ctx.Done():
return ctx.Err()
case f.logs <- log:
// OK!
}
}
return nil
}
func (*fakeScriptLogger) Flush(context.Context) error {
return nil
}
func newFakeScriptLogger() *fakeScriptLogger {
return &fakeScriptLogger{make(chan agentsdk.Log, 100)}
}