feat: add agent timings (#14713)

* feat: begin impl of agent script timings

* feat: add job_id and display_name to script timings

* fix: increment migration number

* fix: rename migrations from 251 to 254

* test: get tests compiling

* fix: appease the linter

* fix: get tests passing again

* fix: drop column from correct table

* test: add fixture for agent script timings

* fix: typo

* fix: use job id used in provisioner job timings

* fix: increment migration number

* test: behaviour of script runner

* test: rewrite test

* test: does exit 1 script break things?

* test: rewrite test again

* fix: revert change

Not sure how this came to be, I do not recall manually changing
these files.

* fix: let code breathe

* fix: wrap errors

* fix: justify nolint

* fix: swap require.Equal argument order

* fix: add mutex operations

* feat: add 'ran_on_start' and 'blocked_login' fields

* fix: update testdata fixture

* fix: refer to agent_id instead of job_id in timings

* fix: JobID -> AgentID in dbauthz_test

* fix: add 'id' to scripts, make timing refer to script id

* fix: fix broken tests and convert bug

* fix: update testdata fixtures

* fix: update testdata fixtures again

* feat: capture stage and if script timed out

* fix: update migration number

* test: add test for script api

* fix: fake db query

* fix: use UTC time

* fix: ensure r.scriptComplete is not nil

* fix: move err check to right after call

* fix: uppercase sql

* fix: use dbtime.Now()

* fix: debug log on r.scriptCompleted being nil

* fix: ensure correct rbac permissions

* chore: remove DisplayName

* fix: get tests passing

* fix: remove space in sql up

* docs: document ExecuteOption

* fix: drop 'RETURNING' from sql

* chore: remove 'display_name' from timing table

* fix: testdata fixture

* fix: put r.scriptCompleted call in goroutine

* fix: track goroutine for test + use separate context for reporting

* fix: appease linter, handle trackCommandGoroutine error

* fix: resolve race condition

* feat: replace timed_out column with status column

* test: update testdata fixture

* fix: apply suggestions from review

* revert: linter changes
This commit is contained in:
Danielle Maywood
2024-09-24 10:51:49 +01:00
committed by GitHub
parent b8944074c4
commit ae522c558d
43 changed files with 1367 additions and 232 deletions
+40 -10
View File
@@ -17,6 +17,7 @@ import (
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/agent/agentscripts"
"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/agent/agenttest"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/testutil"
@@ -34,14 +35,13 @@ func TestExecuteBasic(t *testing.T) {
return fLogger
})
defer runner.Close()
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
LogSourceID: uuid.New(),
Script: "echo hello",
}})
}}, aAPI.ScriptCompleted)
require.NoError(t, err)
require.NoError(t, runner.Execute(context.Background(), func(script codersdk.WorkspaceAgentScript) bool {
return true
}))
require.NoError(t, runner.Execute(context.Background(), agentscripts.ExecuteAllScripts))
log := testutil.RequireRecvCtx(ctx, t, fLogger.logs)
require.Equal(t, "hello", log.Output)
}
@@ -61,18 +61,17 @@ func TestEnv(t *testing.T) {
cmd.exe /c echo %CODER_SCRIPT_BIN_DIR%
`
}
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
LogSourceID: id,
Script: script,
}})
}}, aAPI.ScriptCompleted)
require.NoError(t, err)
ctx := testutil.Context(t, testutil.WaitLong)
done := testutil.Go(t, func() {
err := runner.Execute(ctx, func(script codersdk.WorkspaceAgentScript) bool {
return true
})
err := runner.Execute(ctx, agentscripts.ExecuteAllScripts)
assert.NoError(t, err)
})
defer func() {
@@ -103,13 +102,44 @@ func TestTimeout(t *testing.T) {
t.Parallel()
runner := setup(t, nil)
defer runner.Close()
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
LogSourceID: uuid.New(),
Script: "sleep infinity",
Timeout: time.Millisecond,
}})
}}, aAPI.ScriptCompleted)
require.NoError(t, err)
require.ErrorIs(t, runner.Execute(context.Background(), nil), agentscripts.ErrTimeout)
require.ErrorIs(t, runner.Execute(context.Background(), agentscripts.ExecuteAllScripts), agentscripts.ErrTimeout)
}
func TestScriptReportsTiming(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitShort)
fLogger := newFakeScriptLogger()
runner := setup(t, func(uuid2 uuid.UUID) agentscripts.ScriptLogger {
return fLogger
})
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
DisplayName: "say-hello",
LogSourceID: uuid.New(),
Script: "echo hello",
}}, aAPI.ScriptCompleted)
require.NoError(t, err)
require.NoError(t, runner.Execute(ctx, agentscripts.ExecuteAllScripts))
runner.Close()
log := testutil.RequireRecvCtx(ctx, t, fLogger.logs)
require.Equal(t, "hello", log.Output)
timings := aAPI.GetTimings()
require.Equal(t, 1, len(timings))
timing := timings[0]
require.Equal(t, int32(0), timing.ExitCode)
require.GreaterOrEqual(t, timing.End.AsTime(), timing.Start.AsTime())
}
// TestCronClose exists because cron.Run() can happen after cron.Close().