fix(scaletest/templates): fix bugs and improve debugging (#10316)

This commit is contained in:
Mathias Fredriksson
2023-10-17 18:18:49 +03:00
committed by GitHub
parent 2f4ca0f566
commit 8f39ec5cc3
3 changed files with 24 additions and 17 deletions
@@ -82,7 +82,7 @@ end_phase() {
phase=$(tail -n 1 "${SCALETEST_PHASE_FILE}" | grep "START:${phase_num}:" | cut -d' ' -f3-) phase=$(tail -n 1 "${SCALETEST_PHASE_FILE}" | grep "START:${phase_num}:" | cut -d' ' -f3-)
if [[ -z ${phase} ]]; then if [[ -z ${phase} ]]; then
log "BUG: Could not find start phase ${phase_num} in ${SCALETEST_PHASE_FILE}" log "BUG: Could not find start phase ${phase_num} in ${SCALETEST_PHASE_FILE}"
exit 1 return 1
fi fi
log "End phase ${phase_num}: ${phase}" log "End phase ${phase_num}: ${phase}"
echo "$(date -Ins) END:${phase_num}: ${phase}" >>"${SCALETEST_PHASE_FILE}" echo "$(date -Ins) END:${phase_num}: ${phase}" >>"${SCALETEST_PHASE_FILE}"
@@ -132,6 +132,7 @@ annotate_grafana() {
'{time: $time, tags: $tags | split(","), text: $text}' <<<'{}' '{time: $time, tags: $tags | split(","), text: $text}' <<<'{}'
)" )"
if [[ ${DRY_RUN} == 1 ]]; then if [[ ${DRY_RUN} == 1 ]]; then
echo "FAKEID:${tags}:${text}:${start}" >>"${SCALETEST_STATE_DIR}/grafana-annotations"
log "Would have annotated Grafana, data=${json}" log "Would have annotated Grafana, data=${json}"
return 0 return 0
fi fi
@@ -171,23 +172,18 @@ annotate_grafana_end() {
tags="${tags},${GRAFANA_EXTRA_TAGS}" tags="${tags},${GRAFANA_EXTRA_TAGS}"
fi fi
if [[ ${DRY_RUN} == 1 ]]; then
log "Would have updated Grafana annotation (end=${end}): ${text} [${tags}]"
return 0
fi
if ! id=$(grep ":${tags}:${text}:${start}" "${SCALETEST_STATE_DIR}/grafana-annotations" | sort -n | tail -n1 | cut -d: -f1); then if ! id=$(grep ":${tags}:${text}:${start}" "${SCALETEST_STATE_DIR}/grafana-annotations" | sort -n | tail -n1 | cut -d: -f1); then
log "NOTICE: Could not find Grafana annotation to end: '${tags}:${text}:${start}', skipping..." log "NOTICE: Could not find Grafana annotation to end: '${tags}:${text}:${start}', skipping..."
return 0 return 0
fi fi
log "Annotating Grafana (end=${end}): ${text} [${tags}]" log "Updating Grafana annotation (end=${end}): ${text} [${tags}, add=${GRAFANA_ADD_TAGS:-}]"
if [[ -n ${GRAFANA_ADD_TAGS:-} ]]; then if [[ -n ${GRAFANA_ADD_TAGS:-} ]]; then
json="$( json="$(
jq -n \ jq -n \
--argjson timeEnd "${end}" \ --argjson timeEnd "${end}" \
--argjson tags "${tags},${GRAFANA_ADD_TAGS}" \ --arg tags "${tags},${GRAFANA_ADD_TAGS}" \
'{timeEnd: $timeEnd, tags: $tags | split(",")}' '{timeEnd: $timeEnd, tags: $tags | split(",")}'
)" )"
else else
@@ -275,7 +271,7 @@ coder_pods() {
fetch_coder_full() { fetch_coder_full() {
if [[ -x "${SCALETEST_CODER_BINARY}" ]]; then if [[ -x "${SCALETEST_CODER_BINARY}" ]]; then
log "Full Coder binary already exists at ${SCALETEST_CODER_BINARY}" log "Full Coder binary already exists at ${SCALETEST_CODER_BINARY}"
return return 0
fi fi
ns=$(namespace) ns=$(namespace)
if [[ -z "${ns}" ]]; then if [[ -z "${ns}" ]]; then
@@ -286,12 +282,12 @@ fetch_coder_full() {
pods=$(coder_pods) pods=$(coder_pods)
if [[ -z ${pods} ]]; then if [[ -z ${pods} ]]; then
log "Could not find coder pods!" log "Could not find coder pods!"
return return 1
fi fi
pod=$(cut -d ' ' -f 1 <<<"${pods}") pod=$(cut -d ' ' -f 1 <<<"${pods}")
if [[ -z ${pod} ]]; then if [[ -z ${pod} ]]; then
log "Could not find coder pod!" log "Could not find coder pod!"
return return 1
fi fi
log "Fetching full Coder binary from ${pod}" log "Fetching full Coder binary from ${pod}"
# We need --retries due to https://github.com/kubernetes/kubernetes/issues/60140 :( # We need --retries due to https://github.com/kubernetes/kubernetes/issues/60140 :(
@@ -309,8 +305,8 @@ fetch_coder_full() {
# com.coder.scaletest.status. It will overwrite the previous status. # com.coder.scaletest.status. It will overwrite the previous status.
set_pod_status_annotation() { set_pod_status_annotation() {
if [[ $# -ne 1 ]]; then if [[ $# -ne 1 ]]; then
log "must specify an annotation value" log "BUG: Must specify an annotation value"
return return 1
else else
maybedryrun "${DRY_RUN}" kubectl --namespace "$(namespace)" annotate pod "$(hostname)" "com.coder.scaletest.status=$1" --overwrite maybedryrun "${DRY_RUN}" kubectl --namespace "$(namespace)" annotate pod "$(hostname)" "com.coder.scaletest.status=$1" --overwrite
fi fi
@@ -73,6 +73,10 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do
maybedryrun "$DRY_RUN" sleep 10 maybedryrun "$DRY_RUN" sleep 10
status=1 status=1
;; ;;
*)
log "WARNING: Unknown load scenario: ${scenario}, skipping..."
;;
esac esac
set -e set -e
if ((status > 0)); then if ((status > 0)); then
@@ -47,7 +47,11 @@ annotate_grafana "workspace" "Agent running" # Ended in shutdown.sh.
trap 'trap - EXIT; kill -INT "${pids[@]}"; exit 1' INT EXIT trap 'trap - EXIT; kill -INT "${pids[@]}"; exit 1' INT EXIT
while :; do while :; do
sleep 285 # ~300 when accounting for profile and trace. # Sleep for short periods of time so that we can exit quickly.
# This adds up to ~300 when accounting for profile and trace.
for ((i = 0; i < 285; i++)); do
sleep 1
done
log "Grabbing pprof dumps" log "Grabbing pprof dumps"
start="$(date +%s)" start="$(date +%s)"
annotate_grafana "pprof" "Grab pprof dumps (start=${start})" annotate_grafana "pprof" "Grab pprof dumps (start=${start})"
@@ -74,13 +78,13 @@ gather_logs() {
annotate_grafana "logs" "Gather logs" annotate_grafana "logs" "Gather logs"
podsraw="$( podsraw="$(
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder -o name kubectl -n coder-big get pods -l app.kubernetes.io/name=coder -o name
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-provisioner -o name kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-provisioner -o name || true
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-workspace -o name | grep "^pod/scaletest-" kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-workspace -o name | grep "^pod/scaletest-" || true
)" )"
mapfile -t pods <<<"${podsraw}" mapfile -t pods <<<"${podsraw}"
for pod in "${pods[@]}"; do for pod in "${pods[@]}"; do
pod_name="${pod#pod/}" pod_name="${pod#pod/}"
kubectl -n coder-big logs "${pod}" --since="${SCALETEST_RUN_START_TIME}" >"${SCALETEST_LOGS_DIR}/${pod_name}.txt" kubectl -n coder-big logs "${pod}" --since-time="${SCALETEST_RUN_START_TIME}" >"${SCALETEST_LOGS_DIR}/${pod_name}.txt"
done done
annotate_grafana_end "logs" "Gather logs" annotate_grafana_end "logs" "Gather logs"
} }
@@ -131,6 +135,9 @@ on_exit() {
set_appearance "${appearance_json}" "${message_color}" "${service_banner_message} | Scaletest ${message_status}: [${CODER_USER}/${CODER_WORKSPACE}](${CODER_URL}/@${CODER_USER}/${CODER_WORKSPACE})!" set_appearance "${appearance_json}" "${message_color}" "${service_banner_message} | Scaletest ${message_status}: [${CODER_USER}/${CODER_WORKSPACE}](${CODER_URL}/@${CODER_USER}/${CODER_WORKSPACE})!"
annotate_grafana_end "" "Start scaletest: ${SCALETEST_COMMENT}" annotate_grafana_end "" "Start scaletest: ${SCALETEST_COMMENT}"
wait "${pprof_pid}"
exit "${code}"
} }
trap on_exit EXIT trap on_exit EXIT