From bb57ce86598777d20a6717d07b0f729b9d71b72e Mon Sep 17 00:00:00 2001 From: Justin Harms Date: Sat, 3 Jan 2026 09:01:08 -0600 Subject: [PATCH] Update task status handling to reset runner_id on job cancellation and failure - Modified SQL queries in multiple functions to set runner_id to NULL when updating task statuses for cancelled jobs and failed tasks. - Ensured that tasks are properly marked as failed with the correct error messages and updated completion timestamps. - Improved handling of task statuses to prevent potential issues with task assignment and execution. --- internal/manager/jobs.go | 2 +- internal/manager/runners.go | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/internal/manager/jobs.go b/internal/manager/jobs.go index 8ab6157..d431475 100644 --- a/internal/manager/jobs.go +++ b/internal/manager/jobs.go @@ -944,7 +944,7 @@ func (s *Manager) handleCancelJob(w http.ResponseWriter, r *http.Request) { // Cancel all pending tasks _, err = conn.Exec( - `UPDATE tasks SET status = ? WHERE job_id = ? AND status = ?`, + `UPDATE tasks SET status = ?, runner_id = NULL WHERE job_id = ? AND status = ?`, types.TaskStatusFailed, jobID, types.TaskStatusPending, ) return err diff --git a/internal/manager/runners.go b/internal/manager/runners.go index 07e75d9..0a55a9f 100644 --- a/internal/manager/runners.go +++ b/internal/manager/runners.go @@ -390,7 +390,7 @@ func (s *Manager) handleNextJob(w http.ResponseWriter, r *http.Request) { t.condition FROM tasks t JOIN jobs j ON t.job_id = j.id - WHERE t.status = ? AND j.status != ? + WHERE t.status = ? AND t.runner_id IS NULL AND j.status != ? ORDER BY t.created_at ASC LIMIT 50`, types.TaskStatusPending, types.JobStatusCancelled, @@ -1363,7 +1363,7 @@ func (s *Manager) handleRunnerJobWebSocket(w http.ResponseWriter, r *http.Reques log.Printf("Job WebSocket disconnected unexpectedly for task %d, marking as failed", taskID) s.db.With(func(conn *sql.DB) error { _, err := conn.Exec( - `UPDATE tasks SET status = ?, error_message = ?, completed_at = ? WHERE id = ?`, + `UPDATE tasks SET status = ?, runner_id = NULL, error_message = ?, completed_at = ? WHERE id = ?`, types.TaskStatusFailed, "WebSocket connection lost", time.Now(), taskID, ) return err @@ -1678,11 +1678,10 @@ func (s *Manager) handleWebSocketTaskComplete(runnerID int64, taskUpdate WSTaskU } else { // No retries remaining - mark as failed err = s.db.WithTx(func(tx *sql.Tx) error { - _, err := tx.Exec(`UPDATE tasks SET status = ? WHERE id = ?`, types.TaskStatusFailed, taskUpdate.TaskID) - if err != nil { - return err - } - _, err = tx.Exec(`UPDATE tasks SET completed_at = ? WHERE id = ?`, now, taskUpdate.TaskID) + _, err := tx.Exec( + `UPDATE tasks SET status = ?, runner_id = NULL, completed_at = ? WHERE id = ?`, + types.TaskStatusFailed, now, taskUpdate.TaskID, + ) if err != nil { return err } @@ -1849,7 +1848,7 @@ func (s *Manager) cancelActiveTasksForJob(jobID int64) error { // Tasks don't have a cancelled status - mark them as failed instead err := s.db.With(func(conn *sql.DB) error { _, err := conn.Exec( - `UPDATE tasks SET status = ?, error_message = ? WHERE job_id = ? AND status IN (?, ?)`, + `UPDATE tasks SET status = ?, runner_id = NULL, error_message = ? WHERE job_id = ? AND status IN (?, ?)`, types.TaskStatusFailed, "Job cancelled", jobID, types.TaskStatusPending, types.TaskStatusRunning, ) if err != nil {