Refactor job status handling to prevent race conditions

- Removed redundant error handling in handleListJobTasks.
- Introduced per-job mutexes in Manager to serialize updateJobStatusFromTasks calls, ensuring thread safety during concurrent task completions.
- Added methods to manage job status update mutexes, including creation and cleanup after job completion or failure.
- Improved error handling in handleGetJobStatusForRunner by consolidating error checks.
2026-01-02 18:22:55 -06:00
parent 8e561922c9
commit b51b96a618
3 changed files with 43 additions and 12 deletions
--- a/internal/manager/manager.go
+++ b/internal/manager/manager.go
@@ -89,6 +89,9 @@ type Manager struct {
 	// Throttling for task status updates (per task)
 	taskUpdateTimes   map[int64]time.Time // key: taskID
 	taskUpdateTimesMu sync.RWMutex
+	// Per-job mutexes to serialize updateJobStatusFromTasks calls and prevent race conditions
+	jobStatusUpdateMu   map[int64]*sync.Mutex // key: jobID
+	jobStatusUpdateMuMu sync.RWMutex

 	// Client WebSocket connections (new unified WebSocket)
 	// Key is "userID:connID" to support multiple tabs per user
@@ -162,6 +165,8 @@ func NewManager(db *database.DB, cfg *config.Config, auth *authpkg.Auth, storage
 		runnerJobConns:          make(map[string]*websocket.Conn),
 		runnerJobConnsWriteMu:   make(map[string]*sync.Mutex),
 		runnerJobConnsWriteMuMu: sync.RWMutex{}, // Initialize the new field
+		// Per-job mutexes for serializing status updates
+		jobStatusUpdateMu: make(map[int64]*sync.Mutex),
 	}

 	// Check for required external tools