package api import ( "database/sql" "encoding/json" "fmt" "io" "log" "net/http" "strconv" "strings" "time" "fuego/pkg/types" "github.com/go-chi/chi/v5" ) // handleCreateJob creates a new job func (s *Server) handleCreateJob(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } var req types.CreateJobRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { s.respondError(w, http.StatusBadRequest, "Invalid request body") return } if req.Name == "" { s.respondError(w, http.StatusBadRequest, "Job name is required") return } if req.FrameStart < 0 || req.FrameEnd < req.FrameStart { s.respondError(w, http.StatusBadRequest, "Invalid frame range") return } // Validate frame range limits (prevent abuse) const maxFrameRange = 10000 if req.FrameEnd-req.FrameStart+1 > maxFrameRange { s.respondError(w, http.StatusBadRequest, fmt.Sprintf("Frame range too large. Maximum allowed: %d frames", maxFrameRange)) return } if req.OutputFormat == "" { req.OutputFormat = "PNG" } // Default allow_parallel_runners to true if not provided allowParallelRunners := true if req.AllowParallelRunners != nil { allowParallelRunners = *req.AllowParallelRunners } // Set job timeout to 24 hours (86400 seconds) jobTimeout := 86400 result, err := s.db.Exec( `INSERT INTO jobs (user_id, name, status, progress, frame_start, frame_end, output_format, allow_parallel_runners, timeout_seconds) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, userID, req.Name, types.JobStatusPending, 0.0, req.FrameStart, req.FrameEnd, req.OutputFormat, allowParallelRunners, jobTimeout, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to create job: %v", err)) return } jobID, _ := result.LastInsertId() // Determine task timeout based on output format // 5 minutes (300 seconds) for frame tasks, 24 hours (86400 seconds) for FFmpeg video generation taskTimeout := 300 // Default: 5 minutes for frame rendering if req.OutputFormat == "MP4" { // For MP4, we'll create frame tasks with 5 min timeout // Video generation tasks will be created later with 24h timeout taskTimeout = 300 } // Create tasks for the job (one task per frame for simplicity, could be batched) for frame := req.FrameStart; frame <= req.FrameEnd; frame++ { _, err = s.db.Exec( `INSERT INTO tasks (job_id, frame_start, frame_end, status, timeout_seconds, max_retries) VALUES (?, ?, ?, ?, ?, ?)`, jobID, frame, frame, types.TaskStatusPending, taskTimeout, 3, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to create tasks: %v", err)) return } } job := types.Job{ ID: jobID, UserID: userID, Name: req.Name, Status: types.JobStatusPending, Progress: 0.0, FrameStart: req.FrameStart, FrameEnd: req.FrameEnd, OutputFormat: req.OutputFormat, AllowParallelRunners: allowParallelRunners, TimeoutSeconds: jobTimeout, CreatedAt: time.Now(), } // Immediately try to distribute tasks to connected runners go s.distributeTasksToRunners() s.respondJSON(w, http.StatusCreated, job) } // handleListJobs lists jobs for the current user func (s *Server) handleListJobs(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } rows, err := s.db.Query( `SELECT id, user_id, name, status, progress, frame_start, frame_end, output_format, allow_parallel_runners, timeout_seconds, blend_metadata, created_at, started_at, completed_at, error_message FROM jobs WHERE user_id = ? ORDER BY created_at DESC`, userID, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query jobs: %v", err)) return } defer rows.Close() jobs := []types.Job{} for rows.Next() { var job types.Job var startedAt, completedAt sql.NullTime var blendMetadataJSON sql.NullString err := rows.Scan( &job.ID, &job.UserID, &job.Name, &job.Status, &job.Progress, &job.FrameStart, &job.FrameEnd, &job.OutputFormat, &job.AllowParallelRunners, &job.TimeoutSeconds, &blendMetadataJSON, &job.CreatedAt, &startedAt, &completedAt, &job.ErrorMessage, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to scan job: %v", err)) return } if startedAt.Valid { job.StartedAt = &startedAt.Time } if completedAt.Valid { job.CompletedAt = &completedAt.Time } if blendMetadataJSON.Valid && blendMetadataJSON.String != "" { var metadata types.BlendMetadata if err := json.Unmarshal([]byte(blendMetadataJSON.String), &metadata); err == nil { job.BlendMetadata = &metadata } } jobs = append(jobs, job) } s.respondJSON(w, http.StatusOK, jobs) } // handleGetJob gets a specific job func (s *Server) handleGetJob(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } var job types.Job var startedAt, completedAt sql.NullTime var blendMetadataJSON sql.NullString err = s.db.QueryRow( `SELECT id, user_id, name, status, progress, frame_start, frame_end, output_format, allow_parallel_runners, timeout_seconds, blend_metadata, created_at, started_at, completed_at, error_message FROM jobs WHERE id = ? AND user_id = ?`, jobID, userID, ).Scan( &job.ID, &job.UserID, &job.Name, &job.Status, &job.Progress, &job.FrameStart, &job.FrameEnd, &job.OutputFormat, &job.AllowParallelRunners, &job.TimeoutSeconds, &blendMetadataJSON, &job.CreatedAt, &startedAt, &completedAt, &job.ErrorMessage, ) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Job not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query job: %v", err)) return } if startedAt.Valid { job.StartedAt = &startedAt.Time } if completedAt.Valid { job.CompletedAt = &completedAt.Time } if blendMetadataJSON.Valid && blendMetadataJSON.String != "" { var metadata types.BlendMetadata if err := json.Unmarshal([]byte(blendMetadataJSON.String), &metadata); err == nil { job.BlendMetadata = &metadata } } s.respondJSON(w, http.StatusOK, job) } // handleCancelJob cancels a job func (s *Server) handleCancelJob(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } result, err := s.db.Exec( `UPDATE jobs SET status = ? WHERE id = ? AND user_id = ?`, types.JobStatusCancelled, jobID, userID, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to cancel job: %v", err)) return } rowsAffected, _ := result.RowsAffected() if rowsAffected == 0 { s.respondError(w, http.StatusNotFound, "Job not found") return } // Cancel pending tasks _, err = s.db.Exec( `UPDATE tasks SET status = ? WHERE job_id = ? AND status = ?`, types.TaskStatusFailed, jobID, types.TaskStatusPending, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to cancel tasks: %v", err)) return } s.respondJSON(w, http.StatusOK, map[string]string{"message": "Job cancelled"}) } // handleUploadJobFile handles file upload for a job func (s *Server) handleUploadJobFile(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } // Verify job belongs to user var jobUserID int64 err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Job not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err)) return } if jobUserID != userID { s.respondError(w, http.StatusForbidden, "Access denied") return } // Parse multipart form err = r.ParseMultipartForm(100 << 20) // 100 MB if err != nil { s.respondError(w, http.StatusBadRequest, "Failed to parse form") return } file, header, err := r.FormFile("file") if err != nil { s.respondError(w, http.StatusBadRequest, "No file provided") return } defer file.Close() // Save file filePath, err := s.storage.SaveUpload(jobID, header.Filename, file) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to save file: %v", err)) return } // Record in database result, err := s.db.Exec( `INSERT INTO job_files (job_id, file_type, file_path, file_name, file_size) VALUES (?, ?, ?, ?, ?)`, jobID, types.JobFileTypeInput, filePath, header.Filename, header.Size, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to record file: %v", err)) return } fileID, _ := result.LastInsertId() // If this is a blend file, create a metadata extraction task if strings.HasSuffix(strings.ToLower(header.Filename), ".blend") { // Create metadata extraction task metadataTaskTimeout := 300 // 5 minutes for metadata extraction taskResult, err := s.db.Exec( `INSERT INTO tasks (job_id, frame_start, frame_end, task_type, status, timeout_seconds, max_retries) VALUES (?, ?, ?, ?, ?, ?, ?)`, jobID, 0, 0, types.TaskTypeMetadata, types.TaskStatusPending, metadataTaskTimeout, 1, ) if err != nil { log.Printf("Failed to create metadata extraction task: %v", err) } else { metadataTaskID, _ := taskResult.LastInsertId() log.Printf("Created metadata extraction task %d for job %d", metadataTaskID, jobID) // Try to distribute the task immediately go s.distributeTasksToRunners() } } s.respondJSON(w, http.StatusCreated, map[string]interface{}{ "id": fileID, "file_name": header.Filename, "file_path": filePath, "file_size": header.Size, }) } // handleListJobFiles lists files for a job func (s *Server) handleListJobFiles(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } // Verify job belongs to user var jobUserID int64 err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Job not found") return } if jobUserID != userID { s.respondError(w, http.StatusForbidden, "Access denied") return } rows, err := s.db.Query( `SELECT id, job_id, file_type, file_path, file_name, file_size, created_at FROM job_files WHERE job_id = ? ORDER BY created_at DESC`, jobID, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query files: %v", err)) return } defer rows.Close() files := []types.JobFile{} for rows.Next() { var file types.JobFile err := rows.Scan( &file.ID, &file.JobID, &file.FileType, &file.FilePath, &file.FileName, &file.FileSize, &file.CreatedAt, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to scan file: %v", err)) return } files = append(files, file) } s.respondJSON(w, http.StatusOK, files) } // handleDownloadJobFile downloads a job file func (s *Server) handleDownloadJobFile(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } fileID, err := parseID(r, "fileId") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } // Verify job belongs to user var jobUserID int64 err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Job not found") return } if jobUserID != userID { s.respondError(w, http.StatusForbidden, "Access denied") return } // Get file info var filePath, fileName string err = s.db.QueryRow( `SELECT file_path, file_name FROM job_files WHERE id = ? AND job_id = ?`, fileID, jobID, ).Scan(&filePath, &fileName) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "File not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query file: %v", err)) return } // Open file file, err := s.storage.GetFile(filePath) if err != nil { s.respondError(w, http.StatusNotFound, "File not found on disk") return } defer file.Close() // Set headers w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%s", fileName)) w.Header().Set("Content-Type", "application/octet-stream") // Stream file io.Copy(w, file) } // handleStreamVideo streams MP4 video file with range support func (s *Server) handleStreamVideo(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } // Verify job belongs to user var jobUserID int64 var outputFormat string err = s.db.QueryRow("SELECT user_id, output_format FROM jobs WHERE id = ?", jobID).Scan(&jobUserID, &outputFormat) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Job not found") return } if jobUserID != userID { s.respondError(w, http.StatusForbidden, "Access denied") return } // Find MP4 file var filePath, fileName string err = s.db.QueryRow( `SELECT file_path, file_name FROM job_files WHERE job_id = ? AND file_type = ? AND file_name LIKE '%.mp4' ORDER BY created_at DESC LIMIT 1`, jobID, types.JobFileTypeOutput, ).Scan(&filePath, &fileName) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Video file not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query file: %v", err)) return } // Open file file, err := s.storage.GetFile(filePath) if err != nil { s.respondError(w, http.StatusNotFound, "File not found on disk") return } defer file.Close() // Get file info fileInfo, err := file.Stat() if err != nil { s.respondError(w, http.StatusInternalServerError, "Failed to get file info") return } fileSize := fileInfo.Size() // Handle range requests for video seeking rangeHeader := r.Header.Get("Range") if rangeHeader != "" { // Parse range header var start, end int64 fmt.Sscanf(rangeHeader, "bytes=%d-%d", &start, &end) if end == 0 { end = fileSize - 1 } // Seek to start position file.Seek(start, 0) // Set headers for partial content w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, end, fileSize)) w.Header().Set("Accept-Ranges", "bytes") w.Header().Set("Content-Length", fmt.Sprintf("%d", end-start+1)) w.Header().Set("Content-Type", "video/mp4") w.WriteHeader(http.StatusPartialContent) // Copy partial content io.CopyN(w, file, end-start+1) } else { // Full file w.Header().Set("Content-Type", "video/mp4") w.Header().Set("Content-Length", fmt.Sprintf("%d", fileSize)) w.Header().Set("Accept-Ranges", "bytes") io.Copy(w, file) } } // handleListJobTasks lists all tasks for a job func (s *Server) handleListJobTasks(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } // Verify job belongs to user var jobUserID int64 err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Job not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err)) return } if jobUserID != userID { s.respondError(w, http.StatusForbidden, "Access denied") return } rows, err := s.db.Query( `SELECT id, job_id, runner_id, frame_start, frame_end, status, task_type, current_step, retry_count, max_retries, output_path, created_at, started_at, completed_at, error_message, timeout_seconds FROM tasks WHERE job_id = ? ORDER BY frame_start ASC`, jobID, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query tasks: %v", err)) return } defer rows.Close() tasks := []types.Task{} for rows.Next() { var task types.Task var runnerID sql.NullInt64 var startedAt, completedAt sql.NullTime var timeoutSeconds sql.NullInt64 err := rows.Scan( &task.ID, &task.JobID, &runnerID, &task.FrameStart, &task.FrameEnd, &task.Status, &task.TaskType, &task.CurrentStep, &task.RetryCount, &task.MaxRetries, &task.OutputPath, &task.CreatedAt, &startedAt, &completedAt, &task.ErrorMessage, &timeoutSeconds, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to scan task: %v", err)) return } if runnerID.Valid { task.RunnerID = &runnerID.Int64 } if startedAt.Valid { task.StartedAt = &startedAt.Time } if completedAt.Valid { task.CompletedAt = &completedAt.Time } if timeoutSeconds.Valid { timeout := int(timeoutSeconds.Int64) task.TimeoutSeconds = &timeout } tasks = append(tasks, task) } s.respondJSON(w, http.StatusOK, tasks) } // handleGetTaskLogs retrieves logs for a specific task func (s *Server) handleGetTaskLogs(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } taskIDStr := chi.URLParam(r, "taskId") taskID, err := strconv.ParseInt(taskIDStr, 10, 64) if err != nil { s.respondError(w, http.StatusBadRequest, "Invalid task ID") return } // Verify job belongs to user var jobUserID int64 err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Job not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err)) return } if jobUserID != userID { s.respondError(w, http.StatusForbidden, "Access denied") return } // Verify task belongs to job var taskJobID int64 err = s.db.QueryRow("SELECT job_id FROM tasks WHERE id = ?", taskID).Scan(&taskJobID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Task not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify task: %v", err)) return } if taskJobID != jobID { s.respondError(w, http.StatusBadRequest, "Task does not belong to this job") return } // Get query parameters for filtering stepName := r.URL.Query().Get("step_name") logLevel := r.URL.Query().Get("log_level") limitStr := r.URL.Query().Get("limit") limit := 1000 // default if limitStr != "" { if l, err := strconv.Atoi(limitStr); err == nil && l > 0 { limit = l } } // Build query query := `SELECT id, task_id, runner_id, log_level, message, step_name, created_at FROM task_logs WHERE task_id = ?` args := []interface{}{taskID} if stepName != "" { query += " AND step_name = ?" args = append(args, stepName) } if logLevel != "" { query += " AND log_level = ?" args = append(args, logLevel) } query += " ORDER BY created_at ASC LIMIT ?" args = append(args, limit) rows, err := s.db.Query(query, args...) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query logs: %v", err)) return } defer rows.Close() logs := []types.TaskLog{} for rows.Next() { var log types.TaskLog var runnerID sql.NullInt64 err := rows.Scan( &log.ID, &log.TaskID, &runnerID, &log.LogLevel, &log.Message, &log.StepName, &log.CreatedAt, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to scan log: %v", err)) return } if runnerID.Valid { log.RunnerID = &runnerID.Int64 } logs = append(logs, log) } s.respondJSON(w, http.StatusOK, logs) } // handleGetTaskSteps retrieves step timeline for a specific task func (s *Server) handleGetTaskSteps(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } taskIDStr := chi.URLParam(r, "taskId") taskID, err := strconv.ParseInt(taskIDStr, 10, 64) if err != nil { s.respondError(w, http.StatusBadRequest, "Invalid task ID") return } // Verify job belongs to user var jobUserID int64 err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Job not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err)) return } if jobUserID != userID { s.respondError(w, http.StatusForbidden, "Access denied") return } // Verify task belongs to job var taskJobID int64 err = s.db.QueryRow("SELECT job_id FROM tasks WHERE id = ?", taskID).Scan(&taskJobID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Task not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify task: %v", err)) return } if taskJobID != jobID { s.respondError(w, http.StatusBadRequest, "Task does not belong to this job") return } rows, err := s.db.Query( `SELECT id, task_id, step_name, status, started_at, completed_at, duration_ms, error_message FROM task_steps WHERE task_id = ? ORDER BY started_at ASC`, taskID, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query steps: %v", err)) return } defer rows.Close() steps := []types.TaskStep{} for rows.Next() { var step types.TaskStep var startedAt, completedAt sql.NullTime var durationMs sql.NullInt64 err := rows.Scan( &step.ID, &step.TaskID, &step.StepName, &step.Status, &startedAt, &completedAt, &durationMs, &step.ErrorMessage, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to scan step: %v", err)) return } if startedAt.Valid { step.StartedAt = &startedAt.Time } if completedAt.Valid { step.CompletedAt = &completedAt.Time } if durationMs.Valid { duration := int(durationMs.Int64) step.DurationMs = &duration } steps = append(steps, step) } s.respondJSON(w, http.StatusOK, steps) } // handleRetryTask retries a failed task func (s *Server) handleRetryTask(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { s.respondError(w, http.StatusUnauthorized, err.Error()) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } taskIDStr := chi.URLParam(r, "taskId") taskID, err := strconv.ParseInt(taskIDStr, 10, 64) if err != nil { s.respondError(w, http.StatusBadRequest, "Invalid task ID") return } // Verify job belongs to user var jobUserID int64 err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Job not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err)) return } if jobUserID != userID { s.respondError(w, http.StatusForbidden, "Access denied") return } // Verify task belongs to job and is in a retryable state var taskJobID int64 var taskStatus string var retryCount, maxRetries int err = s.db.QueryRow( "SELECT job_id, status, retry_count, max_retries FROM tasks WHERE id = ?", taskID, ).Scan(&taskJobID, &taskStatus, &retryCount, &maxRetries) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Task not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify task: %v", err)) return } if taskJobID != jobID { s.respondError(w, http.StatusBadRequest, "Task does not belong to this job") return } if taskStatus != string(types.TaskStatusFailed) { s.respondError(w, http.StatusBadRequest, "Task is not in failed state") return } if retryCount >= maxRetries { s.respondError(w, http.StatusBadRequest, "Maximum retries exceeded") return } // Reset task to pending _, err = s.db.Exec( `UPDATE tasks SET status = ?, runner_id = NULL, current_step = NULL, error_message = NULL, started_at = NULL, completed_at = NULL WHERE id = ?`, types.TaskStatusPending, taskID, ) if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to retry task: %v", err)) return } s.respondJSON(w, http.StatusOK, map[string]string{"message": "Task queued for retry"}) } // handleStreamTaskLogsWebSocket streams task logs via WebSocket // Note: This is called after auth middleware, so userID is already verified func (s *Server) handleStreamTaskLogsWebSocket(w http.ResponseWriter, r *http.Request) { userID, err := getUserID(r) if err != nil { http.Error(w, "Unauthorized", http.StatusUnauthorized) return } jobID, err := parseID(r, "id") if err != nil { s.respondError(w, http.StatusBadRequest, err.Error()) return } taskIDStr := chi.URLParam(r, "taskId") taskID, err := strconv.ParseInt(taskIDStr, 10, 64) if err != nil { s.respondError(w, http.StatusBadRequest, "Invalid task ID") return } // Verify job belongs to user var jobUserID int64 err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Job not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err)) return } if jobUserID != userID { s.respondError(w, http.StatusForbidden, "Access denied") return } // Verify task belongs to job var taskJobID int64 err = s.db.QueryRow("SELECT job_id FROM tasks WHERE id = ?", taskID).Scan(&taskJobID) if err == sql.ErrNoRows { s.respondError(w, http.StatusNotFound, "Task not found") return } if err != nil { s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify task: %v", err)) return } if taskJobID != jobID { s.respondError(w, http.StatusBadRequest, "Task does not belong to this job") return } // Upgrade to WebSocket conn, err := s.wsUpgrader.Upgrade(w, r, nil) if err != nil { log.Printf("Failed to upgrade WebSocket: %v", err) return } defer conn.Close() key := fmt.Sprintf("%d:%d", jobID, taskID) s.frontendConnsMu.Lock() s.frontendConns[key] = conn s.frontendConnsMu.Unlock() defer func() { s.frontendConnsMu.Lock() delete(s.frontendConns, key) s.frontendConnsMu.Unlock() }() // Send initial connection message conn.WriteJSON(map[string]interface{}{ "type": "connected", "timestamp": time.Now().Unix(), }) // Get last log ID to start streaming from lastIDStr := r.URL.Query().Get("last_id") lastID := int64(0) if lastIDStr != "" { if id, err := strconv.ParseInt(lastIDStr, 10, 64); err == nil { lastID = id } } // Send existing logs rows, err := s.db.Query( `SELECT id, task_id, runner_id, log_level, message, step_name, created_at FROM task_logs WHERE task_id = ? AND id > ? ORDER BY created_at ASC LIMIT 100`, taskID, lastID, ) if err == nil { defer rows.Close() for rows.Next() { var log types.TaskLog var runnerID sql.NullInt64 err := rows.Scan( &log.ID, &log.TaskID, &runnerID, &log.LogLevel, &log.Message, &log.StepName, &log.CreatedAt, ) if err != nil { continue } if runnerID.Valid { log.RunnerID = &runnerID.Int64 } if log.ID > lastID { lastID = log.ID } conn.WriteJSON(map[string]interface{}{ "type": "log", "data": log, "timestamp": time.Now().Unix(), }) } } // Poll for new logs and send them ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() ctx := r.Context() for { select { case <-ctx.Done(): return case <-ticker.C: rows, err := s.db.Query( `SELECT id, task_id, runner_id, log_level, message, step_name, created_at FROM task_logs WHERE task_id = ? AND id > ? ORDER BY created_at ASC LIMIT 100`, taskID, lastID, ) if err != nil { continue } for rows.Next() { var log types.TaskLog var runnerID sql.NullInt64 err := rows.Scan( &log.ID, &log.TaskID, &runnerID, &log.LogLevel, &log.Message, &log.StepName, &log.CreatedAt, ) if err != nil { rows.Close() continue } if runnerID.Valid { log.RunnerID = &runnerID.Int64 } if log.ID > lastID { lastID = log.ID } conn.WriteJSON(map[string]interface{}{ "type": "log", "data": log, "timestamp": time.Now().Unix(), }) } rows.Close() } } }