Files
jiggablend/internal/api/jobs.go

1100 lines
30 KiB
Go

package api
import (
"database/sql"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"strconv"
"strings"
"time"
"fuego/pkg/types"
"github.com/go-chi/chi/v5"
)
// handleCreateJob validates a job-creation request, inserts the job row plus
// one pending task per frame, and responds with the newly created job.
//
// Responses:
//   - 401 when getUserID fails
//   - 400 for an undecodable body, an empty name, or an invalid/oversized frame range
//   - 500 when a database write fails
//   - 201 with the created job on success
func (s *Server) handleCreateJob(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	var req types.CreateJobRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		s.respondError(w, http.StatusBadRequest, "Invalid request body")
		return
	}
	if req.Name == "" {
		s.respondError(w, http.StatusBadRequest, "Job name is required")
		return
	}
	if req.FrameStart < 0 || req.FrameEnd < req.FrameStart {
		s.respondError(w, http.StatusBadRequest, "Invalid frame range")
		return
	}

	// Validate frame range limits (prevent abuse). The comparison is written
	// without the "+1" so that a FrameEnd at the top of the integer range
	// cannot overflow and slip past the limit.
	const maxFrameRange = 10000
	if req.FrameEnd-req.FrameStart >= maxFrameRange {
		s.respondError(w, http.StatusBadRequest, fmt.Sprintf("Frame range too large. Maximum allowed: %d frames", maxFrameRange))
		return
	}

	if req.OutputFormat == "" {
		req.OutputFormat = "PNG"
	}

	// Default allow_parallel_runners to true if not provided.
	allowParallelRunners := true
	if req.AllowParallelRunners != nil {
		allowParallelRunners = *req.AllowParallelRunners
	}

	// Job timeout: 24 hours (86400 seconds).
	jobTimeout := 86400

	result, err := s.db.Exec(
		`INSERT INTO jobs (user_id, name, status, progress, frame_start, frame_end, output_format, allow_parallel_runners, timeout_seconds)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
		userID, req.Name, types.JobStatusPending, 0.0, req.FrameStart, req.FrameEnd, req.OutputFormat, allowParallelRunners, jobTimeout,
	)
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to create job: %v", err))
		return
	}
	jobID, err := result.LastInsertId()
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to create job: %v", err))
		return
	}

	// Frame tasks get a 5 minute (300 second) timeout for every output format.
	// Per the original note, MP4 video generation tasks are created later with
	// their own 24 hour timeout, so the format does not affect this value.
	const taskTimeout = 300

	// Create tasks for the job (one task per frame for simplicity, could be batched).
	for frame := req.FrameStart; frame <= req.FrameEnd; frame++ {
		_, err = s.db.Exec(
			`INSERT INTO tasks (job_id, frame_start, frame_end, status, timeout_seconds, max_retries)
VALUES (?, ?, ?, ?, ?, ?)`,
			jobID, frame, frame, types.TaskStatusPending, taskTimeout, 3,
		)
		if err != nil {
			// Best-effort cleanup so a half-populated job is not left pending
			// after the client has been told that creation failed.
			if _, delErr := s.db.Exec(`DELETE FROM tasks WHERE job_id = ?`, jobID); delErr != nil {
				log.Printf("Failed to clean up tasks for job %d: %v", jobID, delErr)
			}
			if _, delErr := s.db.Exec(`DELETE FROM jobs WHERE id = ?`, jobID); delErr != nil {
				log.Printf("Failed to clean up job %d: %v", jobID, delErr)
			}
			s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to create tasks: %v", err))
			return
		}
	}

	job := types.Job{
		ID:                   jobID,
		UserID:               userID,
		Name:                 req.Name,
		Status:               types.JobStatusPending,
		Progress:             0.0,
		FrameStart:           req.FrameStart,
		FrameEnd:             req.FrameEnd,
		OutputFormat:         req.OutputFormat,
		AllowParallelRunners: allowParallelRunners,
		TimeoutSeconds:       jobTimeout,
		CreatedAt:            time.Now(),
	}

	// Immediately try to distribute tasks to connected runners.
	go s.distributeTasksToRunners()

	s.respondJSON(w, http.StatusCreated, job)
}
// handleListJobs responds with every job owned by the requesting user, newest
// first (ordered by created_at DESC).
//
// It answers 401 when getUserID fails and 500 when the query, a row scan, or
// row iteration fails. Stored blend metadata that fails to unmarshal is
// silently omitted from the affected job.
func (s *Server) handleListJobs(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	rows, err := s.db.Query(
		`SELECT id, user_id, name, status, progress, frame_start, frame_end, output_format,
allow_parallel_runners, timeout_seconds, blend_metadata, created_at, started_at, completed_at, error_message
FROM jobs WHERE user_id = ? ORDER BY created_at DESC`,
		userID,
	)
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query jobs: %v", err))
		return
	}
	defer rows.Close()

	// Non-nil empty slice so an empty result encodes as [] rather than null.
	jobs := []types.Job{}
	for rows.Next() {
		var job types.Job
		var startedAt, completedAt sql.NullTime
		var blendMetadataJSON sql.NullString
		err := rows.Scan(
			&job.ID, &job.UserID, &job.Name, &job.Status, &job.Progress,
			&job.FrameStart, &job.FrameEnd, &job.OutputFormat, &job.AllowParallelRunners, &job.TimeoutSeconds,
			&blendMetadataJSON, &job.CreatedAt, &startedAt, &completedAt, &job.ErrorMessage,
		)
		if err != nil {
			s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to scan job: %v", err))
			return
		}
		if startedAt.Valid {
			job.StartedAt = &startedAt.Time
		}
		if completedAt.Valid {
			job.CompletedAt = &completedAt.Time
		}
		if blendMetadataJSON.Valid && blendMetadataJSON.String != "" {
			var metadata types.BlendMetadata
			if err := json.Unmarshal([]byte(blendMetadataJSON.String), &metadata); err == nil {
				job.BlendMetadata = &metadata
			}
		}
		jobs = append(jobs, job)
	}
	// rows.Next returns false on both exhaustion and failure; without this
	// check a mid-iteration error would yield a truncated list with 200 OK.
	if err := rows.Err(); err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to read jobs: %v", err))
		return
	}

	s.respondJSON(w, http.StatusOK, jobs)
}
// handleGetJob responds with a single job identified by the "id" URL
// parameter, restricted to jobs owned by the requesting user.
//
// It answers 401 when getUserID fails, 400 when the ID cannot be parsed, 404
// when no row matches both the job ID and the user, and 500 on any other
// database error.
func (s *Server) handleGetJob(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	var (
		job         types.Job
		started     sql.NullTime
		finished    sql.NullTime
		rawMetadata sql.NullString
	)
	row := s.db.QueryRow(
		`SELECT id, user_id, name, status, progress, frame_start, frame_end, output_format,
allow_parallel_runners, timeout_seconds, blend_metadata, created_at, started_at, completed_at, error_message
FROM jobs WHERE id = ? AND user_id = ?`,
		jobID, userID,
	)
	err = row.Scan(
		&job.ID, &job.UserID, &job.Name, &job.Status, &job.Progress,
		&job.FrameStart, &job.FrameEnd, &job.OutputFormat, &job.AllowParallelRunners, &job.TimeoutSeconds,
		&rawMetadata, &job.CreatedAt, &started, &finished, &job.ErrorMessage,
	)
	switch {
	case err == sql.ErrNoRows:
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	case err != nil:
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query job: %v", err))
		return
	}

	// Nullable timestamps are exposed as pointers only when present.
	if started.Valid {
		job.StartedAt = &started.Time
	}
	if finished.Valid {
		job.CompletedAt = &finished.Time
	}

	// Metadata that is absent, empty, or fails to decode is left unset.
	if rawMetadata.Valid && rawMetadata.String != "" {
		var parsed types.BlendMetadata
		if json.Unmarshal([]byte(rawMetadata.String), &parsed) == nil {
			job.BlendMetadata = &parsed
		}
	}

	s.respondJSON(w, http.StatusOK, job)
}
// handleCancelJob marks a job owned by the requesting user as cancelled and
// then marks that job's still-pending tasks as failed.
//
// It answers 401 when getUserID fails, 400 when the ID cannot be parsed, 404
// when the update matches no row for this user, and 500 on database errors.
func (s *Server) handleCancelJob(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	result, err := s.db.Exec(
		`UPDATE jobs SET status = ? WHERE id = ? AND user_id = ?`,
		types.JobStatusCancelled, jobID, userID,
	)
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to cancel job: %v", err))
		return
	}

	// A failure to read the affected-row count must not be reported as
	// "Job not found"; surface it as a server error instead.
	rowsAffected, err := result.RowsAffected()
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to cancel job: %v", err))
		return
	}
	if rowsAffected == 0 {
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	}

	// Cancel pending tasks. They are recorded with the failed task status;
	// tasks in any other state are left untouched by this statement.
	_, err = s.db.Exec(
		`UPDATE tasks SET status = ? WHERE job_id = ? AND status = ?`,
		types.TaskStatusFailed, jobID, types.TaskStatusPending,
	)
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to cancel tasks: %v", err))
		return
	}

	s.respondJSON(w, http.StatusOK, map[string]string{"message": "Job cancelled"})
}
// handleUploadJobFile accepts a multipart upload (form field "file") for a job
// owned by the requesting user, stores it through s.storage, and records it in
// job_files as an input file.
//
// When the uploaded name ends in ".blend" (case-insensitive) a metadata
// extraction task is also queued; a failure to queue that task is only logged
// and does not fail the upload.
//
// It answers 401/400/404/403 for auth, ID, lookup and ownership failures, 400
// for form problems, 500 for storage or database failures, and 201 with the
// stored file's details on success.
func (s *Server) handleUploadJobFile(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	// Ownership check: the job must exist and belong to the caller.
	var ownerID int64
	err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&ownerID)
	switch {
	case err == sql.ErrNoRows:
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	case err != nil:
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err))
		return
	case ownerID != userID:
		s.respondError(w, http.StatusForbidden, "Access denied")
		return
	}

	// Up to 100 MB of the form is held in memory while parsing.
	if err = r.ParseMultipartForm(100 << 20); err != nil {
		s.respondError(w, http.StatusBadRequest, "Failed to parse form")
		return
	}

	upload, header, err := r.FormFile("file")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, "No file provided")
		return
	}
	defer upload.Close()

	// Persist the bytes first, then record the stored path.
	storedPath, err := s.storage.SaveUpload(jobID, header.Filename, upload)
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to save file: %v", err))
		return
	}

	inserted, err := s.db.Exec(
		`INSERT INTO job_files (job_id, file_type, file_path, file_name, file_size)
VALUES (?, ?, ?, ?, ?)`,
		jobID, types.JobFileTypeInput, storedPath, header.Filename, header.Size,
	)
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to record file: %v", err))
		return
	}
	fileID, _ := inserted.LastInsertId()

	lowerName := strings.ToLower(header.Filename)
	if strings.HasSuffix(lowerName, ".blend") {
		// Blend files get a metadata extraction task: 5 minute timeout, one retry.
		const metadataTimeoutSeconds = 300
		queued, queueErr := s.db.Exec(
			`INSERT INTO tasks (job_id, frame_start, frame_end, task_type, status, timeout_seconds, max_retries)
VALUES (?, ?, ?, ?, ?, ?, ?)`,
			jobID, 0, 0, types.TaskTypeMetadata, types.TaskStatusPending, metadataTimeoutSeconds, 1,
		)
		if queueErr == nil {
			metadataTaskID, _ := queued.LastInsertId()
			log.Printf("Created metadata extraction task %d for job %d", metadataTaskID, jobID)
			// Try to hand the new task to a runner right away.
			go s.distributeTasksToRunners()
		} else {
			log.Printf("Failed to create metadata extraction task: %v", queueErr)
		}
	}

	payload := map[string]interface{}{
		"id":        fileID,
		"file_name": header.Filename,
		"file_path": storedPath,
		"file_size": header.Size,
	}
	s.respondJSON(w, http.StatusCreated, payload)
}
// handleListJobFiles responds with the files recorded for a job owned by the
// requesting user, newest first (ordered by created_at DESC).
//
// It answers 401 when getUserID fails, 400 when the ID cannot be parsed, 404
// when the job does not exist, 403 when it belongs to another user, and 500 on
// database errors.
func (s *Server) handleListJobFiles(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	// Verify job belongs to user.
	var jobUserID int64
	err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	}
	// Any other lookup failure must stop here: jobUserID would still be zero
	// and the ownership comparison below would be made against a bogus value.
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err))
		return
	}
	if jobUserID != userID {
		s.respondError(w, http.StatusForbidden, "Access denied")
		return
	}

	rows, err := s.db.Query(
		`SELECT id, job_id, file_type, file_path, file_name, file_size, created_at
FROM job_files WHERE job_id = ? ORDER BY created_at DESC`,
		jobID,
	)
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query files: %v", err))
		return
	}
	defer rows.Close()

	// Non-nil empty slice so an empty result encodes as [] rather than null.
	files := []types.JobFile{}
	for rows.Next() {
		var file types.JobFile
		err := rows.Scan(
			&file.ID, &file.JobID, &file.FileType, &file.FilePath,
			&file.FileName, &file.FileSize, &file.CreatedAt,
		)
		if err != nil {
			s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to scan file: %v", err))
			return
		}
		files = append(files, file)
	}
	// Distinguish "no more rows" from "iteration failed part-way".
	if err := rows.Err(); err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to read files: %v", err))
		return
	}

	s.respondJSON(w, http.StatusOK, files)
}
// handleDownloadJobFile streams one file of a job owned by the requesting user
// as an attachment. The job comes from the "id" URL parameter and the file
// from "fileId"; the file must be recorded against that job.
//
// It answers 401/400 for auth and ID failures, 404 when the job, the file
// record, or the stored file is missing, 403 when the job belongs to another
// user, and 500 on database errors.
func (s *Server) handleDownloadJobFile(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	fileID, err := parseID(r, "fileId")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	// Verify job belongs to user.
	var jobUserID int64
	err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	}
	// Any other lookup failure must stop here: jobUserID would still be zero
	// and the ownership comparison below would be made against a bogus value.
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err))
		return
	}
	if jobUserID != userID {
		s.respondError(w, http.StatusForbidden, "Access denied")
		return
	}

	// Get file info.
	var filePath, fileName string
	err = s.db.QueryRow(
		`SELECT file_path, file_name FROM job_files WHERE id = ? AND job_id = ?`,
		fileID, jobID,
	).Scan(&filePath, &fileName)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "File not found")
		return
	}
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query file: %v", err))
		return
	}

	// Open file.
	file, err := s.storage.GetFile(filePath)
	if err != nil {
		s.respondError(w, http.StatusNotFound, "File not found on disk")
		return
	}
	defer file.Close()

	// Quote the filename so spaces, semicolons, quotes and control characters
	// in a user-supplied name cannot break or inject into the header value.
	w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", fileName))
	w.Header().Set("Content-Type", "application/octet-stream")

	// Stream file. The status line has already been sent once copying starts,
	// so a failure can only be logged.
	if _, err := io.Copy(w, file); err != nil {
		log.Printf("Failed to stream file %d for job %d: %v", fileID, jobID, err)
	}
}
// handleStreamVideo serves the most recently created ".mp4" output file of a
// job owned by the requesting user, with HTTP Range support for seeking.
//
// Range handling is delegated to http.ServeContent, which validates the Range
// header, answers 206/416 as appropriate, and sets Content-Length,
// Content-Range and Accept-Ranges itself.
//
// It answers 401/400 for auth and ID failures, 404 when the job, the video
// record, or the stored file is missing, 403 when the job belongs to another
// user, and 500 on database or stat errors.
func (s *Server) handleStreamVideo(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	// Verify job belongs to user.
	var jobUserID int64
	err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	}
	// Any other lookup failure must stop here: jobUserID would still be zero
	// and the ownership comparison below would be made against a bogus value.
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err))
		return
	}
	if jobUserID != userID {
		s.respondError(w, http.StatusForbidden, "Access denied")
		return
	}

	// Find MP4 file.
	var filePath, fileName string
	err = s.db.QueryRow(
		`SELECT file_path, file_name FROM job_files
WHERE job_id = ? AND file_type = ? AND file_name LIKE '%.mp4'
ORDER BY created_at DESC LIMIT 1`,
		jobID, types.JobFileTypeOutput,
	).Scan(&filePath, &fileName)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Video file not found")
		return
	}
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query file: %v", err))
		return
	}

	// Open file.
	file, err := s.storage.GetFile(filePath)
	if err != nil {
		s.respondError(w, http.StatusNotFound, "File not found on disk")
		return
	}
	defer file.Close()

	// Get file info; the modification time feeds Last-Modified / If-Range.
	fileInfo, err := file.Stat()
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, "Failed to get file info")
		return
	}

	// Setting Content-Type up front stops ServeContent from sniffing it.
	w.Header().Set("Content-Type", "video/mp4")
	http.ServeContent(w, r, fileName, fileInfo.ModTime(), file)
}
// handleListJobTasks responds with every task of a job owned by the requesting
// user, ordered by frame_start ascending.
//
// It answers 401 when getUserID fails, 400 when the ID cannot be parsed, 404
// when the job does not exist, 403 when it belongs to another user, and 500 on
// database errors.
func (s *Server) handleListJobTasks(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	// Verify job belongs to user.
	var jobUserID int64
	err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	}
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err))
		return
	}
	if jobUserID != userID {
		s.respondError(w, http.StatusForbidden, "Access denied")
		return
	}

	rows, err := s.db.Query(
		`SELECT id, job_id, runner_id, frame_start, frame_end, status, task_type,
current_step, retry_count, max_retries, output_path, created_at, started_at,
completed_at, error_message, timeout_seconds
FROM tasks WHERE job_id = ? ORDER BY frame_start ASC`,
		jobID,
	)
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query tasks: %v", err))
		return
	}
	defer rows.Close()

	// Non-nil empty slice so an empty result encodes as [] rather than null.
	tasks := []types.Task{}
	for rows.Next() {
		var task types.Task
		var runnerID sql.NullInt64
		var startedAt, completedAt sql.NullTime
		var timeoutSeconds sql.NullInt64
		err := rows.Scan(
			&task.ID, &task.JobID, &runnerID, &task.FrameStart, &task.FrameEnd,
			&task.Status, &task.TaskType, &task.CurrentStep, &task.RetryCount,
			&task.MaxRetries, &task.OutputPath, &task.CreatedAt, &startedAt,
			&completedAt, &task.ErrorMessage, &timeoutSeconds,
		)
		if err != nil {
			s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to scan task: %v", err))
			return
		}
		// Nullable columns are exposed as pointers only when present.
		if runnerID.Valid {
			task.RunnerID = &runnerID.Int64
		}
		if startedAt.Valid {
			task.StartedAt = &startedAt.Time
		}
		if completedAt.Valid {
			task.CompletedAt = &completedAt.Time
		}
		if timeoutSeconds.Valid {
			timeout := int(timeoutSeconds.Int64)
			task.TimeoutSeconds = &timeout
		}
		tasks = append(tasks, task)
	}
	// Distinguish "no more rows" from "iteration failed part-way".
	if err := rows.Err(); err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to read tasks: %v", err))
		return
	}

	s.respondJSON(w, http.StatusOK, tasks)
}
// handleGetTaskLogs responds with log entries for one task of a job owned by
// the requesting user, oldest first (ordered by created_at ASC).
//
// Optional query parameters:
//   - step_name: only entries with this step name
//   - log_level: only entries with this level
//   - limit:     maximum number of entries (default 1000; non-numeric or
//     non-positive values fall back to the default)
//
// It answers 401/400 for auth and ID failures, 404 when the job or task does
// not exist, 403 when the job belongs to another user, 400 when the task
// belongs to a different job, and 500 on database errors.
func (s *Server) handleGetTaskLogs(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	taskIDStr := chi.URLParam(r, "taskId")
	taskID, err := strconv.ParseInt(taskIDStr, 10, 64)
	if err != nil {
		s.respondError(w, http.StatusBadRequest, "Invalid task ID")
		return
	}

	// Verify job belongs to user.
	var jobUserID int64
	err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	}
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err))
		return
	}
	if jobUserID != userID {
		s.respondError(w, http.StatusForbidden, "Access denied")
		return
	}

	// Verify task belongs to job.
	var taskJobID int64
	err = s.db.QueryRow("SELECT job_id FROM tasks WHERE id = ?", taskID).Scan(&taskJobID)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Task not found")
		return
	}
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify task: %v", err))
		return
	}
	if taskJobID != jobID {
		s.respondError(w, http.StatusBadRequest, "Task does not belong to this job")
		return
	}

	// Get query parameters for filtering.
	params := r.URL.Query()
	stepName := params.Get("step_name")
	logLevel := params.Get("log_level")
	limit := 1000 // default
	if limitStr := params.Get("limit"); limitStr != "" {
		if l, err := strconv.Atoi(limitStr); err == nil && l > 0 {
			limit = l
		}
	}

	// Build query. Filters are appended as placeholders, never interpolated.
	query := `SELECT id, task_id, runner_id, log_level, message, step_name, created_at
FROM task_logs WHERE task_id = ?`
	args := []interface{}{taskID}
	if stepName != "" {
		query += " AND step_name = ?"
		args = append(args, stepName)
	}
	if logLevel != "" {
		query += " AND log_level = ?"
		args = append(args, logLevel)
	}
	query += " ORDER BY created_at ASC LIMIT ?"
	args = append(args, limit)

	rows, err := s.db.Query(query, args...)
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query logs: %v", err))
		return
	}
	defer rows.Close()

	// Non-nil empty slice so an empty result encodes as [] rather than null.
	logs := []types.TaskLog{}
	for rows.Next() {
		// Named entry rather than log so the log package is not shadowed.
		var entry types.TaskLog
		var runnerID sql.NullInt64
		err := rows.Scan(
			&entry.ID, &entry.TaskID, &runnerID, &entry.LogLevel, &entry.Message,
			&entry.StepName, &entry.CreatedAt,
		)
		if err != nil {
			s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to scan log: %v", err))
			return
		}
		if runnerID.Valid {
			entry.RunnerID = &runnerID.Int64
		}
		logs = append(logs, entry)
	}
	// Distinguish "no more rows" from "iteration failed part-way".
	if err := rows.Err(); err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to read logs: %v", err))
		return
	}

	s.respondJSON(w, http.StatusOK, logs)
}
// handleGetTaskSteps responds with the step timeline of one task of a job
// owned by the requesting user, ordered by started_at ascending.
//
// It answers 401/400 for auth and ID failures, 404 when the job or task does
// not exist, 403 when the job belongs to another user, 400 when the task
// belongs to a different job, and 500 on database errors.
func (s *Server) handleGetTaskSteps(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	taskIDStr := chi.URLParam(r, "taskId")
	taskID, err := strconv.ParseInt(taskIDStr, 10, 64)
	if err != nil {
		s.respondError(w, http.StatusBadRequest, "Invalid task ID")
		return
	}

	// Verify job belongs to user.
	var jobUserID int64
	err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	}
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err))
		return
	}
	if jobUserID != userID {
		s.respondError(w, http.StatusForbidden, "Access denied")
		return
	}

	// Verify task belongs to job.
	var taskJobID int64
	err = s.db.QueryRow("SELECT job_id FROM tasks WHERE id = ?", taskID).Scan(&taskJobID)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Task not found")
		return
	}
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify task: %v", err))
		return
	}
	if taskJobID != jobID {
		s.respondError(w, http.StatusBadRequest, "Task does not belong to this job")
		return
	}

	rows, err := s.db.Query(
		`SELECT id, task_id, step_name, status, started_at, completed_at, duration_ms, error_message
FROM task_steps WHERE task_id = ? ORDER BY started_at ASC`,
		taskID,
	)
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query steps: %v", err))
		return
	}
	defer rows.Close()

	// Non-nil empty slice so an empty result encodes as [] rather than null.
	steps := []types.TaskStep{}
	for rows.Next() {
		var step types.TaskStep
		var startedAt, completedAt sql.NullTime
		var durationMs sql.NullInt64
		err := rows.Scan(
			&step.ID, &step.TaskID, &step.StepName, &step.Status,
			&startedAt, &completedAt, &durationMs, &step.ErrorMessage,
		)
		if err != nil {
			s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to scan step: %v", err))
			return
		}
		// Nullable columns are exposed as pointers only when present.
		if startedAt.Valid {
			step.StartedAt = &startedAt.Time
		}
		if completedAt.Valid {
			step.CompletedAt = &completedAt.Time
		}
		if durationMs.Valid {
			duration := int(durationMs.Int64)
			step.DurationMs = &duration
		}
		steps = append(steps, step)
	}
	// Distinguish "no more rows" from "iteration failed part-way".
	if err := rows.Err(); err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to read steps: %v", err))
		return
	}

	s.respondJSON(w, http.StatusOK, steps)
}
// handleRetryTask puts a failed task of a job owned by the requesting user
// back into the pending state, clearing its runner, current step, error
// message and start/completion timestamps.
//
// The task must belong to the job in the URL, be in the failed state, and have
// a retry_count below max_retries; otherwise 400 is returned. Auth, ID and
// lookup failures answer 401/400/404/403, and database errors answer 500.
func (s *Server) handleRetryTask(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		s.respondError(w, http.StatusUnauthorized, err.Error())
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	taskID, err := strconv.ParseInt(chi.URLParam(r, "taskId"), 10, 64)
	if err != nil {
		s.respondError(w, http.StatusBadRequest, "Invalid task ID")
		return
	}

	// Ownership check: the job must exist and belong to the caller.
	var ownerID int64
	err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&ownerID)
	switch {
	case err == sql.ErrNoRows:
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	case err != nil:
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err))
		return
	case ownerID != userID:
		s.respondError(w, http.StatusForbidden, "Access denied")
		return
	}

	// Load the task and make sure it is eligible for a retry.
	var (
		parentJobID int64
		status      string
		attempts    int
		attemptCap  int
	)
	err = s.db.QueryRow(
		"SELECT job_id, status, retry_count, max_retries FROM tasks WHERE id = ?",
		taskID,
	).Scan(&parentJobID, &status, &attempts, &attemptCap)
	switch {
	case err == sql.ErrNoRows:
		s.respondError(w, http.StatusNotFound, "Task not found")
		return
	case err != nil:
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify task: %v", err))
		return
	case parentJobID != jobID:
		s.respondError(w, http.StatusBadRequest, "Task does not belong to this job")
		return
	case status != string(types.TaskStatusFailed):
		s.respondError(w, http.StatusBadRequest, "Task is not in failed state")
		return
	case attempts >= attemptCap:
		s.respondError(w, http.StatusBadRequest, "Maximum retries exceeded")
		return
	}

	// Return the task to the queue with its per-run fields cleared.
	if _, err = s.db.Exec(
		`UPDATE tasks SET status = ?, runner_id = NULL, current_step = NULL,
error_message = NULL, started_at = NULL, completed_at = NULL
WHERE id = ?`,
		types.TaskStatusPending, taskID,
	); err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to retry task: %v", err))
		return
	}

	s.respondJSON(w, http.StatusOK, map[string]string{"message": "Task queued for retry"})
}
// handleStreamTaskLogsWebSocket streams task logs via WebSocket.
// Note: This is called after auth middleware, so userID is already verified.
//
// After verifying that the job belongs to the user and the task belongs to the
// job, the request is upgraded and the connection is registered in
// s.frontendConns under "jobID:taskID". The handler then sends a "connected"
// message, the backlog of log rows with id greater than the optional "last_id"
// query parameter, and polls once per second for newer rows. Each batch is
// limited to 100 rows and sent as "log" messages in ascending id order.
//
// The handler returns when the request context is cancelled or when a write
// to the WebSocket fails (the client has gone away).
func (s *Server) handleStreamTaskLogsWebSocket(w http.ResponseWriter, r *http.Request) {
	userID, err := getUserID(r)
	if err != nil {
		http.Error(w, "Unauthorized", http.StatusUnauthorized)
		return
	}

	jobID, err := parseID(r, "id")
	if err != nil {
		s.respondError(w, http.StatusBadRequest, err.Error())
		return
	}

	taskIDStr := chi.URLParam(r, "taskId")
	taskID, err := strconv.ParseInt(taskIDStr, 10, 64)
	if err != nil {
		s.respondError(w, http.StatusBadRequest, "Invalid task ID")
		return
	}

	// Verify job belongs to user.
	var jobUserID int64
	err = s.db.QueryRow("SELECT user_id FROM jobs WHERE id = ?", jobID).Scan(&jobUserID)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Job not found")
		return
	}
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify job: %v", err))
		return
	}
	if jobUserID != userID {
		s.respondError(w, http.StatusForbidden, "Access denied")
		return
	}

	// Verify task belongs to job.
	var taskJobID int64
	err = s.db.QueryRow("SELECT job_id FROM tasks WHERE id = ?", taskID).Scan(&taskJobID)
	if err == sql.ErrNoRows {
		s.respondError(w, http.StatusNotFound, "Task not found")
		return
	}
	if err != nil {
		s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify task: %v", err))
		return
	}
	if taskJobID != jobID {
		s.respondError(w, http.StatusBadRequest, "Task does not belong to this job")
		return
	}

	// Upgrade to WebSocket.
	conn, err := s.wsUpgrader.Upgrade(w, r, nil)
	if err != nil {
		log.Printf("Failed to upgrade WebSocket: %v", err)
		return
	}
	defer conn.Close()

	key := fmt.Sprintf("%d:%d", jobID, taskID)
	s.frontendConnsMu.Lock()
	s.frontendConns[key] = conn
	s.frontendConnsMu.Unlock()
	defer func() {
		s.frontendConnsMu.Lock()
		// Only unregister our own connection: a newer connection for the same
		// job/task may have replaced this entry in the meantime.
		if s.frontendConns[key] == conn {
			delete(s.frontendConns, key)
		}
		s.frontendConnsMu.Unlock()
	}()

	// Send initial connection message.
	if err := conn.WriteJSON(map[string]interface{}{
		"type":      "connected",
		"timestamp": time.Now().Unix(),
	}); err != nil {
		return
	}

	// Get last log ID to start streaming from.
	lastID := int64(0)
	if lastIDStr := r.URL.Query().Get("last_id"); lastIDStr != "" {
		if id, err := strconv.ParseInt(lastIDStr, 10, 64); err == nil {
			lastID = id
		}
	}

	// sendNewLogs forwards up to 100 log rows with id > lastID and advances
	// lastID past every row it scans. Rows are ordered by id, the same column
	// used as the cursor, so a batch boundary can never skip rows. Database
	// problems are logged and retried on the next call; only a failed
	// WebSocket write is returned, signalling that streaming should stop.
	sendNewLogs := func() error {
		rows, err := s.db.Query(
			`SELECT id, task_id, runner_id, log_level, message, step_name, created_at
FROM task_logs WHERE task_id = ? AND id > ? ORDER BY id ASC LIMIT 100`,
			taskID, lastID,
		)
		if err != nil {
			log.Printf("Failed to query logs for task %d: %v", taskID, err)
			return nil
		}
		defer rows.Close()

		for rows.Next() {
			var entry types.TaskLog
			var runnerID sql.NullInt64
			if err := rows.Scan(
				&entry.ID, &entry.TaskID, &runnerID, &entry.LogLevel, &entry.Message,
				&entry.StepName, &entry.CreatedAt,
			); err != nil {
				// Skip the unreadable row but keep delivering the rest.
				log.Printf("Failed to scan log for task %d: %v", taskID, err)
				continue
			}
			if runnerID.Valid {
				entry.RunnerID = &runnerID.Int64
			}
			if entry.ID > lastID {
				lastID = entry.ID
			}
			if err := conn.WriteJSON(map[string]interface{}{
				"type":      "log",
				"data":      entry,
				"timestamp": time.Now().Unix(),
			}); err != nil {
				return err
			}
		}
		if err := rows.Err(); err != nil {
			log.Printf("Failed to read logs for task %d: %v", taskID, err)
		}
		return nil
	}

	// Send existing logs.
	if err := sendNewLogs(); err != nil {
		return
	}

	// Poll for new logs and send them.
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()
	ctx := r.Context()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			// Stop as soon as a write fails; otherwise this loop would keep
			// polling the database for a client that is no longer there.
			if err := sendNewLogs(); err != nil {
				return
			}
		}
	}
}