Enhance logging and context handling in job management. Introduce logger initialization with configurable parameters in the manager and runner commands. Switch job context archives from tar.gz to plain tar and implement ETag generation for improved caching. Refactor API endpoints to support the new context file structure and improve error handling in job submissions. Add support for unhide-objects and auto-execution options in job creation requests.

2025-11-24 21:48:05 -06:00
parent a029714e08
commit 4ac05d50a1
23 changed files with 4133 additions and 1311 deletions
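The ETag generation mentioned in the commit message is not visible in the hunks below. A minimal sketch of one way it could work for the context tar, assuming the ETag is derived from the file's size and modification time (etagForFile and writeConditional are hypothetical helper names, not taken from this commit):

package api

import (
	"fmt"
	"net/http"
	"os"
)

// etagForFile builds a weak ETag from the file's size and mtime.
// Hypothetical helper; the actual commit may instead hash the tar contents.
func etagForFile(path string) (string, error) {
	info, err := os.Stat(path)
	if err != nil {
		return "", err
	}
	return fmt.Sprintf(`W/"%x-%x"`, info.Size(), info.ModTime().UnixNano()), nil
}

// writeConditional sets the ETag header and answers If-None-Match
// requests with 304 so unchanged context tars are not re-downloaded.
// Returns true when the response has already been written.
func writeConditional(w http.ResponseWriter, r *http.Request, path string) bool {
	etag, err := etagForFile(path)
	if err != nil {
		return false
	}
	w.Header().Set("ETag", etag)
	if r.Header.Get("If-None-Match") == etag {
		w.WriteHeader(http.StatusNotModified)
		return true
	}
	return false
}

In handleDownloadJobContext such a check would run after the FileExists check and before streaming the file.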


@@ -9,6 +9,7 @@ import (
"log"
"math/rand"
"net/http"
"net/url"
"path/filepath"
"sort"
"strconv"
@@ -17,6 +18,7 @@ import (
"jiggablend/pkg/types"
"github.com/go-chi/chi/v5"
"github.com/gorilla/websocket"
)
@@ -287,13 +289,27 @@ func (s *Server) handleUpdateTaskStep(w http.ResponseWriter, r *http.Request) {
}
}
// Get job ID for broadcasting
var jobID int64
err = s.db.QueryRow("SELECT job_id FROM tasks WHERE id = ?", taskID).Scan(&jobID)
if err == nil {
// Broadcast step update to frontend
s.broadcastTaskUpdate(jobID, taskID, "step_update", map[string]interface{}{
"step_id": stepID,
"step_name": req.StepName,
"status": req.Status,
"duration_ms": req.DurationMs,
"error_message": req.ErrorMessage,
})
}
s.respondJSON(w, http.StatusOK, map[string]interface{}{
"step_id": stepID,
"message": "Step updated successfully",
})
}
// handleDownloadJobContext allows runners to download the job context tar.gz
// handleDownloadJobContext allows runners to download the job context tar
func (s *Server) handleDownloadJobContext(w http.ResponseWriter, r *http.Request) {
jobID, err := parseID(r, "jobId")
if err != nil {
@@ -302,7 +318,7 @@ func (s *Server) handleDownloadJobContext(w http.ResponseWriter, r *http.Request
}
// Construct the context file path
contextPath := filepath.Join(s.storage.JobPath(jobID), "context.tar.gz")
contextPath := filepath.Join(s.storage.JobPath(jobID), "context.tar")
// Check if context file exists
if !s.storage.FileExists(contextPath) {
@@ -319,9 +335,9 @@ func (s *Server) handleDownloadJobContext(w http.ResponseWriter, r *http.Request
}
defer file.Close()
// Set appropriate headers for tar.gz file
w.Header().Set("Content-Type", "application/gzip")
w.Header().Set("Content-Disposition", "attachment; filename=context.tar.gz")
// Set appropriate headers for tar file
w.Header().Set("Content-Type", "application/x-tar")
w.Header().Set("Content-Disposition", "attachment; filename=context.tar")
// Stream the file to the response
io.Copy(w, file)
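The runner-side counterpart of this handler is not shown in this file. A rough sketch of how a runner might fetch and unpack the now-uncompressed context tar (downloadJobContext and the /api/runner/jobs/{id}/context path are assumptions for illustration, not taken from this commit):

package runner

import (
	"archive/tar"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
)

// downloadJobContext streams the context tar from the manager and unpacks
// it into destDir. No gzip layer is needed now that the context is plain tar.
func downloadJobContext(baseURL string, jobID int64, destDir string) error {
	// Hypothetical endpoint path; the real route is defined elsewhere in the repo.
	resp, err := http.Get(fmt.Sprintf("%s/api/runner/jobs/%d/context", baseURL, jobID))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status: %s", resp.Status)
	}

	tr := tar.NewReader(resp.Body)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		target := filepath.Join(destDir, filepath.Clean(hdr.Name))
		switch hdr.Typeflag {
		case tar.TypeDir:
			if err := os.MkdirAll(target, 0o755); err != nil {
				return err
			}
		case tar.TypeReg:
			if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
				return err
			}
			out, err := os.Create(target)
			if err != nil {
				return err
			}
			if _, err := io.Copy(out, tr); err != nil {
				out.Close()
				return err
			}
			out.Close()
		}
	}
}

Authentication headers and path-traversal hardening are omitted for brevity.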
@@ -356,16 +372,26 @@ func (s *Server) handleUploadFileFromRunner(w http.ResponseWriter, r *http.Reque
}
// Record in database
_, err = s.db.Exec(
var fileID int64
err = s.db.QueryRow(
`INSERT INTO job_files (job_id, file_type, file_path, file_name, file_size)
VALUES (?, ?, ?, ?, ?)`,
VALUES (?, ?, ?, ?, ?)
RETURNING id`,
jobID, types.JobFileTypeOutput, filePath, header.Filename, header.Size,
)
).Scan(&fileID)
if err != nil {
s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to record file: %v", err))
return
}
// Broadcast file addition
s.broadcastJobUpdate(jobID, "file_added", map[string]interface{}{
"file_id": fileID,
"file_type": types.JobFileTypeOutput,
"file_name": header.Filename,
"file_size": header.Size,
})
s.respondJSON(w, http.StatusCreated, map[string]interface{}{
"file_path": filePath,
"file_name": header.Filename,
@@ -510,6 +536,79 @@ func (s *Server) handleGetJobMetadataForRunner(w http.ResponseWriter, r *http.Re
s.respondJSON(w, http.StatusOK, metadata)
}
// handleDownloadFileForRunner allows runners to download a file by fileName
func (s *Server) handleDownloadFileForRunner(w http.ResponseWriter, r *http.Request) {
jobID, err := parseID(r, "jobId")
if err != nil {
s.respondError(w, http.StatusBadRequest, err.Error())
return
}
// Get fileName from URL path (may need URL decoding)
fileName := chi.URLParam(r, "fileName")
if fileName == "" {
s.respondError(w, http.StatusBadRequest, "fileName is required")
return
}
// URL decode the fileName in case it contains encoded characters
decodedFileName, err := url.QueryUnescape(fileName)
if err != nil {
// If decoding fails, use original fileName
decodedFileName = fileName
}
// Get file info from database
var filePath string
err = s.db.QueryRow(
`SELECT file_path FROM job_files WHERE job_id = ? AND file_name = ?`,
jobID, decodedFileName,
).Scan(&filePath)
if err == sql.ErrNoRows {
s.respondError(w, http.StatusNotFound, "File not found")
return
}
if err != nil {
s.respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to query file: %v", err))
return
}
// Open file
file, err := s.storage.GetFile(filePath)
if err != nil {
s.respondError(w, http.StatusNotFound, "File not found on disk")
return
}
defer file.Close()
// Determine content type based on file extension
contentType := "application/octet-stream"
fileNameLower := strings.ToLower(decodedFileName)
switch {
case strings.HasSuffix(fileNameLower, ".png"):
contentType = "image/png"
case strings.HasSuffix(fileNameLower, ".jpg") || strings.HasSuffix(fileNameLower, ".jpeg"):
contentType = "image/jpeg"
case strings.HasSuffix(fileNameLower, ".gif"):
contentType = "image/gif"
case strings.HasSuffix(fileNameLower, ".webp"):
contentType = "image/webp"
case strings.HasSuffix(fileNameLower, ".exr") || strings.HasSuffix(fileNameLower, ".EXR"):
contentType = "image/x-exr"
case strings.HasSuffix(fileNameLower, ".mp4"):
contentType = "video/mp4"
case strings.HasSuffix(fileNameLower, ".webm"):
contentType = "video/webm"
}
// Set headers
w.Header().Set("Content-Type", contentType)
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%s", decodedFileName))
// Stream file
io.Copy(w, file)
}
// WebSocket message types
type WSMessage struct {
Type string `json:"type"`
@@ -785,6 +884,13 @@ func (s *Server) handleWebSocketTaskComplete(runnerID int64, taskUpdate WSTaskUp
taskUpdate.TaskID,
).Scan(&jobID)
if err == nil {
// Broadcast task update
s.broadcastTaskUpdate(jobID, taskUpdate.TaskID, "task_update", map[string]interface{}{
"status": status,
"output_path": taskUpdate.OutputPath,
"completed_at": now,
"error": taskUpdate.Error,
})
s.updateJobStatusFromTasks(jobID)
}
}
@@ -840,6 +946,7 @@ func (s *Server) getCurrentFrameFromLogs(jobID int64) (int, bool) {
for rows.Next() {
var taskID int64
if err := rows.Scan(&taskID); err != nil {
log.Printf("Failed to scan task ID in getCurrentFrameFromLogs: %v", err)
continue
}
@@ -895,6 +1002,14 @@ func (s *Server) updateJobStatusFromTasks(jobID int64) {
allowParallelRunners.Valid && !allowParallelRunners.Bool &&
frameStart.Valid && frameEnd.Valid
// Get current job status to detect changes
var currentStatus string
err = s.db.QueryRow(`SELECT status FROM jobs WHERE id = ?`, jobID).Scan(&currentStatus)
if err != nil {
log.Printf("Failed to get current job status for job %d: %v", jobID, err)
return
}
// Count total tasks and completed tasks
var totalTasks, completedTasks int
err = s.db.QueryRow(
@@ -914,8 +1029,6 @@ func (s *Server) updateJobStatusFromTasks(jobID int64) {
return
}
log.Printf("updateJobStatusFromTasks: job %d - total: %d, completed: %d", jobID, totalTasks, completedTasks)
// Calculate progress
var progress float64
if totalTasks == 0 {
@@ -985,9 +1098,6 @@ func (s *Server) updateJobStatusFromTasks(jobID int64) {
} else {
progress = renderProgress
}
log.Printf("updateJobStatusFromTasks: job %d - frame-based progress: current_frame=%d, render_progress=%.1f%%, non_render_progress=%.1f%%, total_progress=%.1f%%",
jobID, currentFrame, renderProgress, nonRenderProgress, progress)
} else {
// Standard task-based progress
progress = float64(completedTasks) / float64(totalTasks) * 100.0
@@ -1013,8 +1123,6 @@ func (s *Server) updateJobStatusFromTasks(jobID int64) {
return
}
log.Printf("updateJobStatusFromTasks: job %d - pending/running: %d", jobID, pendingOrRunningTasks)
if pendingOrRunningTasks == 0 && totalTasks > 0 {
// All tasks are either completed or failed/cancelled
// Check if any tasks failed
@@ -1039,7 +1147,16 @@ func (s *Server) updateJobStatusFromTasks(jobID int64) {
if err != nil {
log.Printf("Failed to update job %d status to %s: %v", jobID, jobStatus, err)
} else {
log.Printf("Updated job %d status to %s (progress: %.1f%%, completed tasks: %d/%d)", jobID, jobStatus, progress, completedTasks, totalTasks)
// Only log if status actually changed
if currentStatus != jobStatus {
log.Printf("Updated job %d status from %s to %s (progress: %.1f%%, completed tasks: %d/%d)", jobID, currentStatus, jobStatus, progress, completedTasks, totalTasks)
}
// Broadcast job update via WebSocket
s.broadcastJobUpdate(jobID, "job_update", map[string]interface{}{
"status": jobStatus,
"progress": progress,
"completed_at": now,
})
}
if outputFormatStr == "EXR_264_MP4" || outputFormatStr == "EXR_AV1_MP4" {
@@ -1054,14 +1171,22 @@ func (s *Server) updateJobStatusFromTasks(jobID int64) {
// Create a video generation task instead of calling generateMP4Video directly
// This prevents race conditions when multiple runners complete frames simultaneously
videoTaskTimeout := 86400 // 24 hours for video generation
_, err := s.db.Exec(
var videoTaskID int64
err := s.db.QueryRow(
`INSERT INTO tasks (job_id, frame_start, frame_end, task_type, status, timeout_seconds, max_retries)
VALUES (?, ?, ?, ?, ?, ?, ?)`,
VALUES (?, ?, ?, ?, ?, ?, ?)
RETURNING id`,
jobID, 0, 0, types.TaskTypeVideoGeneration, types.TaskStatusPending, videoTaskTimeout, 1,
)
).Scan(&videoTaskID)
if err != nil {
log.Printf("Failed to create video generation task for job %d: %v", jobID, err)
} else {
// Broadcast that a new task was added
log.Printf("Broadcasting task_added for job %d: video generation task %d", jobID, videoTaskID)
s.broadcastTaskUpdate(jobID, videoTaskID, "task_added", map[string]interface{}{
"task_id": videoTaskID,
"task_type": types.TaskTypeVideoGeneration,
})
// Update job status to ensure it's marked as running (has pending video task)
s.updateJobStatusFromTasks(jobID)
// Try to distribute the task immediately
@@ -1099,7 +1224,10 @@ func (s *Server) updateJobStatusFromTasks(jobID int64) {
if err != nil {
log.Printf("Failed to update job %d status to %s: %v", jobID, jobStatus, err)
} else {
log.Printf("Updated job %d status to %s (progress: %.1f%%, completed: %d/%d, pending: %d, running: %d)", jobID, jobStatus, progress, completedTasks, totalTasks, pendingOrRunningTasks-runningTasks, runningTasks)
// Only log if status actually changed
if currentStatus != jobStatus {
log.Printf("Updated job %d status from %s to %s (progress: %.1f%%, completed: %d/%d, pending: %d, running: %d)", jobID, currentStatus, jobStatus, progress, completedTasks, totalTasks, pendingOrRunningTasks-runningTasks, runningTasks)
}
}
}
}
@@ -1224,7 +1352,6 @@ func (s *Server) distributeTasksToRunners() {
t.AllowParallelRunners = true
}
pendingTasks = append(pendingTasks, t)
log.Printf("Found pending task %d (type: %s, job: %d '%s', status: %s)", t.TaskID, t.TaskType, t.JobID, t.JobName, t.JobStatus)
}
if len(pendingTasks) == 0 {
@@ -1308,11 +1435,6 @@ func (s *Server) distributeTasksToRunners() {
}
log.Printf("Distributing %d pending tasks (%v) to %d connected runners: %v", len(pendingTasks), taskTypes, len(connectedRunners), connectedRunners)
// Log each pending task for debugging
for _, task := range pendingTasks {
log.Printf(" - Task %d (type: %s, job: %d '%s', status: %s)", task.TaskID, task.TaskType, task.JobID, task.JobName, task.JobStatus)
}
// Distribute tasks to runners
// Sort tasks to prioritize metadata tasks
sort.Slice(pendingTasks, func(i, j int) bool {
@@ -1572,6 +1694,13 @@ func (s *Server) distributeTasksToRunners() {
continue
}
// Broadcast task assignment
s.broadcastTaskUpdate(task.JobID, task.TaskID, "task_update", map[string]interface{}{
"status": types.TaskStatusRunning,
"runner_id": selectedRunnerID,
"started_at": now,
})
// Task was successfully assigned, send via WebSocket
log.Printf("Assigned task %d (type: %s, job: %d) to runner %d", task.TaskID, task.TaskType, task.JobID, selectedRunnerID)
@@ -1642,6 +1771,8 @@ func (s *Server) assignTaskToRunner(runnerID int64, taskID int64) error {
var filePath string
if err := rows.Scan(&filePath); err == nil {
task.InputFiles = append(task.InputFiles, filePath)
} else {
log.Printf("Failed to scan input file path for task %d: %v", taskID, err)
}
}
} else {