its a bit broken

2025-11-25 03:48:28 -06:00
parent a53ea4dce7
commit 690e6b13f8
16 changed files with 1542 additions and 861 deletions
--- a/internal/runner/client.go
+++ b/internal/runner/client.go
@@ -4,13 +4,16 @@ import (
 	"archive/tar"
 	"bufio"
 	"bytes"
+	"crypto/sha256"
 	_ "embed"
+	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"log"
 	"mime/multipart"
+	"net"
 	"net/http"
 	"net/url"
 	"os"
@@ -35,8 +38,7 @@ type Client struct {
 	hostname           string
 	httpClient         *http.Client
 	runnerID           int64
-	runnerSecret       string
-	managerSecret      string
+	apiKey             string // API key for authentication
 	wsConn             *websocket.Conn
 	wsConnMu           sync.RWMutex
 	wsWriteMu          sync.Mutex // Protects concurrent writes to WebSocket (WebSocket is not thread-safe)
@@ -54,11 +56,13 @@ type Client struct {
 	allocatedDevices   map[int64]string       // map[taskID]device - tracks which device is allocated to which task
 	allocatedDevicesMu sync.RWMutex           // Protects allocatedDevices
 	longRunningClient  *http.Client           // HTTP client for long-running operations (no timeout)
+	fingerprint        string                 // Unique hardware fingerprint for this runner
+	fingerprintMu      sync.RWMutex           // Protects fingerprint
 }

 // NewClient creates a new runner client
 func NewClient(managerURL, name, hostname string) *Client {
-	return &Client{
+	client := &Client{
 		managerURL:        managerURL,
 		name:              name,
 		hostname:          hostname,
@@ -67,13 +71,88 @@ func NewClient(managerURL, name, hostname string) *Client {
 		stopChan:          make(chan struct{}),
 		stepStartTimes:    make(map[string]time.Time),
 	}
+	// Generate fingerprint immediately
+	client.generateFingerprint()
+	return client
 }

-// SetSecrets sets the runner and manager secrets
-func (c *Client) SetSecrets(runnerID int64, runnerSecret, managerSecret string) {
+// generateFingerprint computes the runner's fingerprint and stores it in
+// c.fingerprint while holding fingerprintMu for writing.
+//
+// The fingerprint is the hex-encoded SHA-256 digest of a list of identifiers,
+// each followed by a single NUL byte, in this order:
+//   - c.hostname (always included, even when empty)
+//   - the whitespace-trimmed contents of /etc/machine-id, if readable
+//   - the whitespace-trimmed contents of /sys/class/dmi/id/product_uuid, if readable
+//   - the MAC address returned by getMACAddress, if it succeeds
+//
+// When nothing beyond the hostname could be collected, the current process ID
+// and the current Unix timestamp are mixed in as well. In that case the value
+// changes on every start, so it is unique but NOT stable across restarts.
+func (c *Client) generateFingerprint() {
+	c.fingerprintMu.Lock()
+	defer c.fingerprintMu.Unlock()
+
+	// The hostname always comes first; every other source is best effort.
+	parts := []string{c.hostname}
+
+	// Linux identity files. A file that cannot be read is simply skipped.
+	for _, idFile := range []string{"/etc/machine-id", "/sys/class/dmi/id/product_uuid"} {
+		raw, readErr := os.ReadFile(idFile)
+		if readErr != nil {
+			continue
+		}
+		parts = append(parts, strings.TrimSpace(string(raw)))
+	}
+
+	// MAC address of the first usable interface, when there is one.
+	mac, macErr := c.getMACAddress()
+	if macErr == nil {
+		parts = append(parts, mac)
+	}
+
+	// Only the hostname was gathered: add per-process entropy so the result is
+	// at least unique for this run.
+	if len(parts) < 2 {
+		parts = append(parts,
+			fmt.Sprintf("%d", os.Getpid()),
+			fmt.Sprintf("%d", time.Now().Unix()),
+		)
+	}
+
+	// Hash every part followed by a NUL separator. Joining with NUL and adding
+	// one trailing NUL yields exactly that byte stream (parts is never empty).
+	sum := sha256.Sum256([]byte(strings.Join(parts, "\x00") + "\x00"))
+	c.fingerprint = hex.EncodeToString(sum[:])
+}
+
+// getMACAddress returns the hardware (MAC) address, formatted by
+// net.HardwareAddr.String, of the first interface listed by net.Interfaces
+// that is up, is not a loopback device, and has a non-empty hardware address.
+//
+// It returns the error from net.Interfaces unchanged, or a descriptive error
+// when no interface qualifies.
+//
+// NOTE(review): "first" follows the order in which net.Interfaces lists the
+// interfaces; confirm that order (and the up/down state of virtual
+// interfaces) is stable enough for fingerprinting on the target hosts.
+func (c *Client) getMACAddress() (string, error) {
+	interfaces, err := net.Interfaces()
+	if err != nil {
+		return "", err
+	}
+
+	for _, iface := range interfaces {
+		// Skip loopback and down interfaces
+		if iface.Flags&net.FlagLoopback != 0 || iface.Flags&net.FlagUp == 0 {
+			continue
+		}
+		// Skip interfaces without a hardware address. len of a nil slice is 0,
+		// so a separate nil comparison is unnecessary.
+		if len(iface.HardwareAddr) == 0 {
+			continue
+		}
+		return iface.HardwareAddr.String(), nil
+	}
+
+	// Constant message with no formatting verbs, so errors.New is sufficient.
+	return "", errors.New("no suitable network interface found")
+}
+
+// GetFingerprint reports the fingerprint currently stored on the client. The
+// value is read while holding fingerprintMu for reading.
+func (c *Client) GetFingerprint() string {
+	c.fingerprintMu.RLock()
+	fp := c.fingerprint
+	c.fingerprintMu.RUnlock()
+	return fp
+}
+
+// SetAPIKey sets the runner ID and API key
+func (c *Client) SetAPIKey(runnerID int64, apiKey string) {
 	c.runnerID = runnerID
-	c.runnerSecret = runnerSecret
-	c.managerSecret = managerSecret
+	c.apiKey = apiKey

 	// Initialize runner workspace directory if not already initialized
 	if c.workspaceDir == "" {
@@ -408,10 +487,15 @@ func (c *Client) Register(registrationToken string) (int64, string, string, erro
 	}

 	req := map[string]interface{}{
-		"name":               c.name,
-		"hostname":           c.hostname,
-		"capabilities":       string(capabilitiesJSON),
-		"registration_token": registrationToken,
+		"name":         c.name,
+		"hostname":     c.hostname,
+		"capabilities": string(capabilitiesJSON),
+		"api_key":      registrationToken, // API key passed as registrationToken param for compatibility
+	}
+
+	// Only send fingerprint for non-fixed API keys to avoid uniqueness conflicts
+	if !strings.HasPrefix(registrationToken, "jk_r0_") { // Fixed test key
+		req["fingerprint"] = c.GetFingerprint()
 	}

 	body, _ := json.Marshal(req)
@@ -447,19 +531,16 @@ func (c *Client) Register(registrationToken string) (int64, string, string, erro
 	}

 	var result struct {
-		ID            int64  `json:"id"`
-		RunnerSecret  string `json:"runner_secret"`
-		ManagerSecret string `json:"manager_secret"`
+		ID int64 `json:"id"`
 	}
 	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
 		return 0, "", "", fmt.Errorf("failed to decode response: %w", err)
 	}

 	c.runnerID = result.ID
-	c.runnerSecret = result.RunnerSecret
-	c.managerSecret = result.ManagerSecret
+	c.apiKey = registrationToken // Store the API key for future use

-	return result.ID, result.RunnerSecret, result.ManagerSecret, nil
+	return result.ID, registrationToken, "", nil // Return API key as "runner secret" for compatibility
 }

 // doSignedRequest performs an authenticated HTTP request using shared secret
@@ -476,7 +557,7 @@ func (c *Client) doSignedRequestLong(method, path string, body []byte, queryPara

 // doSignedRequestWithClient performs an authenticated HTTP request using the specified client
 func (c *Client) doSignedRequestWithClient(method, path string, body []byte, client *http.Client, queryParams ...string) (*http.Response, error) {
-	if c.runnerSecret == "" {
+	if c.apiKey == "" {
 		return nil, fmt.Errorf("runner not authenticated")
 	}

@@ -491,15 +572,18 @@ func (c *Client) doSignedRequestWithClient(method, path string, body []byte, cli
 		return nil, err
 	}

-	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("X-Runner-Secret", c.runnerSecret)
+	// Add authentication - use API key in Authorization header
+	req.Header.Set("Authorization", "Bearer "+c.apiKey)
+	if len(body) > 0 {
+		req.Header.Set("Content-Type", "application/json")
+	}

 	return client.Do(req)
 }

 // ConnectWebSocket establishes a WebSocket connection to the manager
 func (c *Client) ConnectWebSocket() error {
-	if c.runnerID == 0 || c.runnerSecret == "" {
+	if c.runnerID == 0 || c.apiKey == "" {
 		return fmt.Errorf("runner not authenticated")
 	}

@@ -509,8 +593,8 @@ func (c *Client) ConnectWebSocket() error {
 	// Convert HTTP URL to WebSocket URL
 	wsURL := strings.Replace(c.managerURL, "http://", "ws://", 1)
 	wsURL = strings.Replace(wsURL, "https://", "wss://", 1)
-	wsURL = fmt.Sprintf("%s%s?runner_id=%d&secret=%s",
-		wsURL, path, c.runnerID, url.QueryEscape(c.runnerSecret))
+	wsURL = fmt.Sprintf("%s%s?runner_id=%d&api_key=%s",
+		wsURL, path, c.runnerID, url.QueryEscape(c.apiKey))

 	// Parse URL
 	u, err := url.Parse(wsURL)
@@ -868,6 +952,44 @@ func (c *Client) KillAllProcesses() {
 	log.Printf("Killed %d process(es)", killedCount)
 }

+// CleanupWorkspace deletes the runner's workspace directory, when one is set,
+// together with everything inside it, and then calls cleanupOrphanedWorkspaces
+// to sweep leftover workspace directories. Removal failures are only logged as
+// warnings; nothing is returned to the caller.
+func (c *Client) CleanupWorkspace() {
+	log.Printf("DEBUG: CleanupWorkspace method called")
+	log.Printf("CleanupWorkspace called, workspaceDir: %s", c.workspaceDir)
+
+	if dir := c.workspaceDir; dir != "" {
+		log.Printf("Cleaning up workspace directory: %s", dir)
+		removeErr := os.RemoveAll(dir)
+		if removeErr == nil {
+			log.Printf("Successfully removed workspace directory: %s", dir)
+		} else {
+			log.Printf("Warning: Failed to remove workspace directory %s: %v", dir, removeErr)
+		}
+	}
+
+	// Runs even when workspaceDir was never set, so directories left behind by
+	// earlier runs are removed as well.
+	cleanupOrphanedWorkspaces()
+}
+
+// cleanupOrphanedWorkspaces deletes the "jiggablend-workspaces" directory from
+// the current working directory and from the OS temp directory, if present.
+// A location where os.Stat fails is skipped without logging; removal failures
+// are logged as warnings and do not stop the sweep.
+func cleanupOrphanedWorkspaces() {
+	log.Printf("Cleaning up orphaned jiggablend workspace directories...")
+
+	for _, parent := range []string{".", os.TempDir()} {
+		candidate := filepath.Join(parent, "jiggablend-workspaces")
+
+		// Nothing to do when the directory cannot be stat'ed.
+		if _, statErr := os.Stat(candidate); statErr != nil {
+			continue
+		}
+
+		log.Printf("Removing orphaned workspace directory: %s", candidate)
+		removeErr := os.RemoveAll(candidate)
+		if removeErr == nil {
+			log.Printf("Successfully removed workspace directory: %s", candidate)
+			continue
+		}
+		log.Printf("Warning: Failed to remove workspace directory %s: %v", candidate, removeErr)
+	}
+}
+
 // sendStepUpdate sends a step start/complete event to the manager
 func (c *Client) sendStepUpdate(taskID int64, stepName string, status types.StepStatus, errorMsg string) {
 	key := fmt.Sprintf("%d:%s", taskID, stepName)
@@ -955,7 +1077,7 @@ func (c *Client) sendStepUpdate(taskID int64, stepName string, status types.Step
 }

 // processTask processes a single task
-func (c *Client) processTask(task map[string]interface{}, jobName string, outputFormat string, inputFiles []interface{}) error {
+func (c *Client) processTask(task map[string]interface{}, jobName string, outputFormat string, inputFiles []interface{}) (err error) {
 	_ = jobName

 	taskID := int64(task["id"].(float64))
@@ -963,15 +1085,29 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
 	frameStart := int(task["frame_start"].(float64))
 	frameEnd := int(task["frame_end"].(float64))

-	c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting task: job %d, frames %d-%d, format: %s", jobID, frameStart, frameEnd, outputFormat), "")
-	log.Printf("Processing task %d: job %d, frames %d-%d, format: %s (from task assignment)", taskID, jobID, frameStart, frameEnd, outputFormat)
-
 	// Create temporary job workspace within runner workspace
 	workDir := filepath.Join(c.getWorkspaceDir(), fmt.Sprintf("job-%d-task-%d", jobID, taskID))
-	if err := os.MkdirAll(workDir, 0755); err != nil {
-		return fmt.Errorf("failed to create work directory: %w", err)
+	if mkdirErr := os.MkdirAll(workDir, 0755); mkdirErr != nil {
+		return fmt.Errorf("failed to create work directory: %w", mkdirErr)
 	}
-	defer os.RemoveAll(workDir)
+
+	// Guaranteed cleanup even on panic
+	defer func() {
+		if cleanupErr := os.RemoveAll(workDir); cleanupErr != nil {
+			log.Printf("Warning: Failed to cleanup work directory %s: %v", workDir, cleanupErr)
+		}
+	}()
+
+	// Panic recovery for this task
+	defer func() {
+		if r := recover(); r != nil {
+			log.Printf("Task %d panicked: %v", taskID, r)
+			err = fmt.Errorf("task panicked: %v", r)
+		}
+	}()
+
+	c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting task: job %d, frames %d-%d, format: %s", jobID, frameStart, frameEnd, outputFormat), "")
+	log.Printf("Processing task %d: job %d, frames %d-%d, format: %s (from task assignment)", taskID, jobID, frameStart, frameEnd, outputFormat)

 	// Step: download
 	c.sendStepUpdate(taskID, "download", types.StepStatusRunning, "")
@@ -996,7 +1132,7 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output

 	// Find .blend file in extracted contents
 	blendFile := ""
-	err := filepath.Walk(workDir, func(path string, info os.FileInfo, err error) error {
+	err = filepath.Walk(workDir, func(path string, info os.FileInfo, err error) error {
 		if err != nil {
 			return err
 		}
@@ -1032,7 +1168,7 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
 	}

 	if blendFile == "" {
-		err := fmt.Errorf("no .blend file found in context")
+		err := fmt.Errorf("no .blend file found in context - the uploaded context archive must contain at least one .blend file to render")
 		c.sendStepUpdate(taskID, "download", types.StepStatusFailed, err.Error())
 		return err
 	}
@@ -1062,10 +1198,6 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
 		renderFormat = "EXR" // Use EXR for maximum quality (32-bit float, HDR)
 	}

-	// Blender uses # characters for frame number placeholders (not %04d)
-	// Use #### for 4-digit zero-padded frame numbers
-	outputPattern := filepath.Join(outputDir, fmt.Sprintf("frame_####.%s", strings.ToLower(renderFormat)))
-
 	// Step: render_blender
 	c.sendStepUpdate(taskID, "render_blender", types.StepStatusRunning, "")
 	if frameStart == frameEnd {
@@ -1074,14 +1206,8 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
 		c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting Blender render for frames %d-%d...", frameStart, frameEnd), "render_blender")
 	}

-	// Execute Blender - use absolute path for output pattern
-	absOutputPattern, err := filepath.Abs(outputPattern)
-	if err != nil {
-		errMsg := fmt.Sprintf("failed to get absolute path for output: %v", err)
-		c.sendLog(taskID, types.LogLevelError, errMsg, "render_blender")
-		c.sendStepUpdate(taskID, "render_blender", types.StepStatusFailed, errMsg)
-		return errors.New(errMsg)
-	}
+	// Always render frames individually for precise control over file naming
+	// This avoids Blender's automatic frame numbering quirks

 	// Override output format and render settings from job submission
 	// For MP4, we render as EXR (handled above) for highest fidelity, so renderFormat is already EXR
@@ -1151,23 +1277,50 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
 	if enableExecution {
 		args = append(args, "--enable-autoexec")
 	}
-	if frameStart == frameEnd {
-		// Single frame
-		args = append(args, "-o", absOutputPattern, "-f", fmt.Sprintf("%d", frameStart))
-		cmd = exec.Command("blender", args...)
-	} else {
-		// Frame range
-		args = append(args, "-o", absOutputPattern,
-			"-s", fmt.Sprintf("%d", frameStart),
-			"-e", fmt.Sprintf("%d", frameEnd),
-			"-a") // -a renders animation (all frames in range)
-		cmd = exec.Command("blender", args...)
-	}
-	cmd.Dir = workDir
+	// Always render frames individually for precise control over file naming
+	// This avoids Blender's automatic frame numbering quirks
+	for frame := frameStart; frame <= frameEnd; frame++ {
+		// Create temp output pattern for this frame
+		tempPattern := filepath.Join(outputDir, fmt.Sprintf("temp_frame.%s", strings.ToLower(renderFormat)))
+		tempAbsPattern, _ := filepath.Abs(tempPattern)

-	// Set environment variables for headless rendering
-	// This helps ensure proper OpenGL context initialization, especially for EEVEE
-	cmd.Env = os.Environ()
+		// Build args for this specific frame
+		frameArgs := []string{"-b", blendFile, "--python", scriptPath}
+		if enableExecution {
+			frameArgs = append(frameArgs, "--enable-autoexec")
+		}
+		frameArgs = append(frameArgs, "-o", tempAbsPattern, "-f", fmt.Sprintf("%d", frame))
+
+		c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Rendering frame %d...", frame), "render_blender")
+
+		frameCmd := exec.Command("blender", frameArgs...)
+		frameCmd.Dir = workDir
+		frameCmd.Env = os.Environ()
+
+		// Run this frame
+		if output, err := frameCmd.CombinedOutput(); err != nil {
+			errMsg := fmt.Sprintf("blender failed on frame %d: %v (output: %s)", frame, err, string(output))
+			c.sendLog(taskID, types.LogLevelError, errMsg, "render_blender")
+			return errors.New(errMsg)
+		}
+
+		// Immediately rename the temp file to the proper frame-numbered name
+		finalName := fmt.Sprintf("frame_%04d.%s", frame, strings.ToLower(renderFormat))
+		finalPath := filepath.Join(outputDir, finalName)
+		tempPath := filepath.Join(outputDir, fmt.Sprintf("temp_frame.%s", strings.ToLower(renderFormat)))
+
+		if err := os.Rename(tempPath, finalPath); err != nil {
+			errMsg := fmt.Sprintf("failed to rename temp file for frame %d: %v", frame, err)
+			c.sendLog(taskID, types.LogLevelError, errMsg, "render_blender")
+			return errors.New(errMsg)
+		}
+
+		c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Completed frame %d -> %s", frame, finalName), "render_blender")
+	}
+
+	// Skip the rest of the function since we handled all frames above
+	c.sendStepUpdate(taskID, "render_blender", types.StepStatusCompleted, "")
+	return nil
 	// Blender will handle headless rendering automatically
 	// We preserve the environment to allow GPU access if available

@@ -1249,6 +1402,10 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
 		return errors.New(errMsg)
 	}

+	// For frame ranges, we rendered each frame individually with temp naming
+	// The files are already properly named during the individual frame rendering
+	// No additional renaming needed
+
 	// Find rendered output file(s)
 	// For frame ranges, we'll find all frames in the upload step
 	// For single frames, we need to find the specific output file
@@ -1454,9 +1611,30 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
 }

 // processVideoGenerationTask processes a video generation task
-func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID int64) error {
+func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID int64) (err error) {
 	taskID := int64(task["id"].(float64))

+	// Create temporary job workspace for video generation within runner workspace
+	workDir := filepath.Join(c.getWorkspaceDir(), fmt.Sprintf("job-%d-video", jobID))
+	if mkdirErr := os.MkdirAll(workDir, 0755); mkdirErr != nil {
+		return fmt.Errorf("failed to create work directory: %w", mkdirErr)
+	}
+
+	// Guaranteed cleanup even on panic
+	defer func() {
+		if cleanupErr := os.RemoveAll(workDir); cleanupErr != nil {
+			log.Printf("Warning: Failed to cleanup work directory %s: %v", workDir, cleanupErr)
+		}
+	}()
+
+	// Panic recovery for this task
+	defer func() {
+		if r := recover(); r != nil {
+			log.Printf("Video generation task %d panicked: %v", taskID, r)
+			err = fmt.Errorf("video generation task panicked: %v", r)
+		}
+	}()
+
 	c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting video generation task: job %d", jobID), "")
 	log.Printf("Processing video generation task %d for job %d", taskID, jobID)

@@ -1474,6 +1652,16 @@ func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID i
 		}
 	}

+	// Debug logging for output format detection
+	c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Video generation: detected output format '%s'", outputFormat), "generate_video")
+
+	// Get frame rate from render settings
+	var frameRate float64 = 24.0 // Default fallback
+	if err == nil && jobMetadata != nil && jobMetadata.RenderSettings.FrameRate > 0 {
+		frameRate = jobMetadata.RenderSettings.FrameRate
+	}
+	c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Video generation: using frame rate %.2f fps", frameRate), "generate_video")
+
 	// Get all output files for this job
 	files, err := c.getJobFiles(jobID)
 	if err != nil {
@@ -1507,14 +1695,6 @@ func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID i
 	c.sendStepUpdate(taskID, "download_frames", types.StepStatusRunning, "")
 	c.sendLog(taskID, types.LogLevelInfo, "Downloading EXR frames...", "download_frames")

-	// Create temporary job workspace for video generation within runner workspace
-	workDir := filepath.Join(c.getWorkspaceDir(), fmt.Sprintf("job-%d-video", jobID))
-	if err := os.MkdirAll(workDir, 0755); err != nil {
-		c.sendStepUpdate(taskID, "download_frames", types.StepStatusFailed, err.Error())
-		return fmt.Errorf("failed to create work directory: %w", err)
-	}
-	defer os.RemoveAll(workDir)
-
 	// Download all EXR frames
 	var frameFiles []string
 	for _, file := range exrFiles {
@@ -1568,8 +1748,8 @@ func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID i
 	// Extract frame number pattern (e.g., frame_2470.exr -> frame_%04d.exr)
 	baseName := filepath.Base(firstFrame)
 	// Find the numeric part and replace it with %04d pattern
-	// Use regex to find digits after underscore and before extension
-	re := regexp.MustCompile(`_(\d+)\.`)
+	// Use regex to find digits (including negative) after underscore and before extension
+	re := regexp.MustCompile(`_(-?\d+)\.`)
 	var pattern string
 	var startNumber int
 	frameNumStr := re.FindStringSubmatch(baseName)
@@ -1637,31 +1817,158 @@ func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID i
 			vf = "zscale=t=linear:npl=100,format=gbrpf32le,zscale=p=bt709,tonemap=tonemap=hable:desat=0,zscale=t=bt709:m=bt709:r=tv,format=yuv420p"
 		}

-		cmd = exec.Command("ffmpeg", "-y", "-start_number", fmt.Sprintf("%d", startNumber),
-			"-framerate", "24", "-i", patternPath,
+		// Build ffmpeg command with high-quality EXR input processing
+		cmd = exec.Command("ffmpeg", "-y",
+			"-f", "image2", // Force image sequence input format
+			"-start_number", fmt.Sprintf("%d", startNumber),
+			"-framerate", fmt.Sprintf("%.2f", frameRate),
+			"-i", patternPath,
 			"-vf", vf,
-			"-c:v", codec, "-pix_fmt", pixFmt, "-r", "24", outputMP4)
+			"-c:v", codec, "-pix_fmt", pixFmt,
+			"-r", fmt.Sprintf("%.2f", frameRate),
+			"-color_primaries", "bt709", // Ensure proper color primaries
+			"-color_trc", "bt709", // Ensure proper transfer characteristics
+			"-colorspace", "bt709", // Ensure proper color space
+			outputMP4)

+		// Prepare codec-specific arguments
+		var codecArgs []string
 		if outputFormat == "EXR_AV1_MP4" {
-			// AV1 encoding options for quality
-			cmd.Args = append(cmd.Args, "-cpu-used", "4", "-crf", "30", "-b:v", "0")
+			// AV1 encoding options for maximum quality
+			codecArgs = []string{"-cpu-used", "1", "-crf", "15", "-b:v", "0", "-row-mt", "1", "-tiles", "4x4", "-lag-in-frames", "25", "-arnr-max-frames", "15", "-arnr-strength", "4"}
+		} else {
+			// H.264 encoding options for maximum quality
+			codecArgs = []string{"-preset", "veryslow", "-crf", "15", "-profile:v", "high", "-level", "5.2", "-tune", "film", "-keyint_min", "24", "-g", "240", "-bf", "2", "-refs", "4"}
 		}
-	}
-	cmd.Dir = workDir
-	output, err := cmd.CombinedOutput()
-	if err != nil {
-		outputStr := string(output)

+		// Perform 2-pass encoding for optimal quality distribution
+		c.sendLog(taskID, types.LogLevelInfo, "Starting 2-pass video encoding for optimal quality...", "generate_video")
+
+		// PASS 1: Analysis pass (collects statistics for better rate distribution)
+		c.sendLog(taskID, types.LogLevelInfo, "Pass 1/2: Analyzing video content for optimal encoding...", "generate_video")
+		pass1Args := append([]string{"-y", "-f", "image2", "-start_number", fmt.Sprintf("%d", startNumber), "-framerate", fmt.Sprintf("%.2f", frameRate), "-i", patternPath, "-vf", vf, "-c:v", codec, "-pix_fmt", pixFmt, "-r", fmt.Sprintf("%.2f", frameRate), "-color_primaries", "bt709", "-color_trc", "bt709", "-colorspace", "bt709"}, codecArgs...)
+		pass1Args = append(pass1Args, "-pass", "1", "-f", "null", "/dev/null")
+
+		pass1Cmd := exec.Command("ffmpeg", pass1Args...)
+		pass1Cmd.Dir = workDir
+		pass1Err := pass1Cmd.Run()
+		if pass1Err != nil {
+			c.sendLog(taskID, types.LogLevelWarn, fmt.Sprintf("Pass 1 completed (warnings expected): %v", pass1Err), "generate_video")
+		}
+
+		// PASS 2: Encoding pass (uses statistics from pass 1 for optimal quality)
+		c.sendLog(taskID, types.LogLevelInfo, "Pass 2/2: Encoding video with optimal quality distribution...", "generate_video")
+		cmd = exec.Command("ffmpeg", "-y", "-f", "image2", "-start_number", fmt.Sprintf("%d", startNumber), "-framerate", fmt.Sprintf("%.2f", frameRate), "-i", patternPath, "-vf", vf, "-c:v", codec, "-pix_fmt", pixFmt, "-r", fmt.Sprintf("%.2f", frameRate), "-color_primaries", "bt709", "-color_trc", "bt709", "-colorspace", "bt709")
+		cmd.Args = append(cmd.Args, codecArgs...)
+		cmd.Args = append(cmd.Args, "-pass", "2", outputMP4)
+	}
+
+	// Create stdout and stderr pipes for streaming
+	stdoutPipe, err := cmd.StdoutPipe()
+	if err != nil {
+		errMsg := fmt.Sprintf("failed to create ffmpeg stdout pipe: %v", err)
+		c.sendLog(taskID, types.LogLevelError, errMsg, "generate_video")
+		c.sendStepUpdate(taskID, "generate_video", types.StepStatusFailed, errMsg)
+		return errors.New(errMsg)
+	}
+
+	stderrPipe, err := cmd.StderrPipe()
+	if err != nil {
+		errMsg := fmt.Sprintf("failed to create ffmpeg stderr pipe: %v", err)
+		c.sendLog(taskID, types.LogLevelError, errMsg, "generate_video")
+		c.sendStepUpdate(taskID, "generate_video", types.StepStatusFailed, errMsg)
+		return errors.New(errMsg)
+	}
+
+	cmd.Dir = workDir
+
+	// Start the command
+	if err := cmd.Start(); err != nil {
+		errMsg := fmt.Sprintf("failed to start ffmpeg: %v", err)
+		c.sendLog(taskID, types.LogLevelError, errMsg, "generate_video")
+		c.sendStepUpdate(taskID, "generate_video", types.StepStatusFailed, errMsg)
+		return errors.New(errMsg)
+	}
+
+	// Register process for cleanup on shutdown
+	c.runningProcs.Store(taskID, cmd)
+	defer c.runningProcs.Delete(taskID)
+
+	// Stream stdout line by line
+	stdoutDone := make(chan bool)
+	go func() {
+		defer close(stdoutDone)
+		scanner := bufio.NewScanner(stdoutPipe)
+		for scanner.Scan() {
+			line := scanner.Text()
+			if line != "" {
+				// Filter out common ffmpeg informational messages that aren't useful
+				if !strings.Contains(line, "Input #") &&
+					!strings.Contains(line, "Duration:") &&
+					!strings.Contains(line, "Stream mapping:") &&
+					!strings.Contains(line, "Output #") &&
+					!strings.Contains(line, "encoder") &&
+					!strings.Contains(line, "fps=") &&
+					!strings.Contains(line, "size=") &&
+					!strings.Contains(line, "time=") &&
+					!strings.Contains(line, "bitrate=") &&
+					!strings.Contains(line, "speed=") {
+					c.sendLog(taskID, types.LogLevelInfo, line, "generate_video")
+				}
+			}
+		}
+	}()
+
+	// Stream stderr line by line
+	stderrDone := make(chan bool)
+	go func() {
+		defer close(stderrDone)
+		scanner := bufio.NewScanner(stderrPipe)
+		for scanner.Scan() {
+			line := scanner.Text()
+			if line != "" {
+				// Filter out common ffmpeg informational messages and show only warnings/errors
+				if strings.Contains(line, "error") ||
+					strings.Contains(line, "Error") ||
+					strings.Contains(line, "failed") ||
+					strings.Contains(line, "Failed") ||
+					strings.Contains(line, "warning") ||
+					strings.Contains(line, "Warning") {
+					c.sendLog(taskID, types.LogLevelWarn, line, "generate_video")
+				} else if !strings.Contains(line, "Input #") &&
+					!strings.Contains(line, "Duration:") &&
+					!strings.Contains(line, "Stream mapping:") &&
+					!strings.Contains(line, "Output #") &&
+					!strings.Contains(line, "encoder") &&
+					!strings.Contains(line, "fps=") &&
+					!strings.Contains(line, "size=") &&
+					!strings.Contains(line, "time=") &&
+					!strings.Contains(line, "bitrate=") &&
+					!strings.Contains(line, "speed=") {
+					c.sendLog(taskID, types.LogLevelInfo, line, "generate_video")
+				}
+			}
+		}
+	}()
+
+	// Wait for command to complete
+	err = cmd.Wait()
+
+	// Wait for streaming goroutines to finish
+	<-stdoutDone
+	<-stderrDone
+
+	if err != nil {
 		// Check for size-related errors and provide helpful messages
-		if sizeErr := c.checkFFmpegSizeError(outputStr); sizeErr != nil {
+		if sizeErr := c.checkFFmpegSizeError("ffmpeg encoding failed"); sizeErr != nil {
 			c.sendLog(taskID, types.LogLevelError, sizeErr.Error(), "generate_video")
 			c.sendStepUpdate(taskID, "generate_video", types.StepStatusFailed, sizeErr.Error())
 			return sizeErr
 		}

 		// Try alternative method with concat demuxer
-		log.Printf("First ffmpeg attempt failed, trying concat method: %s", outputStr)
-		err = c.generateMP4WithConcat(frameFiles, outputMP4, workDir, allocatedDevice, outputFormat, codec, pixFmt, useAlpha, useHardware)
+		c.sendLog(taskID, types.LogLevelWarn, "Primary ffmpeg encoding failed, trying concat method...", "generate_video")
+		err = c.generateMP4WithConcat(frameFiles, outputMP4, workDir, allocatedDevice, outputFormat, codec, pixFmt, useAlpha, useHardware, frameRate)
 		if err != nil {
 			// Check for size errors in concat method too
 			if sizeErr := c.checkFFmpegSizeError(err.Error()); sizeErr != nil {
@@ -1681,8 +1988,12 @@ func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID i
 		return err
 	}

+	// Clean up 2-pass log files
+	_ = os.Remove(filepath.Join(workDir, "ffmpeg2pass-0.log"))
+	_ = os.Remove(filepath.Join(workDir, "ffmpeg2pass-0.log.mbtree"))
+
 	c.sendStepUpdate(taskID, "generate_video", types.StepStatusCompleted, "")
-	c.sendLog(taskID, types.LogLevelInfo, "MP4 video generated successfully", "generate_video")
+	c.sendLog(taskID, types.LogLevelInfo, "MP4 video generated with 2-pass encoding successfully", "generate_video")

 	// Step: upload_video
 	c.sendStepUpdate(taskID, "upload_video", types.StepStatusRunning, "")
@@ -1779,7 +2090,7 @@ func (c *Client) buildFFmpegCommand(device string, args ...string) (*exec.Cmd, e
 	}

 	// No hardware acceleration available
-	return nil, fmt.Errorf("no hardware encoder available")
+	return nil, fmt.Errorf("no hardware encoder available for video encoding - falling back to software encoding which may be slower")
 }

 // buildFFmpegCommandAV1 builds an ffmpeg command with AV1 hardware acceleration if available
@@ -1881,7 +2192,7 @@ func (c *Client) buildFFmpegCommandAV1(device string, useAlpha bool, args ...str
 	}

 	// No AV1 hardware acceleration available
-	return nil, fmt.Errorf("no AV1 hardware encoder available")
+	return nil, fmt.Errorf("no AV1 hardware encoder available - falling back to software AV1 encoding which may be slower")
 }

 // probeAllHardwareAccelerators probes ffmpeg for all available hardware acceleration methods
@@ -2460,7 +2771,7 @@ func (c *Client) testGenericEncoder(encoder string) bool {

 // generateMP4WithConcat uses ffmpeg concat demuxer as fallback
 // device parameter is optional - if provided, it will be used for VAAPI encoding
-func (c *Client) generateMP4WithConcat(frameFiles []string, outputMP4, workDir string, device string, outputFormat string, codec string, pixFmt string, useAlpha bool, useHardware bool) error {
+func (c *Client) generateMP4WithConcat(frameFiles []string, outputMP4, workDir string, device string, outputFormat string, codec string, pixFmt string, useAlpha bool, useHardware bool, frameRate float64) error {
 	// Create file list for ffmpeg concat demuxer
 	listFile := filepath.Join(workDir, "frames.txt")
 	listFileHandle, err := os.Create(listFile)
@@ -2509,30 +2820,108 @@ func (c *Client) generateMP4WithConcat(frameFiles []string, outputMP4, workDir s
 	}

 	if !useHardware {
-		// Software encoding with HDR tonemapping
-		cmd = exec.Command("ffmpeg", "-f", "concat", "-safe", "0", "-i", listFile,
-			"-vf", vf,
-			"-c:v", codec, "-pix_fmt", pixFmt, "-r", "24", "-y", outputMP4)
-
+		// Software encoding with HDR tonemapping - 2-pass for optimal quality
+		var codecArgs []string
 		if outputFormat == "EXR_AV1_MP4" {
-			// AV1 encoding options for quality
-			cmd.Args = append(cmd.Args, "-cpu-used", "4", "-crf", "30", "-b:v", "0")
+			codecArgs = []string{"-cpu-used", "1", "-crf", "15", "-b:v", "0", "-row-mt", "1", "-tiles", "4x4", "-lag-in-frames", "25", "-arnr-max-frames", "15", "-arnr-strength", "4"}
+		} else {
+			codecArgs = []string{"-preset", "veryslow", "-crf", "15", "-profile:v", "high", "-level", "5.2", "-tune", "film", "-keyint_min", "24", "-g", "240", "-bf", "2", "-refs", "4"}
 		}
+
+		// PASS 1: Analysis pass
+		pass1Args := append([]string{"-f", "concat", "-safe", "0", "-i", listFile, "-vf", vf, "-c:v", codec, "-pix_fmt", pixFmt, "-r", fmt.Sprintf("%.2f", frameRate)}, codecArgs...)
+		pass1Args = append(pass1Args, "-pass", "1", "-f", "null", "/dev/null")
+		pass1Cmd := exec.Command("ffmpeg", pass1Args...)
+		pass1Cmd.Dir = workDir
+		_ = pass1Cmd.Run() // Ignore errors for pass 1
+
+		// PASS 2: Encoding pass
+		cmd = exec.Command("ffmpeg", "-f", "concat", "-safe", "0", "-i", listFile, "-vf", vf, "-c:v", codec, "-pix_fmt", pixFmt, "-r", fmt.Sprintf("%.2f", frameRate))
+		cmd.Args = append(cmd.Args, codecArgs...)
+		cmd.Args = append(cmd.Args, "-pass", "2", "-y", outputMP4)
 	}
-	output, err := cmd.CombinedOutput()
+
+	// Create stdout and stderr pipes for streaming
+	stdoutPipe, err := cmd.StdoutPipe()
+	if err != nil {
+		return fmt.Errorf("failed to create ffmpeg stdout pipe: %w", err)
+	}
+
+	stderrPipe, err := cmd.StderrPipe()
+	if err != nil {
+		return fmt.Errorf("failed to create ffmpeg stderr pipe: %w", err)
+	}
+
+	cmd.Dir = workDir
+
+	// Start the command
+	if err := cmd.Start(); err != nil {
+		return fmt.Errorf("failed to start ffmpeg: %w", err)
+	}
+
+	// Stream stdout line by line (minimal logging for concat method)
+	stdoutDone := make(chan bool)
+	go func() {
+		defer close(stdoutDone)
+		scanner := bufio.NewScanner(stdoutPipe)
+		for scanner.Scan() {
+			line := scanner.Text()
+			if line != "" {
+				// Only log lines mentioning errors or failures for concat method (warnings are not matched on stdout)
+				if strings.Contains(line, "error") ||
+					strings.Contains(line, "Error") ||
+					strings.Contains(line, "failed") ||
+					strings.Contains(line, "Failed") {
+					log.Printf("FFmpeg concat stdout: %s", line)
+				}
+			}
+		}
+	}()
+
+	// Stream stderr line by line
+	stderrDone := make(chan bool)
+	go func() {
+		defer close(stderrDone)
+		scanner := bufio.NewScanner(stderrPipe)
+		for scanner.Scan() {
+			line := scanner.Text()
+			if line != "" {
+				// Log warnings and errors for concat method
+				if strings.Contains(line, "error") ||
+					strings.Contains(line, "Error") ||
+					strings.Contains(line, "failed") ||
+					strings.Contains(line, "Failed") ||
+					strings.Contains(line, "warning") ||
+					strings.Contains(line, "Warning") {
+					log.Printf("FFmpeg concat stderr: %s", line)
+				}
+			}
+		}
+	}()
+
+	// Wait for command to complete
+	err = cmd.Wait()
+
+	// Wait for streaming goroutines to finish
+	<-stdoutDone
+	<-stderrDone
+
 	if err != nil {
-		outputStr := string(output)
 		// Check for size-related errors
-		if sizeErr := c.checkFFmpegSizeError(outputStr); sizeErr != nil {
+		if sizeErr := c.checkFFmpegSizeError("ffmpeg concat failed"); sizeErr != nil {
 			return sizeErr
 		}
-		return fmt.Errorf("ffmpeg concat failed: %w\nOutput: %s", err, outputStr)
+		return fmt.Errorf("ffmpeg concat failed: %w", err)
 	}

 	if _, err := os.Stat(outputMP4); os.IsNotExist(err) {
 		return fmt.Errorf("MP4 file not created: %s", outputMP4)
 	}

+	// Clean up 2-pass log files
+	_ = os.Remove(filepath.Join(workDir, "ffmpeg2pass-0.log"))
+	_ = os.Remove(filepath.Join(workDir, "ffmpeg2pass-0.log.mbtree"))
+
 	return nil
 }

@@ -2774,7 +3163,7 @@ func (c *Client) uploadFile(jobID int64, filePath string) (string, error) {
 	}

 	req.Header.Set("Content-Type", formWriter.FormDataContentType())
-	req.Header.Set("X-Runner-Secret", c.runnerSecret)
+	req.Header.Set("Authorization", "Bearer "+c.apiKey)

 	// Use long-running client for file uploads (no timeout)
 	resp, err := c.longRunningClient.Do(req)
@@ -2996,18 +3385,32 @@ func (c *Client) cleanupExpiredContextCache() {
 }

 // processMetadataTask processes a metadata extraction task
-func (c *Client) processMetadataTask(task map[string]interface{}, jobID int64, inputFiles []interface{}) error {
+func (c *Client) processMetadataTask(task map[string]interface{}, jobID int64, inputFiles []interface{}) (err error) {
 	taskID := int64(task["id"].(float64))

-	c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting metadata extraction task: job %d", jobID), "")
-	log.Printf("Processing metadata extraction task %d for job %d", taskID, jobID)
-
 	// Create temporary job workspace for metadata extraction within runner workspace
 	workDir := filepath.Join(c.getWorkspaceDir(), fmt.Sprintf("job-%d-metadata-%d", jobID, taskID))
-	if err := os.MkdirAll(workDir, 0755); err != nil {
-		return fmt.Errorf("failed to create work directory: %w", err)
+	if mkdirErr := os.MkdirAll(workDir, 0755); mkdirErr != nil {
+		return fmt.Errorf("failed to create work directory: %w", mkdirErr)
 	}
-	defer os.RemoveAll(workDir)
+
+	// Guaranteed cleanup even on panic
+	defer func() {
+		if cleanupErr := os.RemoveAll(workDir); cleanupErr != nil {
+			log.Printf("Warning: Failed to cleanup work directory %s: %v", workDir, cleanupErr)
+		}
+	}()
+
+	// Panic recovery for this task
+	defer func() {
+		if r := recover(); r != nil {
+			log.Printf("Metadata extraction task %d panicked: %v", taskID, r)
+			err = fmt.Errorf("metadata extraction task panicked: %v", r)
+		}
+	}()
+
+	c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting metadata extraction task: job %d", jobID), "")
+	log.Printf("Processing metadata extraction task %d for job %d", taskID, jobID)

 	// Step: download
 	c.sendStepUpdate(taskID, "download", types.StepStatusRunning, "")
@@ -3029,7 +3432,7 @@ func (c *Client) processMetadataTask(task map[string]interface{}, jobID int64, i

 	// Find .blend file in extracted contents
 	blendFile := ""
-	err := filepath.Walk(workDir, func(path string, info os.FileInfo, err error) error {
+	err = filepath.Walk(workDir, func(path string, info os.FileInfo, err error) error {
 		if err != nil {
 			return err
 		}
@@ -3065,7 +3468,7 @@ func (c *Client) processMetadataTask(task map[string]interface{}, jobID int64, i
 	}

 	if blendFile == "" {
-		err := fmt.Errorf("no .blend file found in context")
+		err := fmt.Errorf("no .blend file found in context - the uploaded context archive must contain at least one .blend file to render")
 		c.sendStepUpdate(taskID, "download", types.StepStatusFailed, err.Error())
 		return err
 	}
@@ -3406,7 +3809,7 @@ func (c *Client) submitMetadata(jobID int64, metadata types.BlendMetadata) error
 	}

 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("X-Runner-Secret", c.runnerSecret)
+	req.Header.Set("Authorization", "Bearer "+c.apiKey)

 	resp, err := c.httpClient.Do(req)
 	if err != nil {