It's a bit broken

This commit is contained in:
2025-11-25 03:48:28 -06:00
parent a53ea4dce7
commit 690e6b13f8
16 changed files with 1542 additions and 861 deletions

View File

@@ -4,13 +4,16 @@ import (
"archive/tar"
"bufio"
"bytes"
"crypto/sha256"
_ "embed"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"mime/multipart"
"net"
"net/http"
"net/url"
"os"
@@ -35,8 +38,7 @@ type Client struct {
hostname string
httpClient *http.Client
runnerID int64
runnerSecret string
managerSecret string
apiKey string // API key for authentication
wsConn *websocket.Conn
wsConnMu sync.RWMutex
wsWriteMu sync.Mutex // Protects concurrent writes to WebSocket (WebSocket is not thread-safe)
@@ -54,11 +56,13 @@ type Client struct {
allocatedDevices map[int64]string // map[taskID]device - tracks which device is allocated to which task
allocatedDevicesMu sync.RWMutex // Protects allocatedDevices
longRunningClient *http.Client // HTTP client for long-running operations (no timeout)
fingerprint string // Unique hardware fingerprint for this runner
fingerprintMu sync.RWMutex // Protects fingerprint
}
// NewClient creates a new runner client
func NewClient(managerURL, name, hostname string) *Client {
return &Client{
client := &Client{
managerURL: managerURL,
name: name,
hostname: hostname,
@@ -67,13 +71,88 @@ func NewClient(managerURL, name, hostname string) *Client {
stopChan: make(chan struct{}),
stepStartTimes: make(map[string]time.Time),
}
// Generate fingerprint immediately
client.generateFingerprint()
return client
}
// SetSecrets sets the runner and manager secrets
func (c *Client) SetSecrets(runnerID int64, runnerSecret, managerSecret string) {
// generateFingerprint computes the runner's hardware fingerprint and stores it
// in c.fingerprint while holding fingerprintMu for writing.
//
// The fingerprint is the hex-encoded SHA-256 digest of a list of identifiers,
// each followed by a NUL byte: the configured hostname, the trimmed contents of
// /etc/machine-id and /sys/class/dmi/id/product_uuid when they can be read, and
// the MAC address returned by getMACAddress when it succeeds. The intent is a
// value that is stable across restarts but differs between machines.
//
// When nothing beyond the hostname could be collected, the process ID and the
// current Unix time are mixed in instead; the result is then unique but will
// change on the next start.
func (c *Client) generateFingerprint() {
	c.fingerprintMu.Lock()
	defer c.fingerprintMu.Unlock()

	// The hostname is always the first identifier.
	parts := []string{c.hostname}

	// Linux identity files, listed in the order they are hashed. Files that
	// cannot be read are simply skipped.
	for _, path := range []string{"/etc/machine-id", "/sys/class/dmi/id/product_uuid"} {
		raw, readErr := os.ReadFile(path)
		if readErr != nil {
			continue
		}
		parts = append(parts, strings.TrimSpace(string(raw)))
	}

	if mac, macErr := c.getMACAddress(); macErr == nil {
		parts = append(parts, mac)
	}

	// Only the hostname was collected: fall back to per-process values so the
	// fingerprint is at least unique.
	if len(parts) <= 1 {
		parts = append(parts,
			fmt.Sprintf("%d", os.Getpid()),
			fmt.Sprintf("%d", time.Now().Unix()),
		)
	}

	// Each identifier is terminated by a NUL separator, so joining on NUL and
	// appending one trailing NUL yields the exact byte stream to hash.
	digest := sha256.Sum256([]byte(strings.Join(parts, "\x00") + "\x00"))
	c.fingerprint = hex.EncodeToString(digest[:])
}
// getMACAddress returns the hardware (MAC) address, in its string form, of the
// first interface listed by net.Interfaces that is up, is not a loopback
// device, and carries a non-empty hardware address.
//
// It returns the error from net.Interfaces if the list cannot be obtained, or
// an error of its own when no interface qualifies.
func (c *Client) getMACAddress() (string, error) {
	ifaces, err := net.Interfaces()
	if err != nil {
		return "", err
	}

	for i := range ifaces {
		candidate := &ifaces[i]

		isLoopback := candidate.Flags&net.FlagLoopback != 0
		isUp := candidate.Flags&net.FlagUp != 0
		if isLoopback || !isUp {
			continue
		}

		// len of a nil slice is 0, so this single check also covers a nil address.
		if len(candidate.HardwareAddr) > 0 {
			return candidate.HardwareAddr.String(), nil
		}
	}

	return "", fmt.Errorf("no suitable network interface found")
}
// GetFingerprint returns the fingerprint currently stored on the client. The
// value is read while holding fingerprintMu for reading.
func (c *Client) GetFingerprint() string {
	c.fingerprintMu.RLock()
	value := c.fingerprint
	c.fingerprintMu.RUnlock()
	return value
}
// SetAPIKey sets the runner ID and API key
func (c *Client) SetAPIKey(runnerID int64, apiKey string) {
c.runnerID = runnerID
c.runnerSecret = runnerSecret
c.managerSecret = managerSecret
c.apiKey = apiKey
// Initialize runner workspace directory if not already initialized
if c.workspaceDir == "" {
@@ -408,10 +487,15 @@ func (c *Client) Register(registrationToken string) (int64, string, string, erro
}
req := map[string]interface{}{
"name": c.name,
"hostname": c.hostname,
"capabilities": string(capabilitiesJSON),
"registration_token": registrationToken,
"name": c.name,
"hostname": c.hostname,
"capabilities": string(capabilitiesJSON),
"api_key": registrationToken, // API key passed as registrationToken param for compatibility
}
// Only send fingerprint for non-fixed API keys to avoid uniqueness conflicts
if !strings.HasPrefix(registrationToken, "jk_r0_") { // Fixed test key
req["fingerprint"] = c.GetFingerprint()
}
body, _ := json.Marshal(req)
@@ -447,19 +531,16 @@ func (c *Client) Register(registrationToken string) (int64, string, string, erro
}
var result struct {
ID int64 `json:"id"`
RunnerSecret string `json:"runner_secret"`
ManagerSecret string `json:"manager_secret"`
ID int64 `json:"id"`
}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return 0, "", "", fmt.Errorf("failed to decode response: %w", err)
}
c.runnerID = result.ID
c.runnerSecret = result.RunnerSecret
c.managerSecret = result.ManagerSecret
c.apiKey = registrationToken // Store the API key for future use
return result.ID, result.RunnerSecret, result.ManagerSecret, nil
return result.ID, registrationToken, "", nil // Return API key as "runner secret" for compatibility
}
// doSignedRequest performs an authenticated HTTP request using shared secret
@@ -476,7 +557,7 @@ func (c *Client) doSignedRequestLong(method, path string, body []byte, queryPara
// doSignedRequestWithClient performs an authenticated HTTP request using the specified client
func (c *Client) doSignedRequestWithClient(method, path string, body []byte, client *http.Client, queryParams ...string) (*http.Response, error) {
if c.runnerSecret == "" {
if c.apiKey == "" {
return nil, fmt.Errorf("runner not authenticated")
}
@@ -491,15 +572,18 @@ func (c *Client) doSignedRequestWithClient(method, path string, body []byte, cli
return nil, err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("X-Runner-Secret", c.runnerSecret)
// Add authentication - use API key in Authorization header
req.Header.Set("Authorization", "Bearer "+c.apiKey)
if len(body) > 0 {
req.Header.Set("Content-Type", "application/json")
}
return client.Do(req)
}
// ConnectWebSocket establishes a WebSocket connection to the manager
func (c *Client) ConnectWebSocket() error {
if c.runnerID == 0 || c.runnerSecret == "" {
if c.runnerID == 0 || c.apiKey == "" {
return fmt.Errorf("runner not authenticated")
}
@@ -509,8 +593,8 @@ func (c *Client) ConnectWebSocket() error {
// Convert HTTP URL to WebSocket URL
wsURL := strings.Replace(c.managerURL, "http://", "ws://", 1)
wsURL = strings.Replace(wsURL, "https://", "wss://", 1)
wsURL = fmt.Sprintf("%s%s?runner_id=%d&secret=%s",
wsURL, path, c.runnerID, url.QueryEscape(c.runnerSecret))
wsURL = fmt.Sprintf("%s%s?runner_id=%d&api_key=%s",
wsURL, path, c.runnerID, url.QueryEscape(c.apiKey))
// Parse URL
u, err := url.Parse(wsURL)
@@ -868,6 +952,44 @@ func (c *Client) KillAllProcesses() {
log.Printf("Killed %d process(es)", killedCount)
}
// CleanupWorkspace deletes the client's workspace directory, if one is set,
// together with everything inside it, and then calls
// cleanupOrphanedWorkspaces. The orphan sweep runs even when workspaceDir is
// empty. A failed removal is logged as a warning and not returned.
func (c *Client) CleanupWorkspace() {
	dir := c.workspaceDir

	log.Printf("DEBUG: CleanupWorkspace method called")
	log.Printf("CleanupWorkspace called, workspaceDir: %s", dir)

	if dir != "" {
		log.Printf("Cleaning up workspace directory: %s", dir)

		removeErr := os.RemoveAll(dir)
		if removeErr == nil {
			log.Printf("Successfully removed workspace directory: %s", dir)
		} else {
			log.Printf("Warning: Failed to remove workspace directory %s: %v", dir, removeErr)
		}
	}

	// Sweep for leftover directories regardless of whether workspaceDir was set.
	cleanupOrphanedWorkspaces()
}
// cleanupOrphanedWorkspaces deletes a "jiggablend-workspaces" directory, if
// one exists, from the current working directory and from os.TempDir(). It is
// meant to clear leftovers from earlier runs or crashes. Removal failures are
// logged as warnings and do not stop the sweep.
func cleanupOrphanedWorkspaces() {
	log.Printf("Cleaning up orphaned jiggablend workspace directories...")

	for _, parent := range [...]string{".", os.TempDir()} {
		target := filepath.Join(parent, "jiggablend-workspaces")

		// Nothing to do when the directory cannot be stat'ed.
		if _, statErr := os.Stat(target); statErr != nil {
			continue
		}

		log.Printf("Removing orphaned workspace directory: %s", target)
		if rmErr := os.RemoveAll(target); rmErr != nil {
			log.Printf("Warning: Failed to remove workspace directory %s: %v", target, rmErr)
			continue
		}
		log.Printf("Successfully removed workspace directory: %s", target)
	}
}
// sendStepUpdate sends a step start/complete event to the manager
func (c *Client) sendStepUpdate(taskID int64, stepName string, status types.StepStatus, errorMsg string) {
key := fmt.Sprintf("%d:%s", taskID, stepName)
@@ -955,7 +1077,7 @@ func (c *Client) sendStepUpdate(taskID int64, stepName string, status types.Step
}
// processTask processes a single task
func (c *Client) processTask(task map[string]interface{}, jobName string, outputFormat string, inputFiles []interface{}) error {
func (c *Client) processTask(task map[string]interface{}, jobName string, outputFormat string, inputFiles []interface{}) (err error) {
_ = jobName
taskID := int64(task["id"].(float64))
@@ -963,15 +1085,29 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
frameStart := int(task["frame_start"].(float64))
frameEnd := int(task["frame_end"].(float64))
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting task: job %d, frames %d-%d, format: %s", jobID, frameStart, frameEnd, outputFormat), "")
log.Printf("Processing task %d: job %d, frames %d-%d, format: %s (from task assignment)", taskID, jobID, frameStart, frameEnd, outputFormat)
// Create temporary job workspace within runner workspace
workDir := filepath.Join(c.getWorkspaceDir(), fmt.Sprintf("job-%d-task-%d", jobID, taskID))
if err := os.MkdirAll(workDir, 0755); err != nil {
return fmt.Errorf("failed to create work directory: %w", err)
if mkdirErr := os.MkdirAll(workDir, 0755); mkdirErr != nil {
return fmt.Errorf("failed to create work directory: %w", mkdirErr)
}
defer os.RemoveAll(workDir)
// Guaranteed cleanup even on panic
defer func() {
if cleanupErr := os.RemoveAll(workDir); cleanupErr != nil {
log.Printf("Warning: Failed to cleanup work directory %s: %v", workDir, cleanupErr)
}
}()
// Panic recovery for this task
defer func() {
if r := recover(); r != nil {
log.Printf("Task %d panicked: %v", taskID, r)
err = fmt.Errorf("task panicked: %v", r)
}
}()
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting task: job %d, frames %d-%d, format: %s", jobID, frameStart, frameEnd, outputFormat), "")
log.Printf("Processing task %d: job %d, frames %d-%d, format: %s (from task assignment)", taskID, jobID, frameStart, frameEnd, outputFormat)
// Step: download
c.sendStepUpdate(taskID, "download", types.StepStatusRunning, "")
@@ -996,7 +1132,7 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
// Find .blend file in extracted contents
blendFile := ""
err := filepath.Walk(workDir, func(path string, info os.FileInfo, err error) error {
err = filepath.Walk(workDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
@@ -1032,7 +1168,7 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
}
if blendFile == "" {
err := fmt.Errorf("no .blend file found in context")
err := fmt.Errorf("no .blend file found in context - the uploaded context archive must contain at least one .blend file to render")
c.sendStepUpdate(taskID, "download", types.StepStatusFailed, err.Error())
return err
}
@@ -1062,10 +1198,6 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
renderFormat = "EXR" // Use EXR for maximum quality (32-bit float, HDR)
}
// Blender uses # characters for frame number placeholders (not %04d)
// Use #### for 4-digit zero-padded frame numbers
outputPattern := filepath.Join(outputDir, fmt.Sprintf("frame_####.%s", strings.ToLower(renderFormat)))
// Step: render_blender
c.sendStepUpdate(taskID, "render_blender", types.StepStatusRunning, "")
if frameStart == frameEnd {
@@ -1074,14 +1206,8 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting Blender render for frames %d-%d...", frameStart, frameEnd), "render_blender")
}
// Execute Blender - use absolute path for output pattern
absOutputPattern, err := filepath.Abs(outputPattern)
if err != nil {
errMsg := fmt.Sprintf("failed to get absolute path for output: %v", err)
c.sendLog(taskID, types.LogLevelError, errMsg, "render_blender")
c.sendStepUpdate(taskID, "render_blender", types.StepStatusFailed, errMsg)
return errors.New(errMsg)
}
// Always render frames individually for precise control over file naming
// This avoids Blender's automatic frame numbering quirks
// Override output format and render settings from job submission
// For MP4, we render as EXR (handled above) for highest fidelity, so renderFormat is already EXR
@@ -1151,23 +1277,50 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
if enableExecution {
args = append(args, "--enable-autoexec")
}
if frameStart == frameEnd {
// Single frame
args = append(args, "-o", absOutputPattern, "-f", fmt.Sprintf("%d", frameStart))
cmd = exec.Command("blender", args...)
} else {
// Frame range
args = append(args, "-o", absOutputPattern,
"-s", fmt.Sprintf("%d", frameStart),
"-e", fmt.Sprintf("%d", frameEnd),
"-a") // -a renders animation (all frames in range)
cmd = exec.Command("blender", args...)
}
cmd.Dir = workDir
// Always render frames individually for precise control over file naming
// This avoids Blender's automatic frame numbering quirks
for frame := frameStart; frame <= frameEnd; frame++ {
// Create temp output pattern for this frame
tempPattern := filepath.Join(outputDir, fmt.Sprintf("temp_frame.%s", strings.ToLower(renderFormat)))
tempAbsPattern, _ := filepath.Abs(tempPattern)
// Set environment variables for headless rendering
// This helps ensure proper OpenGL context initialization, especially for EEVEE
cmd.Env = os.Environ()
// Build args for this specific frame
frameArgs := []string{"-b", blendFile, "--python", scriptPath}
if enableExecution {
frameArgs = append(frameArgs, "--enable-autoexec")
}
frameArgs = append(frameArgs, "-o", tempAbsPattern, "-f", fmt.Sprintf("%d", frame))
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Rendering frame %d...", frame), "render_blender")
frameCmd := exec.Command("blender", frameArgs...)
frameCmd.Dir = workDir
frameCmd.Env = os.Environ()
// Run this frame
if output, err := frameCmd.CombinedOutput(); err != nil {
errMsg := fmt.Sprintf("blender failed on frame %d: %v (output: %s)", frame, err, string(output))
c.sendLog(taskID, types.LogLevelError, errMsg, "render_blender")
return errors.New(errMsg)
}
// Immediately rename the temp file to the proper frame-numbered name
finalName := fmt.Sprintf("frame_%04d.%s", frame, strings.ToLower(renderFormat))
finalPath := filepath.Join(outputDir, finalName)
tempPath := filepath.Join(outputDir, fmt.Sprintf("temp_frame.%s", strings.ToLower(renderFormat)))
if err := os.Rename(tempPath, finalPath); err != nil {
errMsg := fmt.Sprintf("failed to rename temp file for frame %d: %v", frame, err)
c.sendLog(taskID, types.LogLevelError, errMsg, "render_blender")
return errors.New(errMsg)
}
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Completed frame %d -> %s", frame, finalName), "render_blender")
}
// Skip the rest of the function since we handled all frames above
c.sendStepUpdate(taskID, "render_blender", types.StepStatusCompleted, "")
return nil
// Blender will handle headless rendering automatically
// We preserve the environment to allow GPU access if available
@@ -1249,6 +1402,10 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
return errors.New(errMsg)
}
// For frame ranges, we rendered each frame individually with temp naming
// The files are already properly named during the individual frame rendering
// No additional renaming needed
// Find rendered output file(s)
// For frame ranges, we'll find all frames in the upload step
// For single frames, we need to find the specific output file
@@ -1454,9 +1611,30 @@ func (c *Client) processTask(task map[string]interface{}, jobName string, output
}
// processVideoGenerationTask processes a video generation task
func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID int64) error {
func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID int64) (err error) {
taskID := int64(task["id"].(float64))
// Create temporary job workspace for video generation within runner workspace
workDir := filepath.Join(c.getWorkspaceDir(), fmt.Sprintf("job-%d-video", jobID))
if mkdirErr := os.MkdirAll(workDir, 0755); mkdirErr != nil {
return fmt.Errorf("failed to create work directory: %w", mkdirErr)
}
// Guaranteed cleanup even on panic
defer func() {
if cleanupErr := os.RemoveAll(workDir); cleanupErr != nil {
log.Printf("Warning: Failed to cleanup work directory %s: %v", workDir, cleanupErr)
}
}()
// Panic recovery for this task
defer func() {
if r := recover(); r != nil {
log.Printf("Video generation task %d panicked: %v", taskID, r)
err = fmt.Errorf("video generation task panicked: %v", r)
}
}()
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting video generation task: job %d", jobID), "")
log.Printf("Processing video generation task %d for job %d", taskID, jobID)
@@ -1474,6 +1652,16 @@ func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID i
}
}
// Debug logging for output format detection
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Video generation: detected output format '%s'", outputFormat), "generate_video")
// Get frame rate from render settings
var frameRate float64 = 24.0 // Default fallback
if err == nil && jobMetadata != nil && jobMetadata.RenderSettings.FrameRate > 0 {
frameRate = jobMetadata.RenderSettings.FrameRate
}
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Video generation: using frame rate %.2f fps", frameRate), "generate_video")
// Get all output files for this job
files, err := c.getJobFiles(jobID)
if err != nil {
@@ -1507,14 +1695,6 @@ func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID i
c.sendStepUpdate(taskID, "download_frames", types.StepStatusRunning, "")
c.sendLog(taskID, types.LogLevelInfo, "Downloading EXR frames...", "download_frames")
// Create temporary job workspace for video generation within runner workspace
workDir := filepath.Join(c.getWorkspaceDir(), fmt.Sprintf("job-%d-video", jobID))
if err := os.MkdirAll(workDir, 0755); err != nil {
c.sendStepUpdate(taskID, "download_frames", types.StepStatusFailed, err.Error())
return fmt.Errorf("failed to create work directory: %w", err)
}
defer os.RemoveAll(workDir)
// Download all EXR frames
var frameFiles []string
for _, file := range exrFiles {
@@ -1568,8 +1748,8 @@ func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID i
// Extract frame number pattern (e.g., frame_2470.exr -> frame_%04d.exr)
baseName := filepath.Base(firstFrame)
// Find the numeric part and replace it with %04d pattern
// Use regex to find digits after underscore and before extension
re := regexp.MustCompile(`_(\d+)\.`)
// Use regex to find digits (including negative) after underscore and before extension
re := regexp.MustCompile(`_(-?\d+)\.`)
var pattern string
var startNumber int
frameNumStr := re.FindStringSubmatch(baseName)
@@ -1637,31 +1817,158 @@ func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID i
vf = "zscale=t=linear:npl=100,format=gbrpf32le,zscale=p=bt709,tonemap=tonemap=hable:desat=0,zscale=t=bt709:m=bt709:r=tv,format=yuv420p"
}
cmd = exec.Command("ffmpeg", "-y", "-start_number", fmt.Sprintf("%d", startNumber),
"-framerate", "24", "-i", patternPath,
// Build ffmpeg command with high-quality EXR input processing
cmd = exec.Command("ffmpeg", "-y",
"-f", "image2", // Force image sequence input format
"-start_number", fmt.Sprintf("%d", startNumber),
"-framerate", fmt.Sprintf("%.2f", frameRate),
"-i", patternPath,
"-vf", vf,
"-c:v", codec, "-pix_fmt", pixFmt, "-r", "24", outputMP4)
"-c:v", codec, "-pix_fmt", pixFmt,
"-r", fmt.Sprintf("%.2f", frameRate),
"-color_primaries", "bt709", // Ensure proper color primaries
"-color_trc", "bt709", // Ensure proper transfer characteristics
"-colorspace", "bt709", // Ensure proper color space
outputMP4)
// Prepare codec-specific arguments
var codecArgs []string
if outputFormat == "EXR_AV1_MP4" {
// AV1 encoding options for quality
cmd.Args = append(cmd.Args, "-cpu-used", "4", "-crf", "30", "-b:v", "0")
// AV1 encoding options for maximum quality
codecArgs = []string{"-cpu-used", "1", "-crf", "15", "-b:v", "0", "-row-mt", "1", "-tiles", "4x4", "-lag-in-frames", "25", "-arnr-max-frames", "15", "-arnr-strength", "4"}
} else {
// H.264 encoding options for maximum quality
codecArgs = []string{"-preset", "veryslow", "-crf", "15", "-profile:v", "high", "-level", "5.2", "-tune", "film", "-keyint_min", "24", "-g", "240", "-bf", "2", "-refs", "4"}
}
}
cmd.Dir = workDir
output, err := cmd.CombinedOutput()
if err != nil {
outputStr := string(output)
// Perform 2-pass encoding for optimal quality distribution
c.sendLog(taskID, types.LogLevelInfo, "Starting 2-pass video encoding for optimal quality...", "generate_video")
// PASS 1: Analysis pass (collects statistics for better rate distribution)
c.sendLog(taskID, types.LogLevelInfo, "Pass 1/2: Analyzing video content for optimal encoding...", "generate_video")
pass1Args := append([]string{"-y", "-f", "image2", "-start_number", fmt.Sprintf("%d", startNumber), "-framerate", fmt.Sprintf("%.2f", frameRate), "-i", patternPath, "-vf", vf, "-c:v", codec, "-pix_fmt", pixFmt, "-r", fmt.Sprintf("%.2f", frameRate), "-color_primaries", "bt709", "-color_trc", "bt709", "-colorspace", "bt709"}, codecArgs...)
pass1Args = append(pass1Args, "-pass", "1", "-f", "null", "/dev/null")
pass1Cmd := exec.Command("ffmpeg", pass1Args...)
pass1Cmd.Dir = workDir
pass1Err := pass1Cmd.Run()
if pass1Err != nil {
c.sendLog(taskID, types.LogLevelWarn, fmt.Sprintf("Pass 1 completed (warnings expected): %v", pass1Err), "generate_video")
}
// PASS 2: Encoding pass (uses statistics from pass 1 for optimal quality)
c.sendLog(taskID, types.LogLevelInfo, "Pass 2/2: Encoding video with optimal quality distribution...", "generate_video")
cmd = exec.Command("ffmpeg", "-y", "-f", "image2", "-start_number", fmt.Sprintf("%d", startNumber), "-framerate", fmt.Sprintf("%.2f", frameRate), "-i", patternPath, "-vf", vf, "-c:v", codec, "-pix_fmt", pixFmt, "-r", fmt.Sprintf("%.2f", frameRate), "-color_primaries", "bt709", "-color_trc", "bt709", "-colorspace", "bt709")
cmd.Args = append(cmd.Args, codecArgs...)
cmd.Args = append(cmd.Args, "-pass", "2", outputMP4)
}
// Create stdout and stderr pipes for streaming
stdoutPipe, err := cmd.StdoutPipe()
if err != nil {
errMsg := fmt.Sprintf("failed to create ffmpeg stdout pipe: %v", err)
c.sendLog(taskID, types.LogLevelError, errMsg, "generate_video")
c.sendStepUpdate(taskID, "generate_video", types.StepStatusFailed, errMsg)
return errors.New(errMsg)
}
stderrPipe, err := cmd.StderrPipe()
if err != nil {
errMsg := fmt.Sprintf("failed to create ffmpeg stderr pipe: %v", err)
c.sendLog(taskID, types.LogLevelError, errMsg, "generate_video")
c.sendStepUpdate(taskID, "generate_video", types.StepStatusFailed, errMsg)
return errors.New(errMsg)
}
cmd.Dir = workDir
// Start the command
if err := cmd.Start(); err != nil {
errMsg := fmt.Sprintf("failed to start ffmpeg: %v", err)
c.sendLog(taskID, types.LogLevelError, errMsg, "generate_video")
c.sendStepUpdate(taskID, "generate_video", types.StepStatusFailed, errMsg)
return errors.New(errMsg)
}
// Register process for cleanup on shutdown
c.runningProcs.Store(taskID, cmd)
defer c.runningProcs.Delete(taskID)
// Stream stdout line by line
stdoutDone := make(chan bool)
go func() {
defer close(stdoutDone)
scanner := bufio.NewScanner(stdoutPipe)
for scanner.Scan() {
line := scanner.Text()
if line != "" {
// Filter out common ffmpeg informational messages that aren't useful
if !strings.Contains(line, "Input #") &&
!strings.Contains(line, "Duration:") &&
!strings.Contains(line, "Stream mapping:") &&
!strings.Contains(line, "Output #") &&
!strings.Contains(line, "encoder") &&
!strings.Contains(line, "fps=") &&
!strings.Contains(line, "size=") &&
!strings.Contains(line, "time=") &&
!strings.Contains(line, "bitrate=") &&
!strings.Contains(line, "speed=") {
c.sendLog(taskID, types.LogLevelInfo, line, "generate_video")
}
}
}
}()
// Stream stderr line by line
stderrDone := make(chan bool)
go func() {
defer close(stderrDone)
scanner := bufio.NewScanner(stderrPipe)
for scanner.Scan() {
line := scanner.Text()
if line != "" {
// Filter out common ffmpeg informational messages and show only warnings/errors
if strings.Contains(line, "error") ||
strings.Contains(line, "Error") ||
strings.Contains(line, "failed") ||
strings.Contains(line, "Failed") ||
strings.Contains(line, "warning") ||
strings.Contains(line, "Warning") {
c.sendLog(taskID, types.LogLevelWarn, line, "generate_video")
} else if !strings.Contains(line, "Input #") &&
!strings.Contains(line, "Duration:") &&
!strings.Contains(line, "Stream mapping:") &&
!strings.Contains(line, "Output #") &&
!strings.Contains(line, "encoder") &&
!strings.Contains(line, "fps=") &&
!strings.Contains(line, "size=") &&
!strings.Contains(line, "time=") &&
!strings.Contains(line, "bitrate=") &&
!strings.Contains(line, "speed=") {
c.sendLog(taskID, types.LogLevelInfo, line, "generate_video")
}
}
}
}()
// Wait for command to complete
err = cmd.Wait()
// Wait for streaming goroutines to finish
<-stdoutDone
<-stderrDone
if err != nil {
// Check for size-related errors and provide helpful messages
if sizeErr := c.checkFFmpegSizeError(outputStr); sizeErr != nil {
if sizeErr := c.checkFFmpegSizeError("ffmpeg encoding failed"); sizeErr != nil {
c.sendLog(taskID, types.LogLevelError, sizeErr.Error(), "generate_video")
c.sendStepUpdate(taskID, "generate_video", types.StepStatusFailed, sizeErr.Error())
return sizeErr
}
// Try alternative method with concat demuxer
log.Printf("First ffmpeg attempt failed, trying concat method: %s", outputStr)
err = c.generateMP4WithConcat(frameFiles, outputMP4, workDir, allocatedDevice, outputFormat, codec, pixFmt, useAlpha, useHardware)
c.sendLog(taskID, types.LogLevelWarn, "Primary ffmpeg encoding failed, trying concat method...", "generate_video")
err = c.generateMP4WithConcat(frameFiles, outputMP4, workDir, allocatedDevice, outputFormat, codec, pixFmt, useAlpha, useHardware, frameRate)
if err != nil {
// Check for size errors in concat method too
if sizeErr := c.checkFFmpegSizeError(err.Error()); sizeErr != nil {
@@ -1681,8 +1988,12 @@ func (c *Client) processVideoGenerationTask(task map[string]interface{}, jobID i
return err
}
// Clean up 2-pass log files
_ = os.Remove(filepath.Join(workDir, "ffmpeg2pass-0.log"))
_ = os.Remove(filepath.Join(workDir, "ffmpeg2pass-0.log.mbtree"))
c.sendStepUpdate(taskID, "generate_video", types.StepStatusCompleted, "")
c.sendLog(taskID, types.LogLevelInfo, "MP4 video generated successfully", "generate_video")
c.sendLog(taskID, types.LogLevelInfo, "MP4 video generated with 2-pass encoding successfully", "generate_video")
// Step: upload_video
c.sendStepUpdate(taskID, "upload_video", types.StepStatusRunning, "")
@@ -1779,7 +2090,7 @@ func (c *Client) buildFFmpegCommand(device string, args ...string) (*exec.Cmd, e
}
// No hardware acceleration available
return nil, fmt.Errorf("no hardware encoder available")
return nil, fmt.Errorf("no hardware encoder available for video encoding - falling back to software encoding which may be slower")
}
// buildFFmpegCommandAV1 builds an ffmpeg command with AV1 hardware acceleration if available
@@ -1881,7 +2192,7 @@ func (c *Client) buildFFmpegCommandAV1(device string, useAlpha bool, args ...str
}
// No AV1 hardware acceleration available
return nil, fmt.Errorf("no AV1 hardware encoder available")
return nil, fmt.Errorf("no AV1 hardware encoder available - falling back to software AV1 encoding which may be slower")
}
// probeAllHardwareAccelerators probes ffmpeg for all available hardware acceleration methods
@@ -2460,7 +2771,7 @@ func (c *Client) testGenericEncoder(encoder string) bool {
// generateMP4WithConcat uses ffmpeg concat demuxer as fallback
// device parameter is optional - if provided, it will be used for VAAPI encoding
func (c *Client) generateMP4WithConcat(frameFiles []string, outputMP4, workDir string, device string, outputFormat string, codec string, pixFmt string, useAlpha bool, useHardware bool) error {
func (c *Client) generateMP4WithConcat(frameFiles []string, outputMP4, workDir string, device string, outputFormat string, codec string, pixFmt string, useAlpha bool, useHardware bool, frameRate float64) error {
// Create file list for ffmpeg concat demuxer
listFile := filepath.Join(workDir, "frames.txt")
listFileHandle, err := os.Create(listFile)
@@ -2509,30 +2820,108 @@ func (c *Client) generateMP4WithConcat(frameFiles []string, outputMP4, workDir s
}
if !useHardware {
// Software encoding with HDR tonemapping
cmd = exec.Command("ffmpeg", "-f", "concat", "-safe", "0", "-i", listFile,
"-vf", vf,
"-c:v", codec, "-pix_fmt", pixFmt, "-r", "24", "-y", outputMP4)
// Software encoding with HDR tonemapping - 2-pass for optimal quality
var codecArgs []string
if outputFormat == "EXR_AV1_MP4" {
// AV1 encoding options for quality
cmd.Args = append(cmd.Args, "-cpu-used", "4", "-crf", "30", "-b:v", "0")
codecArgs = []string{"-cpu-used", "1", "-crf", "15", "-b:v", "0", "-row-mt", "1", "-tiles", "4x4", "-lag-in-frames", "25", "-arnr-max-frames", "15", "-arnr-strength", "4"}
} else {
codecArgs = []string{"-preset", "veryslow", "-crf", "15", "-profile:v", "high", "-level", "5.2", "-tune", "film", "-keyint_min", "24", "-g", "240", "-bf", "2", "-refs", "4"}
}
// PASS 1: Analysis pass
pass1Args := append([]string{"-f", "concat", "-safe", "0", "-i", listFile, "-vf", vf, "-c:v", codec, "-pix_fmt", pixFmt, "-r", fmt.Sprintf("%.2f", frameRate)}, codecArgs...)
pass1Args = append(pass1Args, "-pass", "1", "-f", "null", "/dev/null")
pass1Cmd := exec.Command("ffmpeg", pass1Args...)
pass1Cmd.Dir = workDir
_ = pass1Cmd.Run() // Ignore errors for pass 1
// PASS 2: Encoding pass
cmd = exec.Command("ffmpeg", "-f", "concat", "-safe", "0", "-i", listFile, "-vf", vf, "-c:v", codec, "-pix_fmt", pixFmt, "-r", fmt.Sprintf("%.2f", frameRate))
cmd.Args = append(cmd.Args, codecArgs...)
cmd.Args = append(cmd.Args, "-pass", "2", "-y", outputMP4)
}
output, err := cmd.CombinedOutput()
// Create stdout and stderr pipes for streaming
stdoutPipe, err := cmd.StdoutPipe()
if err != nil {
return fmt.Errorf("failed to create ffmpeg stdout pipe: %w", err)
}
stderrPipe, err := cmd.StderrPipe()
if err != nil {
return fmt.Errorf("failed to create ffmpeg stderr pipe: %w", err)
}
cmd.Dir = workDir
// Start the command
if err := cmd.Start(); err != nil {
return fmt.Errorf("failed to start ffmpeg: %w", err)
}
// Stream stdout line by line (minimal logging for concat method)
stdoutDone := make(chan bool)
go func() {
defer close(stdoutDone)
scanner := bufio.NewScanner(stdoutPipe)
for scanner.Scan() {
line := scanner.Text()
if line != "" {
// Only log lines mentioning an error or failure for concat method (warnings are not matched here)
if strings.Contains(line, "error") ||
strings.Contains(line, "Error") ||
strings.Contains(line, "failed") ||
strings.Contains(line, "Failed") {
log.Printf("FFmpeg concat stdout: %s", line)
}
}
}
}()
// Stream stderr line by line
stderrDone := make(chan bool)
go func() {
defer close(stderrDone)
scanner := bufio.NewScanner(stderrPipe)
for scanner.Scan() {
line := scanner.Text()
if line != "" {
// Log warnings and errors for concat method
if strings.Contains(line, "error") ||
strings.Contains(line, "Error") ||
strings.Contains(line, "failed") ||
strings.Contains(line, "Failed") ||
strings.Contains(line, "warning") ||
strings.Contains(line, "Warning") {
log.Printf("FFmpeg concat stderr: %s", line)
}
}
}
}()
// Wait for command to complete
err = cmd.Wait()
// Wait for streaming goroutines to finish
<-stdoutDone
<-stderrDone
if err != nil {
outputStr := string(output)
// Check for size-related errors
if sizeErr := c.checkFFmpegSizeError(outputStr); sizeErr != nil {
if sizeErr := c.checkFFmpegSizeError("ffmpeg concat failed"); sizeErr != nil {
return sizeErr
}
return fmt.Errorf("ffmpeg concat failed: %w\nOutput: %s", err, outputStr)
return fmt.Errorf("ffmpeg concat failed: %w", err)
}
if _, err := os.Stat(outputMP4); os.IsNotExist(err) {
return fmt.Errorf("MP4 file not created: %s", outputMP4)
}
// Clean up 2-pass log files
_ = os.Remove(filepath.Join(workDir, "ffmpeg2pass-0.log"))
_ = os.Remove(filepath.Join(workDir, "ffmpeg2pass-0.log.mbtree"))
return nil
}
@@ -2774,7 +3163,7 @@ func (c *Client) uploadFile(jobID int64, filePath string) (string, error) {
}
req.Header.Set("Content-Type", formWriter.FormDataContentType())
req.Header.Set("X-Runner-Secret", c.runnerSecret)
req.Header.Set("Authorization", "Bearer "+c.apiKey)
// Use long-running client for file uploads (no timeout)
resp, err := c.longRunningClient.Do(req)
@@ -2996,18 +3385,32 @@ func (c *Client) cleanupExpiredContextCache() {
}
// processMetadataTask processes a metadata extraction task
func (c *Client) processMetadataTask(task map[string]interface{}, jobID int64, inputFiles []interface{}) error {
func (c *Client) processMetadataTask(task map[string]interface{}, jobID int64, inputFiles []interface{}) (err error) {
taskID := int64(task["id"].(float64))
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting metadata extraction task: job %d", jobID), "")
log.Printf("Processing metadata extraction task %d for job %d", taskID, jobID)
// Create temporary job workspace for metadata extraction within runner workspace
workDir := filepath.Join(c.getWorkspaceDir(), fmt.Sprintf("job-%d-metadata-%d", jobID, taskID))
if err := os.MkdirAll(workDir, 0755); err != nil {
return fmt.Errorf("failed to create work directory: %w", err)
if mkdirErr := os.MkdirAll(workDir, 0755); mkdirErr != nil {
return fmt.Errorf("failed to create work directory: %w", mkdirErr)
}
defer os.RemoveAll(workDir)
// Guaranteed cleanup even on panic
defer func() {
if cleanupErr := os.RemoveAll(workDir); cleanupErr != nil {
log.Printf("Warning: Failed to cleanup work directory %s: %v", workDir, cleanupErr)
}
}()
// Panic recovery for this task
defer func() {
if r := recover(); r != nil {
log.Printf("Metadata extraction task %d panicked: %v", taskID, r)
err = fmt.Errorf("metadata extraction task panicked: %v", r)
}
}()
c.sendLog(taskID, types.LogLevelInfo, fmt.Sprintf("Starting metadata extraction task: job %d", jobID), "")
log.Printf("Processing metadata extraction task %d for job %d", taskID, jobID)
// Step: download
c.sendStepUpdate(taskID, "download", types.StepStatusRunning, "")
@@ -3029,7 +3432,7 @@ func (c *Client) processMetadataTask(task map[string]interface{}, jobID int64, i
// Find .blend file in extracted contents
blendFile := ""
err := filepath.Walk(workDir, func(path string, info os.FileInfo, err error) error {
err = filepath.Walk(workDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
@@ -3065,7 +3468,7 @@ func (c *Client) processMetadataTask(task map[string]interface{}, jobID int64, i
}
if blendFile == "" {
err := fmt.Errorf("no .blend file found in context")
err := fmt.Errorf("no .blend file found in context - the uploaded context archive must contain at least one .blend file to render")
c.sendStepUpdate(taskID, "download", types.StepStatusFailed, err.Error())
return err
}
@@ -3406,7 +3809,7 @@ func (c *Client) submitMetadata(jobID int64, metadata types.BlendMetadata) error
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("X-Runner-Secret", c.runnerSecret)
req.Header.Set("Authorization", "Bearer "+c.apiKey)
resp, err := c.httpClient.Do(req)
if err != nil {