// Package runner provides the Jiggablend render runner. package runner import ( "crypto/sha256" "encoding/hex" "fmt" "log" "net" "os" "os/exec" "strings" "sync" "time" "jiggablend/internal/runner/api" "jiggablend/internal/runner/blender" "jiggablend/internal/runner/encoding" "jiggablend/internal/runner/tasks" "jiggablend/internal/runner/workspace" "jiggablend/pkg/executils" "jiggablend/pkg/types" ) // Runner is the main render runner. type Runner struct { id int64 name string hostname string manager *api.ManagerClient workspace *workspace.Manager blender *blender.Manager encoder *encoding.Selector processes *executils.ProcessTracker processors map[string]tasks.Processor stopChan chan struct{} fingerprint string fingerprintMu sync.RWMutex } // New creates a new runner. func New(managerURL, name, hostname string) *Runner { manager := api.NewManagerClient(managerURL) r := &Runner{ name: name, hostname: hostname, manager: manager, processes: executils.NewProcessTracker(), stopChan: make(chan struct{}), processors: make(map[string]tasks.Processor), } // Generate fingerprint r.generateFingerprint() return r } // CheckRequiredTools verifies that required external tools are available. func (r *Runner) CheckRequiredTools() error { if err := exec.Command("zstd", "--version").Run(); err != nil { return fmt.Errorf("zstd not found - required for compressed blend file support. Install with: apt install zstd") } log.Printf("Found zstd for compressed blend file support") if err := exec.Command("xvfb-run", "--help").Run(); err != nil { return fmt.Errorf("xvfb-run not found - required for headless Blender rendering. Install with: apt install xvfb") } log.Printf("Found xvfb-run for headless rendering without -b option") return nil } var cachedCapabilities map[string]interface{} = nil // ProbeCapabilities detects hardware capabilities. func (r *Runner) ProbeCapabilities() map[string]interface{} { if cachedCapabilities != nil { return cachedCapabilities } caps := make(map[string]interface{}) // Check for ffmpeg and probe encoding capabilities if err := exec.Command("ffmpeg", "-version").Run(); err == nil { caps["ffmpeg"] = true } else { caps["ffmpeg"] = false } cachedCapabilities = caps return caps } // Register registers the runner with the manager. func (r *Runner) Register(apiKey string) (int64, error) { caps := r.ProbeCapabilities() id, err := r.manager.Register(r.name, r.hostname, caps, apiKey, r.GetFingerprint()) if err != nil { return 0, err } r.id = id // Initialize workspace after registration r.workspace = workspace.NewManager(r.name) // Initialize blender manager r.blender = blender.NewManager(r.manager, r.workspace.BaseDir()) // Initialize encoder selector r.encoder = encoding.NewSelector() // Register task processors r.processors["render"] = tasks.NewRenderProcessor() r.processors["encode"] = tasks.NewEncodeProcessor() return id, nil } // Start starts the job polling loop. func (r *Runner) Start(pollInterval time.Duration) { log.Printf("Starting job polling loop (interval: %v)", pollInterval) for { select { case <-r.stopChan: log.Printf("Stopping job polling loop") return default: } log.Printf("Polling for next job (runner ID: %d)", r.id) job, err := r.manager.PollNextJob() if err != nil { log.Printf("Error polling for job: %v", err) time.Sleep(pollInterval) continue } if job == nil { log.Printf("No job available, sleeping for %v", pollInterval) time.Sleep(pollInterval) continue } log.Printf("Received job assignment: task=%d, job=%d, type=%s", job.Task.TaskID, job.Task.JobID, job.Task.TaskType) if err := r.executeJob(job); err != nil { log.Printf("Error processing job: %v", err) } } } // Stop stops the runner. func (r *Runner) Stop() { close(r.stopChan) } // KillAllProcesses kills all running processes. func (r *Runner) KillAllProcesses() { log.Printf("Killing all running processes...") killedCount := r.processes.KillAll() // Release all allocated devices if r.encoder != nil { // Device pool cleanup is handled internally } log.Printf("Killed %d process(es)", killedCount) } // Cleanup removes the workspace directory. func (r *Runner) Cleanup() { if r.workspace != nil { r.workspace.Cleanup() } } // executeJob handles a job using per-job WebSocket connection. func (r *Runner) executeJob(job *api.NextJobResponse) (err error) { // Recover from panics to prevent runner process crashes during task execution defer func() { if rec := recover(); rec != nil { log.Printf("Task execution panicked: %v", rec) err = fmt.Errorf("task execution panicked: %v", rec) } }() // Connect to job WebSocket (no runnerID needed - authentication handles it) jobConn := api.NewJobConnection() if err := jobConn.Connect(r.manager.GetBaseURL(), job.JobPath, job.JobToken); err != nil { return fmt.Errorf("failed to connect job WebSocket: %w", err) } defer jobConn.Close() log.Printf("Job WebSocket authenticated for task %d", job.Task.TaskID) // Create task context workDir := r.workspace.JobDir(job.Task.JobID) ctx := tasks.NewContext( job.Task.TaskID, job.Task.JobID, job.Task.JobName, job.Task.Frame, job.Task.TaskType, workDir, job.JobToken, job.Task.Metadata, r.manager, jobConn, r.workspace, r.blender, r.encoder, r.processes, ) ctx.Info(fmt.Sprintf("Task assignment received (job: %d, type: %s)", job.Task.JobID, job.Task.TaskType)) // Get processor for task type processor, ok := r.processors[job.Task.TaskType] if !ok { return fmt.Errorf("unknown task type: %s", job.Task.TaskType) } // Process the task var processErr error switch job.Task.TaskType { case "render": // this task has a upload outputs step because the frames are not uploaded by the render task directly we have to do it manually here TODO: maybe we should make it work like the encode task // Download context contextPath := job.JobPath + "/context.tar" if err := r.downloadContext(job.Task.JobID, contextPath, job.JobToken); err != nil { jobConn.Log(job.Task.TaskID, types.LogLevelError, fmt.Sprintf("Failed to download context: %v", err)) jobConn.Complete(job.Task.TaskID, false, fmt.Errorf("failed to download context: %v", err)) return fmt.Errorf("failed to download context: %w", err) } processErr = processor.Process(ctx) if processErr == nil { processErr = r.uploadOutputs(ctx, job) } case "encode": // this task doesn't have a upload outputs step because the video is already uploaded by the encode task processErr = processor.Process(ctx) default: return fmt.Errorf("unknown task type: %s", job.Task.TaskType) } if processErr != nil { ctx.Error(fmt.Sprintf("Task failed: %v", processErr)) ctx.Complete(false, processErr) return processErr } ctx.Complete(true, nil) return nil } func (r *Runner) downloadContext(jobID int64, contextPath, jobToken string) error { reader, err := r.manager.DownloadContext(contextPath, jobToken) if err != nil { return err } defer reader.Close() jobDir := r.workspace.JobDir(jobID) return workspace.ExtractTar(reader, jobDir) } func (r *Runner) uploadOutputs(ctx *tasks.Context, job *api.NextJobResponse) error { outputDir := ctx.WorkDir + "/output" uploadPath := fmt.Sprintf("/api/runner/jobs/%d/upload", job.Task.JobID) entries, err := os.ReadDir(outputDir) if err != nil { return fmt.Errorf("failed to read output directory: %w", err) } for _, entry := range entries { if entry.IsDir() { continue } filePath := outputDir + "/" + entry.Name() if err := r.manager.UploadFile(uploadPath, job.JobToken, filePath); err != nil { log.Printf("Failed to upload %s: %v", filePath, err) } else { ctx.OutputUploaded(entry.Name()) } } return nil } // generateFingerprint creates a unique hardware fingerprint. func (r *Runner) generateFingerprint() { r.fingerprintMu.Lock() defer r.fingerprintMu.Unlock() var components []string components = append(components, r.hostname) if machineID, err := os.ReadFile("/etc/machine-id"); err == nil { components = append(components, strings.TrimSpace(string(machineID))) } if productUUID, err := os.ReadFile("/sys/class/dmi/id/product_uuid"); err == nil { components = append(components, strings.TrimSpace(string(productUUID))) } if macAddr, err := r.getMACAddress(); err == nil { components = append(components, macAddr) } if len(components) <= 1 { components = append(components, fmt.Sprintf("%d", os.Getpid())) components = append(components, fmt.Sprintf("%d", time.Now().Unix())) } h := sha256.New() for _, comp := range components { h.Write([]byte(comp)) h.Write([]byte{0}) } r.fingerprint = hex.EncodeToString(h.Sum(nil)) } func (r *Runner) getMACAddress() (string, error) { interfaces, err := net.Interfaces() if err != nil { return "", err } for _, iface := range interfaces { if iface.Flags&net.FlagLoopback != 0 || iface.Flags&net.FlagUp == 0 { continue } if len(iface.HardwareAddr) == 0 { continue } return iface.HardwareAddr.String(), nil } return "", fmt.Errorf("no suitable network interface found") } // GetFingerprint returns the runner's hardware fingerprint. func (r *Runner) GetFingerprint() string { r.fingerprintMu.RLock() defer r.fingerprintMu.RUnlock() return r.fingerprint } // GetID returns the runner ID. func (r *Runner) GetID() int64 { return r.id }