Update .gitignore to include log files and database journal files. Modify go.mod to update dependencies for go-sqlite3 and cloud.google.com/go/compute/metadata. Enhance Makefile to include logging options for manager and runner commands. Introduce new job token handling in auth package and implement database migration scripts. Refactor manager and runner components to improve job processing and metadata extraction. Add support for video preview in frontend components and enhance WebSocket management for channel subscriptions.

This commit is contained in:
2026-01-02 13:55:19 -06:00
parent edc8ea160c
commit 94490237fe
44 changed files with 9463 additions and 7875 deletions

361
internal/runner/runner.go Normal file
View File

@@ -0,0 +1,361 @@
// Package runner provides the Jiggablend render runner.
package runner
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"log"
"net"
"os"
"os/exec"
"strings"
"sync"
"time"
"jiggablend/internal/runner/api"
"jiggablend/internal/runner/blender"
"jiggablend/internal/runner/encoding"
"jiggablend/internal/runner/tasks"
"jiggablend/internal/runner/workspace"
"jiggablend/pkg/executils"
"jiggablend/pkg/types"
)
// Runner is the main render runner.
type Runner struct {
id int64
name string
hostname string
manager *api.ManagerClient
workspace *workspace.Manager
blender *blender.Manager
encoder *encoding.Selector
processes *executils.ProcessTracker
processors map[string]tasks.Processor
stopChan chan struct{}
fingerprint string
fingerprintMu sync.RWMutex
}
// New creates a new runner.
func New(managerURL, name, hostname string) *Runner {
manager := api.NewManagerClient(managerURL)
r := &Runner{
name: name,
hostname: hostname,
manager: manager,
processes: executils.NewProcessTracker(),
stopChan: make(chan struct{}),
processors: make(map[string]tasks.Processor),
}
// Generate fingerprint
r.generateFingerprint()
return r
}
// CheckRequiredTools verifies that required external tools are available.
func (r *Runner) CheckRequiredTools() error {
if err := exec.Command("zstd", "--version").Run(); err != nil {
return fmt.Errorf("zstd not found - required for compressed blend file support. Install with: apt install zstd")
}
log.Printf("Found zstd for compressed blend file support")
if err := exec.Command("xvfb-run", "--help").Run(); err != nil {
return fmt.Errorf("xvfb-run not found - required for headless Blender rendering. Install with: apt install xvfb")
}
log.Printf("Found xvfb-run for headless rendering without -b option")
return nil
}
var cachedCapabilities map[string]interface{} = nil
// ProbeCapabilities detects hardware capabilities.
func (r *Runner) ProbeCapabilities() map[string]interface{} {
if cachedCapabilities != nil {
return cachedCapabilities
}
caps := make(map[string]interface{})
// Check for ffmpeg and probe encoding capabilities
if err := exec.Command("ffmpeg", "-version").Run(); err == nil {
caps["ffmpeg"] = true
} else {
caps["ffmpeg"] = false
}
cachedCapabilities = caps
return caps
}
// Register registers the runner with the manager.
func (r *Runner) Register(apiKey string) (int64, error) {
caps := r.ProbeCapabilities()
id, err := r.manager.Register(r.name, r.hostname, caps, apiKey, r.GetFingerprint())
if err != nil {
return 0, err
}
r.id = id
// Initialize workspace after registration
r.workspace = workspace.NewManager(r.name)
// Initialize blender manager
r.blender = blender.NewManager(r.manager, r.workspace.BaseDir())
// Initialize encoder selector
r.encoder = encoding.NewSelector()
// Register task processors
r.processors["render"] = tasks.NewRenderProcessor()
r.processors["encode"] = tasks.NewEncodeProcessor()
return id, nil
}
// Start starts the job polling loop.
func (r *Runner) Start(pollInterval time.Duration) {
log.Printf("Starting job polling loop (interval: %v)", pollInterval)
for {
select {
case <-r.stopChan:
log.Printf("Stopping job polling loop")
return
default:
}
log.Printf("Polling for next job (runner ID: %d)", r.id)
job, err := r.manager.PollNextJob()
if err != nil {
log.Printf("Error polling for job: %v", err)
time.Sleep(pollInterval)
continue
}
if job == nil {
log.Printf("No job available, sleeping for %v", pollInterval)
time.Sleep(pollInterval)
continue
}
log.Printf("Received job assignment: task=%d, job=%d, type=%s",
job.Task.TaskID, job.Task.JobID, job.Task.TaskType)
if err := r.executeJob(job); err != nil {
log.Printf("Error processing job: %v", err)
}
}
}
// Stop stops the runner.
func (r *Runner) Stop() {
close(r.stopChan)
}
// KillAllProcesses kills all running processes.
func (r *Runner) KillAllProcesses() {
log.Printf("Killing all running processes...")
killedCount := r.processes.KillAll()
// Release all allocated devices
if r.encoder != nil {
// Device pool cleanup is handled internally
}
log.Printf("Killed %d process(es)", killedCount)
}
// Cleanup removes the workspace directory.
func (r *Runner) Cleanup() {
if r.workspace != nil {
r.workspace.Cleanup()
}
}
// executeJob handles a job using per-job WebSocket connection.
func (r *Runner) executeJob(job *api.NextJobResponse) (err error) {
// Recover from panics to prevent runner process crashes during task execution
defer func() {
if rec := recover(); rec != nil {
log.Printf("Task execution panicked: %v", rec)
err = fmt.Errorf("task execution panicked: %v", rec)
}
}()
// Connect to job WebSocket (no runnerID needed - authentication handles it)
jobConn := api.NewJobConnection()
if err := jobConn.Connect(r.manager.GetBaseURL(), job.JobPath, job.JobToken); err != nil {
return fmt.Errorf("failed to connect job WebSocket: %w", err)
}
defer jobConn.Close()
log.Printf("Job WebSocket authenticated for task %d", job.Task.TaskID)
// Create task context
workDir := r.workspace.JobDir(job.Task.JobID)
ctx := tasks.NewContext(
job.Task.TaskID,
job.Task.JobID,
job.Task.JobName,
job.Task.Frame,
job.Task.TaskType,
workDir,
job.JobToken,
job.Task.Metadata,
r.manager,
jobConn,
r.workspace,
r.blender,
r.encoder,
r.processes,
)
ctx.Info(fmt.Sprintf("Task assignment received (job: %d, type: %s)",
job.Task.JobID, job.Task.TaskType))
// Get processor for task type
processor, ok := r.processors[job.Task.TaskType]
if !ok {
return fmt.Errorf("unknown task type: %s", job.Task.TaskType)
}
// Process the task
var processErr error
switch job.Task.TaskType {
case "render": // this task has a upload outputs step because the frames are not uploaded by the render task directly we have to do it manually here TODO: maybe we should make it work like the encode task
// Download context
contextPath := job.JobPath + "/context.tar"
if err := r.downloadContext(job.Task.JobID, contextPath, job.JobToken); err != nil {
jobConn.Log(job.Task.TaskID, types.LogLevelError, fmt.Sprintf("Failed to download context: %v", err))
jobConn.Complete(job.Task.TaskID, false, fmt.Errorf("failed to download context: %v", err))
return fmt.Errorf("failed to download context: %w", err)
}
processErr = processor.Process(ctx)
if processErr == nil {
processErr = r.uploadOutputs(ctx, job)
}
case "encode": // this task doesn't have a upload outputs step because the video is already uploaded by the encode task
processErr = processor.Process(ctx)
default:
return fmt.Errorf("unknown task type: %s", job.Task.TaskType)
}
if processErr != nil {
ctx.Error(fmt.Sprintf("Task failed: %v", processErr))
ctx.Complete(false, processErr)
return processErr
}
ctx.Complete(true, nil)
return nil
}
func (r *Runner) downloadContext(jobID int64, contextPath, jobToken string) error {
reader, err := r.manager.DownloadContext(contextPath, jobToken)
if err != nil {
return err
}
defer reader.Close()
jobDir := r.workspace.JobDir(jobID)
return workspace.ExtractTar(reader, jobDir)
}
func (r *Runner) uploadOutputs(ctx *tasks.Context, job *api.NextJobResponse) error {
outputDir := ctx.WorkDir + "/output"
uploadPath := fmt.Sprintf("/api/runner/jobs/%d/upload", job.Task.JobID)
entries, err := os.ReadDir(outputDir)
if err != nil {
return fmt.Errorf("failed to read output directory: %w", err)
}
for _, entry := range entries {
if entry.IsDir() {
continue
}
filePath := outputDir + "/" + entry.Name()
if err := r.manager.UploadFile(uploadPath, job.JobToken, filePath); err != nil {
log.Printf("Failed to upload %s: %v", filePath, err)
} else {
ctx.OutputUploaded(entry.Name())
}
}
return nil
}
// generateFingerprint creates a unique hardware fingerprint.
func (r *Runner) generateFingerprint() {
r.fingerprintMu.Lock()
defer r.fingerprintMu.Unlock()
var components []string
components = append(components, r.hostname)
if machineID, err := os.ReadFile("/etc/machine-id"); err == nil {
components = append(components, strings.TrimSpace(string(machineID)))
}
if productUUID, err := os.ReadFile("/sys/class/dmi/id/product_uuid"); err == nil {
components = append(components, strings.TrimSpace(string(productUUID)))
}
if macAddr, err := r.getMACAddress(); err == nil {
components = append(components, macAddr)
}
if len(components) <= 1 {
components = append(components, fmt.Sprintf("%d", os.Getpid()))
components = append(components, fmt.Sprintf("%d", time.Now().Unix()))
}
h := sha256.New()
for _, comp := range components {
h.Write([]byte(comp))
h.Write([]byte{0})
}
r.fingerprint = hex.EncodeToString(h.Sum(nil))
}
func (r *Runner) getMACAddress() (string, error) {
interfaces, err := net.Interfaces()
if err != nil {
return "", err
}
for _, iface := range interfaces {
if iface.Flags&net.FlagLoopback != 0 || iface.Flags&net.FlagUp == 0 {
continue
}
if len(iface.HardwareAddr) == 0 {
continue
}
return iface.HardwareAddr.String(), nil
}
return "", fmt.Errorf("no suitable network interface found")
}
// GetFingerprint returns the runner's hardware fingerprint.
func (r *Runner) GetFingerprint() string {
r.fingerprintMu.RLock()
defer r.fingerprintMu.RUnlock()
return r.fingerprint
}
// GetID returns the runner ID.
func (r *Runner) GetID() int64 {
return r.id
}