Files
jiggablend/cmd/jiggablend/cmd/runner.go
Justin Harms 16d6a95058 Refactor runner and installation scripts for improved functionality
- Removed the `--disable-hiprt` flag from the runner command, simplifying the rendering options for users.
- Updated the `jiggablend-runner` script and README to reflect the removal of the HIPRT control flag, enhancing clarity in usage instructions.
- Enhanced the installation script to provide clearer examples for running the jiggablend manager and runner, improving user experience during setup.
- Implemented a more robust GPU backend detection mechanism, allowing for better compatibility with various hardware configurations.
2026-03-14 21:08:06 -05:00

215 lines
6.2 KiB
Go

package cmd
import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"os"
	"os/signal"
	"sort"
	"strings"
	"syscall"
	"time"

	"jiggablend/internal/logger"
	"jiggablend/internal/runner"

	"github.com/spf13/cobra"
	"github.com/spf13/viper"
)
var (
	// runnerViper is a dedicated viper instance for the runner command, so its
	// flag and environment bindings are kept separate from any other viper
	// instance in the program.
	runnerViper = viper.New()

	// runnerCmd is the "runner" subcommand; its work is done by runRunner.
	runnerCmd = &cobra.Command{
		Use:   "runner",
		Short: "Start the Jiggablend render runner",
		Long:  `Start the Jiggablend render runner that connects to a manager and processes render tasks.`,
		Run:   runRunner,
	}
)
// init attaches the runner subcommand to the root command, declares its
// command-line flags, and binds every flag to a key in runnerViper so the same
// settings can be read through viper (including JIGGABLEND_-prefixed
// environment variables).
func init() {
	rootCmd.AddCommand(runnerCmd)

	// Flag declarations.
	flags := runnerCmd.Flags()
	flags.StringP("manager", "m", "http://localhost:8080", "Manager URL")
	flags.StringP("name", "n", "", "Runner name")
	flags.String("hostname", "", "Runner hostname")
	flags.StringP("api-key", "k", "", "API key for authentication")
	flags.StringP("log-file", "l", "", "Log file path (truncated on start, if not set logs only to stdout)")
	flags.String("log-level", "info", "Log level (debug, info, warn, error)")
	flags.BoolP("verbose", "v", false, "Enable verbose logging (same as --log-level=debug)")
	flags.Duration("poll-interval", 5*time.Second, "Job polling interval")
	flags.Bool("force-cpu-rendering", false, "Force CPU rendering for all jobs (disables GPU rendering)")

	// Environment lookup: JIGGABLEND_ prefix, with dashes in keys mapped to
	// underscores.
	runnerViper.SetEnvPrefix("JIGGABLEND")
	runnerViper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
	runnerViper.AutomaticEnv()

	// Viper key -> flag name. Keys use underscores, flags use dashes.
	bindings := []struct {
		key  string
		flag string
	}{
		{"manager", "manager"},
		{"name", "name"},
		{"hostname", "hostname"},
		{"api_key", "api-key"},
		{"log_file", "log-file"},
		{"log_level", "log-level"},
		{"verbose", "verbose"},
		{"poll_interval", "poll-interval"},
		{"force_cpu_rendering", "force-cpu-rendering"},
	}
	for _, b := range bindings {
		// The bind error is intentionally discarded, as it was before; every
		// flag looked up here is declared just above.
		_ = runnerViper.BindPFlag(b.key, flags.Lookup(b.flag))
	}
}
// runRunner is the cobra Run handler for the "runner" command.
//
// It reads the runner configuration from runnerViper, sets up logging,
// creates the runner, verifies required tools, probes capabilities, registers
// with the manager (retrying with exponential backoff), installs a
// SIGINT/SIGTERM handler and finally starts the job polling loop.
//
// cmd and args are supplied by cobra and are not used.
func runRunner(cmd *cobra.Command, args []string) {
	// Get config values (flags take precedence over env vars)
	managerURL := runnerViper.GetString("manager")
	name := runnerViper.GetString("name")
	hostname := runnerViper.GetString("hostname")
	apiKey := runnerViper.GetString("api_key")
	logFile := runnerViper.GetString("log_file")
	logLevel := runnerViper.GetString("log_level")
	verbose := runnerViper.GetBool("verbose")
	pollInterval := runnerViper.GetDuration("poll_interval")
	forceCPURendering := runnerViper.GetBool("force_cpu_rendering")

	var r *runner.Runner

	// closeLogger is a no-op until file logging has been initialized.
	closeLogger := func() {}

	// One deferred handler deals with both panic recovery and closing the
	// logger. Deferred calls run last-in-first-out, so a logger close that is
	// deferred separately (and later) would run before the panic handler and
	// the panic message would be written to an already-closed logger. Doing
	// both here guarantees the order: log, clean up, close, exit.
	defer func() {
		rec := recover()
		if rec != nil {
			logger.Errorf("Runner panicked: %v", rec)
			if r != nil {
				r.Cleanup()
			}
		}
		closeLogger()
		if rec != nil {
			os.Exit(1)
		}
	}()

	// Initialize logger
	if logFile != "" {
		if err := logger.InitWithFile(logFile); err != nil {
			logger.Fatalf("Failed to initialize logger: %v", err)
		}
		closeLogger = func() {
			if l := logger.GetDefault(); l != nil {
				l.Close()
			}
		}
	} else {
		logger.InitStdout()
	}

	// Set log level; --verbose overrides --log-level.
	if verbose {
		logger.SetLevel(logger.LevelDebug)
	} else {
		logger.SetLevel(logger.ParseLevel(logLevel))
	}

	logger.Info("Runner starting up...")

	// Resolve the hostname after the logger is ready so a lookup failure is
	// reported instead of being silently dropped. On failure the hostname
	// stays empty, exactly as before.
	if hostname == "" {
		detected, err := os.Hostname()
		if err != nil {
			logger.Warnf("Failed to determine hostname: %v", err)
		}
		hostname = detected
	}

	// Generate unique runner ID suffix
	runnerIDStr := generateShortID()

	// The suffix is always appended; without an explicit name the hostname is
	// used as the base.
	if name == "" {
		name = fmt.Sprintf("runner-%s-%s", hostname, runnerIDStr)
	} else {
		name = fmt.Sprintf("%s-%s", name, runnerIDStr)
	}

	logger.Debugf("Generated runner ID suffix: %s", runnerIDStr)
	if logFile != "" {
		logger.Infof("Logging to file: %s", logFile)
	}

	// Validate the API key before creating the runner: it is the cheapest
	// check, and without it registration cannot succeed, so there is no point
	// in running tool checks or capability probing first.
	if apiKey == "" {
		logger.Fatal("API key required (use --api-key or set JIGGABLEND_API_KEY env var)")
	}

	// Create runner
	r = runner.New(managerURL, name, hostname, forceCPURendering)

	// Check for required tools early to fail fast
	if err := r.CheckRequiredTools(); err != nil {
		logger.Fatalf("Required tool check failed: %v", err)
	}

	// Clean up orphaned workspace directories
	r.Cleanup()

	// Probe capabilities and log the ones reported as boolean true.
	logger.Debug("Probing runner capabilities...")
	capabilities := r.ProbeCapabilities()
	var enabledCaps []string
	for capName, value := range capabilities {
		if enabled, ok := value.(bool); ok && enabled {
			enabledCaps = append(enabledCaps, capName)
		}
	}
	// Map iteration order is random; sort so the log line is stable.
	sort.Strings(enabledCaps)
	if len(enabledCaps) > 0 {
		logger.Infof("Detected capabilities: %s", strings.Join(enabledCaps, ", "))
	} else {
		logger.Warn("No capabilities detected")
	}

	// Register with the manager, retrying with exponential backoff.
	const (
		initialBackoff = 1 * time.Second
		maxBackoff     = 30 * time.Second
		maxRetries     = 10
	)
	backoff := initialBackoff
	for attempt := 1; ; attempt++ {
		runnerID, err := r.Register(apiKey)
		if err == nil {
			logger.Infof("Registered runner with ID: %d", runnerID)
			// Detect GPU vendors/backends from host hardware so we only force CPU for Blender < 4.x when using AMD.
			logger.Info("Detecting GPU backends (AMD/NVIDIA/Intel) from host hardware for Blender < 4.x policy...")
			r.DetectAndStoreGPUBackends()
			break
		}

		// Token errors are treated as permanent: do not retry.
		if strings.Contains(err.Error(), "token error:") {
			logger.Fatalf("Registration failed (token error): %v", err)
		}
		if attempt >= maxRetries {
			logger.Fatalf("Failed to register runner after %d attempts: %v", maxRetries, err)
		}

		logger.Warnf("Registration failed (attempt %d/%d): %v, retrying in %v", attempt, maxRetries, err, backoff)
		time.Sleep(backoff)

		// Double the wait, capped at maxBackoff.
		backoff *= 2
		if backoff > maxBackoff {
			backoff = maxBackoff
		}
	}

	// Signal handlers: on SIGINT/SIGTERM kill running processes, clean up and
	// exit with status 0.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
	go func() {
		sig := <-sigChan
		logger.Infof("Received signal: %v, killing all processes and cleaning up...", sig)
		r.KillAllProcesses()
		r.Cleanup()
		os.Exit(0)
	}()

	// Start polling for jobs
	logger.Infof("Runner started, polling for jobs (interval: %v)...", pollInterval)
	r.Start(pollInterval)
}
// generateShortID returns a short identifier used as a suffix for runner
// names. Normally it is 4 bytes from crypto/rand rendered as 8 lowercase hex
// characters. If the random source reports an error, it falls back to the hex
// form of the process ID XORed with the current Unix time.
func generateShortID() string {
	var buf [4]byte
	_, err := rand.Read(buf[:])
	if err == nil {
		return hex.EncodeToString(buf[:])
	}

	// Fallback when the random source is unavailable.
	seed := os.Getpid() ^ int(time.Now().Unix())
	return fmt.Sprintf("%x", seed)
}