- Removed the `--disable-hiprt` flag from the runner command, simplifying the rendering options for users.
- Updated the `jiggablend-runner` script and README to reflect the removal of the HIPRT control flag, enhancing clarity in usage instructions.
- Enhanced the installation script to provide clearer examples for running the jiggablend manager and runner, improving user experience during setup.
- Implemented a more robust GPU backend detection mechanism, allowing for better compatibility with various hardware configurations.
215 lines
6.2 KiB
Go
215 lines
6.2 KiB
Go
package cmd
|
|
|
|
import (
|
|
"crypto/rand"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"os"
|
|
"os/signal"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
|
|
"jiggablend/internal/logger"
|
|
"jiggablend/internal/runner"
|
|
|
|
"github.com/spf13/cobra"
|
|
"github.com/spf13/viper"
|
|
)
|
|
|
|
var runnerViper = viper.New()
|
|
|
|
var runnerCmd = &cobra.Command{
|
|
Use: "runner",
|
|
Short: "Start the Jiggablend render runner",
|
|
Long: `Start the Jiggablend render runner that connects to a manager and processes render tasks.`,
|
|
Run: runRunner,
|
|
}
|
|
|
|
func init() {
|
|
rootCmd.AddCommand(runnerCmd)
|
|
|
|
runnerCmd.Flags().StringP("manager", "m", "http://localhost:8080", "Manager URL")
|
|
runnerCmd.Flags().StringP("name", "n", "", "Runner name")
|
|
runnerCmd.Flags().String("hostname", "", "Runner hostname")
|
|
runnerCmd.Flags().StringP("api-key", "k", "", "API key for authentication")
|
|
runnerCmd.Flags().StringP("log-file", "l", "", "Log file path (truncated on start, if not set logs only to stdout)")
|
|
runnerCmd.Flags().String("log-level", "info", "Log level (debug, info, warn, error)")
|
|
runnerCmd.Flags().BoolP("verbose", "v", false, "Enable verbose logging (same as --log-level=debug)")
|
|
runnerCmd.Flags().Duration("poll-interval", 5*time.Second, "Job polling interval")
|
|
runnerCmd.Flags().Bool("force-cpu-rendering", false, "Force CPU rendering for all jobs (disables GPU rendering)")
|
|
|
|
// Bind flags to viper with JIGGABLEND_ prefix
|
|
runnerViper.SetEnvPrefix("JIGGABLEND")
|
|
runnerViper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
|
|
runnerViper.AutomaticEnv()
|
|
|
|
runnerViper.BindPFlag("manager", runnerCmd.Flags().Lookup("manager"))
|
|
runnerViper.BindPFlag("name", runnerCmd.Flags().Lookup("name"))
|
|
runnerViper.BindPFlag("hostname", runnerCmd.Flags().Lookup("hostname"))
|
|
runnerViper.BindPFlag("api_key", runnerCmd.Flags().Lookup("api-key"))
|
|
runnerViper.BindPFlag("log_file", runnerCmd.Flags().Lookup("log-file"))
|
|
runnerViper.BindPFlag("log_level", runnerCmd.Flags().Lookup("log-level"))
|
|
runnerViper.BindPFlag("verbose", runnerCmd.Flags().Lookup("verbose"))
|
|
runnerViper.BindPFlag("poll_interval", runnerCmd.Flags().Lookup("poll-interval"))
|
|
runnerViper.BindPFlag("force_cpu_rendering", runnerCmd.Flags().Lookup("force-cpu-rendering"))
|
|
}
|
|
|
|
func runRunner(cmd *cobra.Command, args []string) {
|
|
// Get config values (flags take precedence over env vars)
|
|
managerURL := runnerViper.GetString("manager")
|
|
name := runnerViper.GetString("name")
|
|
hostname := runnerViper.GetString("hostname")
|
|
apiKey := runnerViper.GetString("api_key")
|
|
logFile := runnerViper.GetString("log_file")
|
|
logLevel := runnerViper.GetString("log_level")
|
|
verbose := runnerViper.GetBool("verbose")
|
|
pollInterval := runnerViper.GetDuration("poll_interval")
|
|
forceCPURendering := runnerViper.GetBool("force_cpu_rendering")
|
|
|
|
var r *runner.Runner
|
|
|
|
defer func() {
|
|
if rec := recover(); rec != nil {
|
|
logger.Errorf("Runner panicked: %v", rec)
|
|
if r != nil {
|
|
r.Cleanup()
|
|
}
|
|
os.Exit(1)
|
|
}
|
|
}()
|
|
|
|
if hostname == "" {
|
|
hostname, _ = os.Hostname()
|
|
}
|
|
|
|
// Generate unique runner ID suffix
|
|
runnerIDStr := generateShortID()
|
|
|
|
// Generate runner name with ID if not provided
|
|
if name == "" {
|
|
name = fmt.Sprintf("runner-%s-%s", hostname, runnerIDStr)
|
|
} else {
|
|
name = fmt.Sprintf("%s-%s", name, runnerIDStr)
|
|
}
|
|
|
|
// Initialize logger
|
|
if logFile != "" {
|
|
if err := logger.InitWithFile(logFile); err != nil {
|
|
logger.Fatalf("Failed to initialize logger: %v", err)
|
|
}
|
|
defer func() {
|
|
if l := logger.GetDefault(); l != nil {
|
|
l.Close()
|
|
}
|
|
}()
|
|
} else {
|
|
logger.InitStdout()
|
|
}
|
|
|
|
// Set log level
|
|
if verbose {
|
|
logger.SetLevel(logger.LevelDebug)
|
|
} else {
|
|
logger.SetLevel(logger.ParseLevel(logLevel))
|
|
}
|
|
|
|
logger.Info("Runner starting up...")
|
|
logger.Debugf("Generated runner ID suffix: %s", runnerIDStr)
|
|
if logFile != "" {
|
|
logger.Infof("Logging to file: %s", logFile)
|
|
}
|
|
|
|
// Create runner
|
|
r = runner.New(managerURL, name, hostname, forceCPURendering)
|
|
|
|
// Check for required tools early to fail fast
|
|
if err := r.CheckRequiredTools(); err != nil {
|
|
logger.Fatalf("Required tool check failed: %v", err)
|
|
}
|
|
|
|
// Clean up orphaned workspace directories
|
|
r.Cleanup()
|
|
|
|
// Probe capabilities and log them
|
|
logger.Debug("Probing runner capabilities...")
|
|
capabilities := r.ProbeCapabilities()
|
|
capList := []string{}
|
|
for cap, value := range capabilities {
|
|
if enabled, ok := value.(bool); ok && enabled {
|
|
capList = append(capList, cap)
|
|
}
|
|
}
|
|
if len(capList) > 0 {
|
|
logger.Infof("Detected capabilities: %s", strings.Join(capList, ", "))
|
|
} else {
|
|
logger.Warn("No capabilities detected")
|
|
}
|
|
|
|
// Register with API key
|
|
if apiKey == "" {
|
|
logger.Fatal("API key required (use --api-key or set JIGGABLEND_API_KEY env var)")
|
|
}
|
|
|
|
// Retry registration with exponential backoff
|
|
backoff := 1 * time.Second
|
|
maxBackoff := 30 * time.Second
|
|
maxRetries := 10
|
|
retryCount := 0
|
|
|
|
var runnerID int64
|
|
|
|
for {
|
|
var err error
|
|
runnerID, err = r.Register(apiKey)
|
|
if err == nil {
|
|
logger.Infof("Registered runner with ID: %d", runnerID)
|
|
// Detect GPU vendors/backends from host hardware so we only force CPU for Blender < 4.x when using AMD.
|
|
logger.Info("Detecting GPU backends (AMD/NVIDIA/Intel) from host hardware for Blender < 4.x policy...")
|
|
r.DetectAndStoreGPUBackends()
|
|
break
|
|
}
|
|
|
|
errMsg := err.Error()
|
|
if strings.Contains(errMsg, "token error:") {
|
|
logger.Fatalf("Registration failed (token error): %v", err)
|
|
}
|
|
|
|
retryCount++
|
|
if retryCount >= maxRetries {
|
|
logger.Fatalf("Failed to register runner after %d attempts: %v", maxRetries, err)
|
|
}
|
|
|
|
logger.Warnf("Registration failed (attempt %d/%d): %v, retrying in %v", retryCount, maxRetries, err, backoff)
|
|
time.Sleep(backoff)
|
|
backoff *= 2
|
|
if backoff > maxBackoff {
|
|
backoff = maxBackoff
|
|
}
|
|
}
|
|
|
|
// Signal handlers
|
|
sigChan := make(chan os.Signal, 1)
|
|
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
|
|
|
|
go func() {
|
|
sig := <-sigChan
|
|
logger.Infof("Received signal: %v, killing all processes and cleaning up...", sig)
|
|
r.KillAllProcesses()
|
|
r.Cleanup()
|
|
os.Exit(0)
|
|
}()
|
|
|
|
// Start polling for jobs
|
|
logger.Infof("Runner started, polling for jobs (interval: %v)...", pollInterval)
|
|
r.Start(pollInterval)
|
|
}
|
|
|
|
// generateShortID returns a short identifier used as the runner name suffix.
//
// Normally it is 4 bytes from crypto/rand encoded as 8 lowercase hex
// characters. If the random source fails, it falls back to the hex form of
// the process ID XORed with the current Unix time.
func generateShortID() string {
	var raw [4]byte
	_, err := rand.Read(raw[:])
	if err == nil {
		return hex.EncodeToString(raw[:])
	}
	fallback := os.Getpid() ^ int(time.Now().Unix())
	return fmt.Sprintf("%x", fallback)
}
|