From 5303f01f7c3648d38dc7dc7a360e6aa4063185df Mon Sep 17 00:00:00 2001 From: Justin Harms Date: Fri, 13 Mar 2026 18:32:05 -0500 Subject: [PATCH] Implement GPU backend detection for Blender compatibility - Added functionality to detect GPU backends (HIP and NVIDIA) during runner registration, enhancing compatibility for Blender versions below 4.x. - Introduced a new method, DetectAndStoreGPUBackends, to download the latest Blender and run a detection script, storing the results for future rendering decisions. - Updated rendering logic to force CPU rendering when HIP is detected on systems with Blender < 4.x, ensuring stability and compatibility. - Enhanced the Context structure to include flags for GPU detection status, improving error handling and rendering decisions based on GPU availability. --- cmd/jiggablend/cmd/runner.go | 3 + internal/manager/jobs.go | 6 +- internal/runner/api/manager.go | 29 +++++++++ internal/runner/blender/binary.go | 40 ++++++++++++ internal/runner/blender/detect.go | 45 +++++++++++++ internal/runner/runner.go | 63 ++++++++++++++++++ internal/runner/tasks/processor.go | 75 ++++++++++++++++------ internal/runner/tasks/render.go | 15 ++++- pkg/scripts/scripts.go | 3 + pkg/scripts/scripts/detect_gpu_backends.py | 39 +++++++++++ 10 files changed, 294 insertions(+), 24 deletions(-) create mode 100644 internal/runner/blender/detect.go create mode 100644 pkg/scripts/scripts/detect_gpu_backends.py diff --git a/cmd/jiggablend/cmd/runner.go b/cmd/jiggablend/cmd/runner.go index ea30212..167b5ef 100644 --- a/cmd/jiggablend/cmd/runner.go +++ b/cmd/jiggablend/cmd/runner.go @@ -161,6 +161,9 @@ func runRunner(cmd *cobra.Command, args []string) { runnerID, err = r.Register(apiKey) if err == nil { logger.Infof("Registered runner with ID: %d", runnerID) + // Download latest Blender and detect HIP vs NVIDIA so we only force CPU for Blender < 4.x when using HIP + logger.Info("Detecting GPU backends (HIP/NVIDIA) for Blender < 4.x policy...") + 
// blenderVersion is a single entry of the manager's Blender version list.
// Only the "full" version string (e.g. "4.2.3") is decoded.
type blenderVersion struct {
	Full string `json:"full"`
}

// blenderVersionsResponse mirrors the JSON body returned by
// GET /api/blender/versions: an object with a "versions" array.
type blenderVersionsResponse struct {
	Versions []blenderVersion `json:"versions"`
}
+func (m *ManagerClient) GetLatestBlenderVersion() (string, error) { + resp, err := m.Request("GET", "/api/blender/versions", nil) + if err != nil { + return "", fmt.Errorf("failed to fetch blender versions: %w", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("blender versions returned status %d: %s", resp.StatusCode, string(body)) + } + var out blenderVersionsResponse + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return "", fmt.Errorf("failed to decode blender versions: %w", err) + } + if len(out.Versions) == 0 { + return "", fmt.Errorf("no blender versions available") + } + return out.Versions[0].Full, nil +} diff --git a/internal/runner/blender/binary.go b/internal/runner/blender/binary.go index d00c958..9e42e20 100644 --- a/internal/runner/blender/binary.go +++ b/internal/runner/blender/binary.go @@ -6,6 +6,7 @@ import ( "log" "os" "path/filepath" + "strings" "jiggablend/internal/runner/api" "jiggablend/internal/runner/workspace" @@ -85,3 +86,42 @@ func (m *Manager) GetBinaryForJob(version string) (string, error) { return m.GetBinaryPath(version) } +// TarballEnv returns a copy of baseEnv with LD_LIBRARY_PATH set so that a +// tarball Blender installation can find its bundled libs (e.g. lib/python3.x). +// If blenderBinary is the system "blender" or has no path component, baseEnv is +// returned unchanged. 
// TarballEnv prepares the environment for launching a Blender binary that was
// unpacked from a tarball, so the dynamic loader can find the libraries
// shipped in the "lib" directory next to the executable.
//
// When blenderBinary is empty, is the bare name "blender", or contains no path
// separator, baseEnv itself is returned untouched. Otherwise a new slice is
// returned in which every LD_LIBRARY_PATH entry is replaced by
// "<dir-of-binary>/lib", followed by ":" and the value of the first existing
// LD_LIBRARY_PATH entry when that value is non-empty. If baseEnv has no
// LD_LIBRARY_PATH entry, one is appended at the end.
func TarballEnv(blenderBinary string, baseEnv []string) []string {
	const key = "LD_LIBRARY_PATH="

	switch {
	case blenderBinary == "", blenderBinary == "blender":
		return baseEnv
	case !strings.ContainsRune(blenderBinary, os.PathSeparator):
		return baseEnv
	}

	bundled := filepath.Join(filepath.Dir(blenderBinary), "lib")

	var (
		value    string // final LD_LIBRARY_PATH value, fixed by the first entry seen
		resolved bool   // true once an LD_LIBRARY_PATH entry has been encountered
	)
	env := make([]string, 0, len(baseEnv)+1)
	for _, kv := range baseEnv {
		if !strings.HasPrefix(kv, key) {
			env = append(env, kv)
			continue
		}
		if !resolved {
			value = bundled
			if previous := kv[len(key):]; previous != "" {
				value = bundled + ":" + previous
			}
			resolved = true
		}
		// Every LD_LIBRARY_PATH entry (even a duplicate) gets the same value.
		env = append(env, key+value)
	}
	if !resolved {
		env = append(env, key+bundled)
	}
	return env
}
+func DetectGPUBackends(blenderBinary, scriptDir string) (hasHIP, hasNVIDIA bool, err error) { + scriptPath := filepath.Join(scriptDir, "detect_gpu_backends.py") + if err := os.WriteFile(scriptPath, []byte(scripts.DetectGPUBackends), 0644); err != nil { + return false, false, fmt.Errorf("write detection script: %w", err) + } + defer os.Remove(scriptPath) + + env := TarballEnv(blenderBinary, os.Environ()) + cmd := exec.Command(blenderBinary, "-b", "--python", scriptPath) + cmd.Env = env + cmd.Dir = scriptDir + out, err := cmd.CombinedOutput() + if err != nil { + return false, false, fmt.Errorf("run blender detection: %w (output: %s)", err, string(out)) + } + + scanner := bufio.NewScanner(strings.NewReader(string(out))) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + switch line { + case "HAS_HIP": + hasHIP = true + case "HAS_NVIDIA": + hasNVIDIA = true + } + } + return hasHIP, hasNVIDIA, scanner.Err() +} diff --git a/internal/runner/runner.go b/internal/runner/runner.go index 6a46fea..507c10f 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -45,6 +45,15 @@ type Runner struct { // when true, the runner forces CPU rendering for all subsequent jobs. gpuLockedOut bool gpuLockedOutMu sync.RWMutex + + // hasHIP/hasNVIDIA are set at startup by running latest Blender to detect GPU backends. + // Used to force CPU only for Blender < 4.x when HIP is present (no official HIP support pre-4). + // gpuDetectionFailed is true when detection could not run; we then force CPU for all versions (we could not determine HIP vs NVIDIA). + gpuBackendMu sync.RWMutex + hasHIP bool + hasNVIDIA bool + gpuBackendProbed bool + gpuDetectionFailed bool } // New creates a new runner. 
@@ -124,6 +133,58 @@ func (r *Runner) Register(apiKey string) (int64, error) { return id, nil } +// DetectAndStoreGPUBackends downloads the latest Blender from the manager (if needed), +// runs a detection script to see if HIP (AMD) and/or NVIDIA devices are available, +// and stores the result. Call after Register. Used so we only force CPU for Blender < 4.x +// when the runner has HIP (no official HIP support pre-4); NVIDIA is allowed. +func (r *Runner) DetectAndStoreGPUBackends() { + r.gpuBackendMu.Lock() + defer r.gpuBackendMu.Unlock() + if r.gpuBackendProbed { + return + } + latestVer, err := r.manager.GetLatestBlenderVersion() + if err != nil { + log.Printf("GPU backend detection failed (could not get latest Blender version: %v). All jobs will use CPU because we could not determine HIP vs NVIDIA.", err) + r.gpuBackendProbed = true + r.gpuDetectionFailed = true + return + } + binaryPath, err := r.blender.GetBinaryPath(latestVer) + if err != nil { + log.Printf("GPU backend detection failed (could not get Blender binary: %v). All jobs will use CPU because we could not determine HIP vs NVIDIA.", err) + r.gpuBackendProbed = true + r.gpuDetectionFailed = true + return + } + hasHIP, hasNVIDIA, err := blender.DetectGPUBackends(binaryPath, r.workspace.BaseDir()) + if err != nil { + log.Printf("GPU backend detection failed (script error: %v). All jobs will use CPU because we could not determine HIP vs NVIDIA.", err) + r.gpuBackendProbed = true + r.gpuDetectionFailed = true + return + } + r.hasHIP = hasHIP + r.hasNVIDIA = hasNVIDIA + r.gpuBackendProbed = true + r.gpuDetectionFailed = false + log.Printf("GPU backend detection: HIP=%v NVIDIA=%v (Blender < 4.x will force CPU only when HIP is present)", hasHIP, hasNVIDIA) +} + +// HasHIP returns whether the runner detected HIP (AMD) devices. Used to force CPU for Blender < 4.x only when HIP is present. 
+func (r *Runner) HasHIP() bool { + r.gpuBackendMu.RLock() + defer r.gpuBackendMu.RUnlock() + return r.hasHIP +} + +// GPUDetectionFailed returns true when startup GPU backend detection could not run or failed. When true, all jobs use CPU because we could not determine HIP vs NVIDIA. +func (r *Runner) GPUDetectionFailed() bool { + r.gpuBackendMu.RLock() + defer r.gpuBackendMu.RUnlock() + return r.gpuDetectionFailed +} + // Start starts the job polling loop. func (r *Runner) Start(pollInterval time.Duration) { log.Printf("Starting job polling loop (interval: %v)", pollInterval) @@ -244,6 +305,8 @@ func (r *Runner) executeJob(job *api.NextJobResponse) (err error) { r.encoder, r.processes, r.IsGPULockedOut(), + r.HasHIP(), + r.GPUDetectionFailed(), func() { r.SetGPULockedOut(true) }, ) diff --git a/internal/runner/tasks/processor.go b/internal/runner/tasks/processor.go index b607bbe..f17be6c 100644 --- a/internal/runner/tasks/processor.go +++ b/internal/runner/tasks/processor.go @@ -11,6 +11,8 @@ import ( "jiggablend/pkg/executils" "jiggablend/pkg/types" "os/exec" + "strconv" + "strings" "sync" "time" ) @@ -41,6 +43,10 @@ type Context struct { // GPULockedOut is set when the runner has detected a GPU error (e.g. HIP) and disables GPU for all jobs. GPULockedOut bool + // HasHIP is true when the runner detected HIP (AMD) devices at startup. Used to force CPU for Blender < 4.x only when HIP is present. + HasHIP bool + // GPUDetectionFailed is true when startup GPU backend detection could not run; we force CPU for all versions (could not determine HIP vs NVIDIA). + GPUDetectionFailed bool // OnGPUError is called when a GPU error line is seen in render logs; typically sets runner GPU lockout. OnGPUError func() } @@ -49,7 +55,7 @@ type Context struct { var ErrJobCancelled = errors.New("job cancelled") // NewContext creates a new task context. frameEnd should be >= frame; if 0 or less than frame, it is treated as single-frame (frameEnd = frame). 
-// gpuLockedOut is the runner's current GPU lockout state; onGPUError is called when a GPU error is detected in logs (may be nil). +// gpuLockedOut is the runner's current GPU lockout state; hasHIP means the runner has HIP (AMD) devices (force CPU for Blender < 4.x only when true); gpuDetectionFailed means detection failed at startup (force CPU for all versions—could not determine HIP vs NVIDIA); onGPUError is called when a GPU error is detected in logs (may be nil). func NewContext( taskID, jobID int64, jobName string, @@ -65,29 +71,33 @@ func NewContext( encoder *encoding.Selector, processes *executils.ProcessTracker, gpuLockedOut bool, + hasHIP bool, + gpuDetectionFailed bool, onGPUError func(), ) *Context { if frameEnd < frameStart { frameEnd = frameStart } return &Context{ - TaskID: taskID, - JobID: jobID, - JobName: jobName, - Frame: frameStart, - FrameEnd: frameEnd, - TaskType: taskType, - WorkDir: workDir, - JobToken: jobToken, - Metadata: metadata, - Manager: manager, - JobConn: jobConn, - Workspace: ws, - Blender: blenderMgr, - Encoder: encoder, - Processes: processes, - GPULockedOut: gpuLockedOut, - OnGPUError: onGPUError, + TaskID: taskID, + JobID: jobID, + JobName: jobName, + Frame: frameStart, + FrameEnd: frameEnd, + TaskType: taskType, + WorkDir: workDir, + JobToken: jobToken, + Metadata: metadata, + Manager: manager, + JobConn: jobConn, + Workspace: ws, + Blender: blenderMgr, + Encoder: encoder, + Processes: processes, + GPULockedOut: gpuLockedOut, + HasHIP: hasHIP, + GPUDetectionFailed: gpuDetectionFailed, + OnGPUError: onGPUError, } } @@ -169,11 +179,23 @@ func (c *Context) ShouldEnableExecution() bool { } // ShouldForceCPU returns true if GPU should be disabled and CPU rendering forced -// (runner GPU lockout or metadata force_cpu in engine_settings). +// (runner GPU lockout, GPU detection failed at startup for any version, metadata force_cpu, +// or Blender < 4.x when the runner has HIP). 
// parseBlenderMajor extracts the major component of a Blender version string:
// the text before the first '.', or the whole string when there is no dot.
// Leading and trailing whitespace is ignored ("4.2.3" -> 4, " 3.6 " -> 3).
// It returns -1 when the string is blank or that component is not an integer.
func parseBlenderMajor(version string) int {
	trimmed := strings.TrimSpace(version)
	if trimmed == "" {
		return -1
	}

	head := trimmed
	if dot := strings.IndexByte(trimmed, '.'); dot >= 0 {
		head = trimmed[:dot]
	}

	if n, err := strconv.Atoi(head); err == nil {
		return n
	}
	return -1
}
func (c *Context) IsJobCancelled() (bool, error) { if c.Manager == nil { diff --git a/internal/runner/tasks/render.go b/internal/runner/tasks/render.go index 46fb484..20f6bee 100644 --- a/internal/runner/tasks/render.go +++ b/internal/runner/tasks/render.go @@ -104,7 +104,16 @@ func (p *RenderProcessor) Process(ctx *Context) error { renderFormat := "EXR" if ctx.ShouldForceCPU() { - ctx.Info("GPU lockout active: using CPU rendering only") + v := ctx.GetBlenderVersion() + major := parseBlenderMajor(v) + isPre4 := v != "" && major >= 0 && major < 4 + if ctx.GPUDetectionFailed { + ctx.Info("GPU backend detection failed at startup—we could not determine whether this machine has HIP (AMD) or NVIDIA GPUs, so rendering will use CPU to avoid compatibility issues") + } else if isPre4 && ctx.HasHIP { + ctx.Info("Blender < 4.x has no official HIP support: using CPU rendering only") + } else { + ctx.Info("GPU lockout active: using CPU rendering only") + } } // Create render script @@ -217,9 +226,9 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out cmd := exec.Command(blenderBinary, args...) 
cmd.Dir = ctx.WorkDir - // Set up environment with custom HOME directory + // Set up environment: LD_LIBRARY_PATH for tarball Blender, then custom HOME env := os.Environ() - // Remove existing HOME if present and add our custom one + env = blender.TarballEnv(blenderBinary, env) newEnv := make([]string, 0, len(env)+1) for _, e := range env { if !strings.HasPrefix(e, "HOME=") { diff --git a/pkg/scripts/scripts.go b/pkg/scripts/scripts.go index ce78d1f..2702474 100644 --- a/pkg/scripts/scripts.go +++ b/pkg/scripts/scripts.go @@ -11,3 +11,6 @@ var UnhideObjects string //go:embed scripts/render_blender.py.template var RenderBlenderTemplate string +//go:embed scripts/detect_gpu_backends.py +var DetectGPUBackends string + diff --git a/pkg/scripts/scripts/detect_gpu_backends.py b/pkg/scripts/scripts/detect_gpu_backends.py new file mode 100644 index 0000000..0ab0f6f --- /dev/null +++ b/pkg/scripts/scripts/detect_gpu_backends.py @@ -0,0 +1,39 @@ +# Minimal script to detect HIP (AMD) and NVIDIA (CUDA/OptiX) backends for Cycles. +# Run with: blender -b --python detect_gpu_backends.py +# Prints HAS_HIP and/or HAS_NVIDIA to stdout, one per line. 
import sys

# Cycles compute device types probed by this script.
GPU_DEVICE_TYPES = ('HIP', 'CUDA', 'OPTIX')


def devices_of_type(cprefs, device_type):
    """Return the Cycles device entries whose ``type`` equals ``device_type``.

    ``cprefs`` is the Cycles add-on preferences object. The compute device
    type is switched to ``device_type`` first; this assignment (or any of the
    calls below) may raise when the backend is not available in this build,
    and the exception is left to the caller.

    ``get_devices_for_type`` is preferred when present. The previous code used
    ``get_devices()``, whose result was ``None`` on current Blender releases,
    so no backend was ever reported.
    NOTE(review): per the Cycles add-on source, get_devices() is a deprecated
    shim in Blender 3.x/4.x -- confirm against the targeted versions.

    Entries are filtered by their own ``type`` because device lists can also
    contain CPU entries, which must not count as a GPU backend.
    """
    cprefs.compute_device_type = device_type
    if hasattr(cprefs, 'get_devices_for_type'):
        candidates = cprefs.get_devices_for_type(device_type)
    else:
        # Older API: refresh the shared device collection, then read it.
        if hasattr(cprefs, 'refresh_devices'):
            cprefs.refresh_devices()
        elif hasattr(cprefs, 'get_devices'):
            cprefs.get_devices()
        candidates = getattr(cprefs, 'devices', None)
    if candidates is None:
        return []
    return [d for d in candidates if getattr(d, 'type', None) == device_type]


def detect_backends(cprefs):
    """Return ``(has_hip, has_nvidia)`` for the given Cycles preferences.

    ``has_hip`` is True when at least one HIP device is listed; ``has_nvidia``
    is True when at least one CUDA or OptiX device is listed. A device type
    that raises while being probed is treated as "not present" so one
    unsupported backend does not hide the others.
    """
    has_hip = False
    has_nvidia = False
    for device_type in GPU_DEVICE_TYPES:
        try:
            found = devices_of_type(cprefs, device_type)
        except Exception:
            # Deliberate best effort: unsupported backend in this build.
            continue
        if not found:
            continue
        if device_type == 'HIP':
            has_hip = True
        else:
            has_nvidia = True
    return has_hip, has_nvidia


def main():
    """Print HAS_HIP and/or HAS_NVIDIA to stdout, one marker per line.

    Prints nothing when the Cycles add-on is not enabled. On an unexpected
    error, prints ``ERROR <message>`` to stderr and exits with status 1.
    """
    try:
        # Imported here so the helpers above stay importable outside Blender.
        import bpy

        prefs = bpy.context.preferences
        if not hasattr(prefs, 'addons') or 'cycles' not in prefs.addons:
            return
        has_hip, has_nvidia = detect_backends(prefs.addons['cycles'].preferences)
        if has_hip:
            print('HAS_HIP', flush=True)
        if has_nvidia:
            print('HAS_NVIDIA', flush=True)
    except Exception as e:
        print('ERROR', str(e), file=sys.stderr, flush=True)
        sys.exit(1)


if __name__ == '__main__':
    main()