Implement GPU backend detection for Blender compatibility
- Added functionality to detect GPU backends (HIP and NVIDIA) during runner registration, enhancing compatibility for Blender versions below 4.x.
- Introduced a new method, DetectAndStoreGPUBackends, to download the latest Blender and run a detection script, storing the results for future rendering decisions.
- Updated rendering logic to force CPU rendering when HIP is detected on systems with Blender < 4.x, ensuring stability and compatibility.
- Enhanced the Context structure to include flags for GPU detection status, improving error handling and rendering decisions based on GPU availability.
This commit is contained in:
@@ -11,6 +11,8 @@ import (
|
||||
"jiggablend/pkg/executils"
|
||||
"jiggablend/pkg/types"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
@@ -41,6 +43,10 @@ type Context struct {
|
||||
|
||||
// GPULockedOut is set when the runner has detected a GPU error (e.g. HIP) and disables GPU for all jobs.
|
||||
GPULockedOut bool
|
||||
// HasHIP is true when the runner detected HIP (AMD) devices at startup. Used to force CPU for Blender < 4.x only when HIP is present.
|
||||
HasHIP bool
|
||||
// GPUDetectionFailed is true when startup GPU backend detection could not run; we force CPU for all versions (could not determine HIP vs NVIDIA).
|
||||
GPUDetectionFailed bool
|
||||
// OnGPUError is called when a GPU error line is seen in render logs; typically sets runner GPU lockout.
|
||||
OnGPUError func()
|
||||
}
|
||||
@@ -49,7 +55,7 @@ type Context struct {
|
||||
var ErrJobCancelled = errors.New("job cancelled")
|
||||
|
||||
// NewContext creates a new task context. frameEnd should be >= frame; if 0 or less than frame, it is treated as single-frame (frameEnd = frame).
|
||||
// gpuLockedOut is the runner's current GPU lockout state; onGPUError is called when a GPU error is detected in logs (may be nil).
|
||||
// gpuLockedOut is the runner's current GPU lockout state; hasHIP means the runner has HIP (AMD) devices (force CPU for Blender < 4.x only when true); gpuDetectionFailed means detection failed at startup (force CPU for all versions—could not determine HIP vs NVIDIA); onGPUError is called when a GPU error is detected in logs (may be nil).
|
||||
func NewContext(
|
||||
taskID, jobID int64,
|
||||
jobName string,
|
||||
@@ -65,29 +71,33 @@ func NewContext(
|
||||
encoder *encoding.Selector,
|
||||
processes *executils.ProcessTracker,
|
||||
gpuLockedOut bool,
|
||||
hasHIP bool,
|
||||
gpuDetectionFailed bool,
|
||||
onGPUError func(),
|
||||
) *Context {
|
||||
if frameEnd < frameStart {
|
||||
frameEnd = frameStart
|
||||
}
|
||||
return &Context{
|
||||
TaskID: taskID,
|
||||
JobID: jobID,
|
||||
JobName: jobName,
|
||||
Frame: frameStart,
|
||||
FrameEnd: frameEnd,
|
||||
TaskType: taskType,
|
||||
WorkDir: workDir,
|
||||
JobToken: jobToken,
|
||||
Metadata: metadata,
|
||||
Manager: manager,
|
||||
JobConn: jobConn,
|
||||
Workspace: ws,
|
||||
Blender: blenderMgr,
|
||||
Encoder: encoder,
|
||||
Processes: processes,
|
||||
GPULockedOut: gpuLockedOut,
|
||||
OnGPUError: onGPUError,
|
||||
TaskID: taskID,
|
||||
JobID: jobID,
|
||||
JobName: jobName,
|
||||
Frame: frameStart,
|
||||
FrameEnd: frameEnd,
|
||||
TaskType: taskType,
|
||||
WorkDir: workDir,
|
||||
JobToken: jobToken,
|
||||
Metadata: metadata,
|
||||
Manager: manager,
|
||||
JobConn: jobConn,
|
||||
Workspace: ws,
|
||||
Blender: blenderMgr,
|
||||
Encoder: encoder,
|
||||
Processes: processes,
|
||||
GPULockedOut: gpuLockedOut,
|
||||
HasHIP: hasHIP,
|
||||
GPUDetectionFailed: gpuDetectionFailed,
|
||||
OnGPUError: onGPUError,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,11 +179,23 @@ func (c *Context) ShouldEnableExecution() bool {
|
||||
}
|
||||
|
||||
// ShouldForceCPU returns true if GPU should be disabled and CPU rendering forced
|
||||
// (runner GPU lockout or metadata force_cpu in engine_settings).
|
||||
// (runner GPU lockout, GPU detection failed at startup for any version, metadata force_cpu,
|
||||
// or Blender < 4.x when the runner has HIP).
|
||||
func (c *Context) ShouldForceCPU() bool {
|
||||
if c.GPULockedOut {
|
||||
return true
|
||||
}
|
||||
// Detection failed at startup: we could not determine HIP vs NVIDIA, so force CPU for all versions.
|
||||
if c.GPUDetectionFailed {
|
||||
return true
|
||||
}
|
||||
v := c.GetBlenderVersion()
|
||||
major := parseBlenderMajor(v)
|
||||
isPre4 := v != "" && major >= 0 && major < 4
|
||||
// Blender < 4.x: force CPU when runner has HIP (no official HIP support).
|
||||
if isPre4 && c.HasHIP {
|
||||
return true
|
||||
}
|
||||
if c.Metadata != nil && c.Metadata.RenderSettings.EngineSettings != nil {
|
||||
if v, ok := c.Metadata.RenderSettings.EngineSettings["force_cpu"]; ok {
|
||||
if b, ok := v.(bool); ok && b {
|
||||
@@ -184,6 +206,21 @@ func (c *Context) ShouldForceCPU() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// parseBlenderMajor extracts the major version number from a Blender version
// string such as "4.2.3" or "3.6".
//
// Leading and trailing whitespace is ignored. It returns -1 when the string is
// empty, when the text before the first "." is not an integer, or when that
// integer is negative, so any negative result means "version unknown".
func parseBlenderMajor(version string) int {
	version = strings.TrimSpace(version)
	if version == "" {
		return -1
	}

	// Only the component before the first dot matters. SplitN always yields
	// at least one element here, so indexing parts[0] is safe.
	parts := strings.SplitN(version, ".", 2)
	major, err := strconv.Atoi(parts[0])
	if err != nil || major < 0 {
		// Atoi accepts a leading sign; a negative major is not a valid
		// version, so report it with the same sentinel as a parse failure.
		return -1
	}
	return major
}
|
||||
|
||||
// IsJobCancelled checks whether the manager marked this job as cancelled.
|
||||
func (c *Context) IsJobCancelled() (bool, error) {
|
||||
if c.Manager == nil {
|
||||
|
||||
@@ -104,7 +104,16 @@ func (p *RenderProcessor) Process(ctx *Context) error {
|
||||
renderFormat := "EXR"
|
||||
|
||||
if ctx.ShouldForceCPU() {
|
||||
ctx.Info("GPU lockout active: using CPU rendering only")
|
||||
v := ctx.GetBlenderVersion()
|
||||
major := parseBlenderMajor(v)
|
||||
isPre4 := v != "" && major >= 0 && major < 4
|
||||
if ctx.GPUDetectionFailed {
|
||||
ctx.Info("GPU backend detection failed at startup—we could not determine whether this machine has HIP (AMD) or NVIDIA GPUs, so rendering will use CPU to avoid compatibility issues")
|
||||
} else if isPre4 && ctx.HasHIP {
|
||||
ctx.Info("Blender < 4.x has no official HIP support: using CPU rendering only")
|
||||
} else {
|
||||
ctx.Info("GPU lockout active: using CPU rendering only")
|
||||
}
|
||||
}
|
||||
|
||||
// Create render script
|
||||
@@ -217,9 +226,9 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
|
||||
cmd := exec.Command(blenderBinary, args...)
|
||||
cmd.Dir = ctx.WorkDir
|
||||
|
||||
// Set up environment with custom HOME directory
|
||||
// Set up environment: LD_LIBRARY_PATH for tarball Blender, then custom HOME
|
||||
env := os.Environ()
|
||||
// Remove existing HOME if present and add our custom one
|
||||
env = blender.TarballEnv(blenderBinary, env)
|
||||
newEnv := make([]string, 0, len(env)+1)
|
||||
for _, e := range env {
|
||||
if !strings.HasPrefix(e, "HOME=") {
|
||||
|
||||
Reference in New Issue
Block a user