4 Commits

Author SHA1 Message Date
5303f01f7c Implement GPU backend detection for Blender compatibility
- Added functionality to detect GPU backends (HIP and NVIDIA) during runner registration, enhancing compatibility for Blender versions below 4.x.
- Introduced a new method, DetectAndStoreGPUBackends, to download the latest Blender and run a detection script, storing the results for future rendering decisions.
- Updated rendering logic to force CPU rendering when HIP is detected on systems with Blender < 4.x, ensuring stability and compatibility.
- Enhanced the Context structure to include flags for GPU detection status, improving error handling and rendering decisions based on GPU availability.
2026-03-13 18:32:05 -05:00
bc39fd438b Add installation script for jiggablend binary
- Introduced a new installer.sh script to automate the installation of the latest jiggablend binary for Linux AMD64.
- The script fetches the latest release information, downloads the binary and its checksums, verifies the checksum, and installs the binary and wrapper scripts for the manager and runner.
- Added wrapper scripts for both the manager and runner with test setup instructions, enhancing user experience for initial setup.
2026-03-13 10:26:21 -05:00
4c7f168bce Enhance GPU error detection in RenderProcessor
- Updated gpuErrorSubstrings to include case-insensitive matching for GPU backend errors, improving error detection reliability.
- Modified checkGPUErrorLine to convert log lines to lowercase before checking for error indicators, ensuring consistent matching across different log formats.
2026-03-13 10:26:13 -05:00
6833bb4013 Add GPU error handling and lockout mechanism in Runner
- Introduced gpuLockedOut state in Runner to manage GPU rendering based on detected errors.
- Implemented SetGPULockedOut and IsGPULockedOut methods for controlling GPU usage.
- Enhanced Context to include GPULockedOut and OnGPUError for better error handling.
- Updated RenderProcessor to check for GPU errors in logs and trigger lockout as needed.
- Modified rendering logic to force CPU rendering when GPU lockout is active, improving stability during errors.
2026-03-13 10:01:39 -05:00
11 changed files with 494 additions and 26 deletions

@@ -161,6 +161,9 @@ func runRunner(cmd *cobra.Command, args []string) {
 		runnerID, err = r.Register(apiKey)
 		if err == nil {
 			logger.Infof("Registered runner with ID: %d", runnerID)
+			// Download latest Blender and detect HIP vs NVIDIA so we only force CPU for Blender < 4.x when using HIP
+			logger.Info("Detecting GPU backends (HIP/NVIDIA) for Blender < 4.x policy...")
+			r.DetectAndStoreGPUBackends()
 			break
 		}

installer.sh (new file, 106 lines)

@@ -0,0 +1,106 @@
#!/bin/bash
set -euo pipefail

# Simple script to install the latest jiggablend binary for Linux AMD64
# and create wrapper scripts for manager and runner using test setup
# Dependencies: curl, jq, tar, sha256sum, sudo (for installation to /usr/local/bin)

REPO="s1d3sw1ped/jiggablend"
API_URL="https://git.s1d3sw1ped.com/api/v1/repos/${REPO}/releases/latest"
ASSET_NAME="jiggablend-linux-amd64.tar.gz"

echo "Fetching latest release information..."
RELEASE_JSON=$(curl -s "$API_URL")
TAG=$(echo "$RELEASE_JSON" | jq -r '.tag_name')
echo "Latest version: $TAG"

ASSET_URL=$(echo "$RELEASE_JSON" | jq -r ".assets[] | select(.name == \"$ASSET_NAME\") | .browser_download_url")
if [ -z "$ASSET_URL" ]; then
    echo "Error: Asset $ASSET_NAME not found in latest release."
    exit 1
fi

CHECKSUM_URL=$(echo "$RELEASE_JSON" | jq -r '.assets[] | select(.name == "checksums.txt") | .browser_download_url')
if [ -z "$CHECKSUM_URL" ]; then
    echo "Error: checksums.txt not found in latest release."
    exit 1
fi

echo "Downloading $ASSET_NAME..."
curl -L -o "$ASSET_NAME" "$ASSET_URL"

echo "Downloading checksums.txt..."
curl -L -o "checksums.txt" "$CHECKSUM_URL"

echo "Verifying checksum..."
if ! sha256sum --ignore-missing --quiet -c checksums.txt; then
    echo "Error: Checksum verification failed."
    rm -f "$ASSET_NAME" checksums.txt
    exit 1
fi

echo "Extracting..."
tar -xzf "$ASSET_NAME"

echo "Installing binary to /usr/local/bin (requires sudo)..."
sudo install -m 0755 jiggablend /usr/local/bin/

echo "Creating manager wrapper script..."
cat << 'EOF' > jiggablend-manager.sh
#!/bin/bash
set -euo pipefail

# Wrapper to run jiggablend manager with test setup
# Run this in a directory where you want the db, storage, and logs
mkdir -p logs
rm -f logs/manager.log

# Initialize test configuration
jiggablend manager config enable localauth
jiggablend manager config set fixed-apikey jk_r0_test_key_123456789012345678901234567890 -f -y
jiggablend manager config add user test@example.com testpassword --admin -f -y

# Run manager
jiggablend manager -l logs/manager.log
EOF
chmod +x jiggablend-manager.sh
sudo install -m 0755 jiggablend-manager.sh /usr/local/bin/jiggablend-manager
rm -f jiggablend-manager.sh

echo "Creating runner wrapper script..."
cat << 'EOF' > jiggablend-runner.sh
#!/bin/bash
set -euo pipefail

# Wrapper to run jiggablend runner with test setup
# Usage: jiggablend-runner [MANAGER_URL]
# Default MANAGER_URL: http://localhost:8080
# Run this in a directory where you want the logs
MANAGER_URL="${1:-http://localhost:8080}"
mkdir -p logs
rm -f logs/runner.log

# Run runner
jiggablend runner -l logs/runner.log --api-key=jk_r0_test_key_123456789012345678901234567890 --manager "$MANAGER_URL"
EOF
chmod +x jiggablend-runner.sh
sudo install -m 0755 jiggablend-runner.sh /usr/local/bin/jiggablend-runner
rm -f jiggablend-runner.sh

echo "Cleaning up..."
rm -f "$ASSET_NAME" checksums.txt jiggablend

echo "Installation complete!"
echo "Binary: jiggablend"
echo "Wrappers: jiggablend-manager, jiggablend-runner"
echo "Run 'jiggablend-manager' to start the manager with test config."
echo "Run 'jiggablend-runner [url]' to start the runner, e.g., jiggablend-runner http://your-manager:8080"
echo "Note: Depending on whether you're running the manager or runner, additional dependencies like Blender, ImageMagick, or FFmpeg may be required. See the project README for details."

@@ -23,6 +23,7 @@ import (
"time" "time"
authpkg "jiggablend/internal/auth" authpkg "jiggablend/internal/auth"
"jiggablend/internal/runner/blender"
"jiggablend/pkg/executils" "jiggablend/pkg/executils"
"jiggablend/pkg/scripts" "jiggablend/pkg/scripts"
"jiggablend/pkg/types" "jiggablend/pkg/types"
@@ -2036,12 +2037,13 @@ func (s *Manager) runBlenderMetadataExtraction(blendFile, workDir, blenderVersio
} }
} }
// Execute Blender using executils // Execute Blender using executils (set LD_LIBRARY_PATH for tarball installs)
runEnv := blender.TarballEnv(blenderBinary, os.Environ())
result, err := executils.RunCommand( result, err := executils.RunCommand(
blenderBinary, blenderBinary,
[]string{"-b", blendFileRel, "--python", "extract_metadata.py"}, []string{"-b", blendFileRel, "--python", "extract_metadata.py"},
workDir, workDir,
nil, // inherit environment runEnv,
0, // no task ID for metadata extraction 0, // no task ID for metadata extraction
nil, // no process tracker needed nil, // no process tracker needed
) )

@@ -442,3 +442,32 @@ func (m *ManagerClient) DownloadBlender(version string) (io.ReadCloser, error) {
 	return resp.Body, nil
 }

+// blenderVersionsResponse is the response from GET /api/blender/versions.
+type blenderVersionsResponse struct {
+	Versions []struct {
+		Full string `json:"full"`
+	} `json:"versions"`
+}
+
+// GetLatestBlenderVersion returns the latest Blender version string (e.g. "4.2.3") from the manager.
+// Uses the flat versions list, which is newest-first.
+func (m *ManagerClient) GetLatestBlenderVersion() (string, error) {
+	resp, err := m.Request("GET", "/api/blender/versions", nil)
+	if err != nil {
+		return "", fmt.Errorf("failed to fetch blender versions: %w", err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return "", fmt.Errorf("blender versions returned status %d: %s", resp.StatusCode, string(body))
+	}
+	var out blenderVersionsResponse
+	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
+		return "", fmt.Errorf("failed to decode blender versions: %w", err)
+	}
+	if len(out.Versions) == 0 {
+		return "", fmt.Errorf("no blender versions available")
+	}
+	return out.Versions[0].Full, nil
+}
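
For reference, a minimal sketch of the wire shape the decoder above accepts, written as a test; it assumes the client package is named api, and the version values are made up:

package api

import (
    "encoding/json"
    "testing"
)

// Sketch: the response shape GetLatestBlenderVersion expects (values hypothetical).
func TestBlenderVersionsDecode(t *testing.T) {
    sample := `{"versions":[{"full":"4.2.3"},{"full":"4.1.1"},{"full":"3.6.14"}]}`
    var out blenderVersionsResponse
    if err := json.Unmarshal([]byte(sample), &out); err != nil {
        t.Fatal(err)
    }
    // The list is newest-first, so index 0 is the latest version.
    if out.Versions[0].Full != "4.2.3" {
        t.Errorf("expected newest-first ordering, got %q", out.Versions[0].Full)
    }
}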

@@ -6,6 +6,7 @@ import (
"log" "log"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"jiggablend/internal/runner/api" "jiggablend/internal/runner/api"
"jiggablend/internal/runner/workspace" "jiggablend/internal/runner/workspace"
@@ -85,3 +86,42 @@ func (m *Manager) GetBinaryForJob(version string) (string, error) {
return m.GetBinaryPath(version) return m.GetBinaryPath(version)
} }
// TarballEnv returns a copy of baseEnv with LD_LIBRARY_PATH set so that a
// tarball Blender installation can find its bundled libs (e.g. lib/python3.x).
// If blenderBinary is the system "blender" or has no path component, baseEnv is
// returned unchanged.
func TarballEnv(blenderBinary string, baseEnv []string) []string {
if blenderBinary == "" || blenderBinary == "blender" {
return baseEnv
}
if !strings.Contains(blenderBinary, string(os.PathSeparator)) {
return baseEnv
}
blenderDir := filepath.Dir(blenderBinary)
libDir := filepath.Join(blenderDir, "lib")
ldLib := libDir
for _, e := range baseEnv {
if strings.HasPrefix(e, "LD_LIBRARY_PATH=") {
existing := strings.TrimPrefix(e, "LD_LIBRARY_PATH=")
if existing != "" {
ldLib = libDir + ":" + existing
}
break
}
}
out := make([]string, 0, len(baseEnv)+1)
done := false
for _, e := range baseEnv {
if strings.HasPrefix(e, "LD_LIBRARY_PATH=") {
out = append(out, "LD_LIBRARY_PATH="+ldLib)
done = true
continue
}
out = append(out, e)
}
if !done {
out = append(out, "LD_LIBRARY_PATH="+ldLib)
}
return out
}
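
A quick sketch of the intended behavior as a test, assuming the package is named blender and a Linux path separator; the paths are hypothetical:

package blender

import "testing"

// Sketch: TarballEnv prepends the tarball's lib dir and leaves bare binary names alone.
func TestTarballEnv(t *testing.T) {
    base := []string{"PATH=/usr/bin", "LD_LIBRARY_PATH=/usr/lib"}

    // Tarball install: bundled lib dir is prepended to the existing LD_LIBRARY_PATH.
    got := TarballEnv("/opt/blender-4.2.3/blender", base)
    want := "LD_LIBRARY_PATH=/opt/blender-4.2.3/lib:/usr/lib"
    found := false
    for _, e := range got {
        if e == want {
            found = true
            break
        }
    }
    if !found {
        t.Errorf("want %q in %v", want, got)
    }

    // System binary with no path component: environment passes through unchanged.
    if len(TarballEnv("blender", base)) != len(base) {
        t.Error("expected baseEnv to be returned unchanged for a bare binary name")
    }
}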

@@ -0,0 +1,45 @@
// Package blender: GPU backend detection for HIP vs NVIDIA.
package blender

import (
    "bufio"
    "fmt"
    "os"
    "os/exec"
    "path/filepath"
    "strings"

    "jiggablend/pkg/scripts"
)

// DetectGPUBackends runs a minimal Blender script to detect whether HIP (AMD) and/or
// NVIDIA (CUDA/OptiX) devices are available. Use this to decide whether to force CPU
// for Blender < 4.x (only force when HIP is present, since HIP has no official support pre-4).
func DetectGPUBackends(blenderBinary, scriptDir string) (hasHIP, hasNVIDIA bool, err error) {
    scriptPath := filepath.Join(scriptDir, "detect_gpu_backends.py")
    if err := os.WriteFile(scriptPath, []byte(scripts.DetectGPUBackends), 0644); err != nil {
        return false, false, fmt.Errorf("write detection script: %w", err)
    }
    defer os.Remove(scriptPath)

    env := TarballEnv(blenderBinary, os.Environ())
    cmd := exec.Command(blenderBinary, "-b", "--python", scriptPath)
    cmd.Env = env
    cmd.Dir = scriptDir
    out, err := cmd.CombinedOutput()
    if err != nil {
        return false, false, fmt.Errorf("run blender detection: %w (output: %s)", err, string(out))
    }

    scanner := bufio.NewScanner(strings.NewReader(string(out)))
    for scanner.Scan() {
        line := strings.TrimSpace(scanner.Text())
        switch line {
        case "HAS_HIP":
            hasHIP = true
        case "HAS_NVIDIA":
            hasNVIDIA = true
        }
    }
    return hasHIP, hasNVIDIA, scanner.Err()
}
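
A minimal driver showing how the helper might be called on its own; this is a sketch that assumes it lives inside the jiggablend module (the internal package is not importable from outside) and that the binary path exists:

package main

import (
    "log"
    "os"

    "jiggablend/internal/runner/blender"
)

func main() {
    // The binary path is an assumption; any extracted Blender tarball binary works.
    hasHIP, hasNVIDIA, err := blender.DetectGPUBackends("/opt/blender-4.2.3/blender", os.TempDir())
    if err != nil {
        log.Fatalf("GPU detection failed: %v", err)
    }
    log.Printf("HIP=%v NVIDIA=%v", hasHIP, hasNVIDIA)
}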

@@ -40,6 +40,20 @@ type Runner struct {
 	fingerprint   string
 	fingerprintMu sync.RWMutex
+
+	// gpuLockedOut is set when logs indicate a GPU error (e.g. HIP "Illegal address");
+	// when true, the runner forces CPU rendering for all subsequent jobs.
+	gpuLockedOut   bool
+	gpuLockedOutMu sync.RWMutex
+
+	// hasHIP/hasNVIDIA are set at startup by running the latest Blender to detect GPU backends.
+	// Used to force CPU only for Blender < 4.x when HIP is present (no official HIP support pre-4).
+	// gpuDetectionFailed is true when detection could not run; we then force CPU for all versions (we could not determine HIP vs NVIDIA).
+	gpuBackendMu       sync.RWMutex
+	hasHIP             bool
+	hasNVIDIA          bool
+	gpuBackendProbed   bool
+	gpuDetectionFailed bool
 }
 // New creates a new runner.
@@ -119,6 +133,58 @@ func (r *Runner) Register(apiKey string) (int64, error) {
 	return id, nil
 }
+// DetectAndStoreGPUBackends downloads the latest Blender from the manager (if needed),
+// runs a detection script to see if HIP (AMD) and/or NVIDIA devices are available,
+// and stores the result. Call after Register. Used so we only force CPU for Blender < 4.x
+// when the runner has HIP (no official HIP support pre-4); NVIDIA is allowed.
+func (r *Runner) DetectAndStoreGPUBackends() {
+	r.gpuBackendMu.Lock()
+	defer r.gpuBackendMu.Unlock()
+	if r.gpuBackendProbed {
+		return
+	}
+	latestVer, err := r.manager.GetLatestBlenderVersion()
+	if err != nil {
+		log.Printf("GPU backend detection failed (could not get latest Blender version: %v). All jobs will use CPU because we could not determine HIP vs NVIDIA.", err)
+		r.gpuBackendProbed = true
+		r.gpuDetectionFailed = true
+		return
+	}
+	binaryPath, err := r.blender.GetBinaryPath(latestVer)
+	if err != nil {
+		log.Printf("GPU backend detection failed (could not get Blender binary: %v). All jobs will use CPU because we could not determine HIP vs NVIDIA.", err)
+		r.gpuBackendProbed = true
+		r.gpuDetectionFailed = true
+		return
+	}
+	hasHIP, hasNVIDIA, err := blender.DetectGPUBackends(binaryPath, r.workspace.BaseDir())
+	if err != nil {
+		log.Printf("GPU backend detection failed (script error: %v). All jobs will use CPU because we could not determine HIP vs NVIDIA.", err)
+		r.gpuBackendProbed = true
+		r.gpuDetectionFailed = true
+		return
+	}
+	r.hasHIP = hasHIP
+	r.hasNVIDIA = hasNVIDIA
+	r.gpuBackendProbed = true
+	r.gpuDetectionFailed = false
+	log.Printf("GPU backend detection: HIP=%v NVIDIA=%v (Blender < 4.x will force CPU only when HIP is present)", hasHIP, hasNVIDIA)
+}
+
+// HasHIP returns whether the runner detected HIP (AMD) devices. Used to force CPU for Blender < 4.x only when HIP is present.
+func (r *Runner) HasHIP() bool {
+	r.gpuBackendMu.RLock()
+	defer r.gpuBackendMu.RUnlock()
+	return r.hasHIP
+}
+
+// GPUDetectionFailed returns true when startup GPU backend detection could not run or failed. When true, all jobs use CPU because we could not determine HIP vs NVIDIA.
+func (r *Runner) GPUDetectionFailed() bool {
+	r.gpuBackendMu.RLock()
+	defer r.gpuBackendMu.RUnlock()
+	return r.gpuDetectionFailed
+}
 // Start starts the job polling loop.
 func (r *Runner) Start(pollInterval time.Duration) {
 	log.Printf("Starting job polling loop (interval: %v)", pollInterval)
@@ -238,6 +304,10 @@ func (r *Runner) executeJob(job *api.NextJobResponse) (err error) {
 		r.blender,
 		r.encoder,
 		r.processes,
+		r.IsGPULockedOut(),
+		r.HasHIP(),
+		r.GPUDetectionFailed(),
+		func() { r.SetGPULockedOut(true) },
 	)

 	ctx.Info(fmt.Sprintf("Task assignment received (job: %d, type: %s)",
@@ -388,3 +458,21 @@ func (r *Runner) GetFingerprint() string {
 func (r *Runner) GetID() int64 {
 	return r.id
 }

+// SetGPULockedOut sets whether GPU use is locked out due to a detected GPU error.
+// When true, the runner will force CPU rendering for all jobs.
+func (r *Runner) SetGPULockedOut(locked bool) {
+	r.gpuLockedOutMu.Lock()
+	defer r.gpuLockedOutMu.Unlock()
+	r.gpuLockedOut = locked
+	if locked {
+		log.Printf("GPU lockout enabled: GPU rendering disabled for subsequent jobs (CPU only)")
+	}
+}
+
+// IsGPULockedOut returns whether GPU use is currently locked out.
+func (r *Runner) IsGPULockedOut() bool {
+	r.gpuLockedOutMu.RLock()
+	defer r.gpuLockedOutMu.RUnlock()
+	return r.gpuLockedOut
+}

@@ -11,6 +11,8 @@ import (
"jiggablend/pkg/executils" "jiggablend/pkg/executils"
"jiggablend/pkg/types" "jiggablend/pkg/types"
"os/exec" "os/exec"
"strconv"
"strings"
"sync" "sync"
"time" "time"
) )
@@ -38,12 +40,22 @@ type Context struct {
 	Blender   *blender.Manager
 	Encoder   *encoding.Selector
 	Processes *executils.ProcessTracker
+
+	// GPULockedOut is set when the runner has detected a GPU error (e.g. HIP) and disables GPU for all jobs.
+	GPULockedOut bool
+	// HasHIP is true when the runner detected HIP (AMD) devices at startup. Used to force CPU for Blender < 4.x only when HIP is present.
+	HasHIP bool
+	// GPUDetectionFailed is true when startup GPU backend detection could not run; we force CPU for all versions (could not determine HIP vs NVIDIA).
+	GPUDetectionFailed bool
+	// OnGPUError is called when a GPU error line is seen in render logs; typically sets runner GPU lockout.
+	OnGPUError func()
 }

 // ErrJobCancelled indicates the manager-side job was cancelled during execution.
 var ErrJobCancelled = errors.New("job cancelled")

 // NewContext creates a new task context. frameEnd should be >= frame; if 0 or less than frame, it is treated as single-frame (frameEnd = frame).
+// gpuLockedOut is the runner's current GPU lockout state; hasHIP means the runner has HIP (AMD) devices (force CPU for Blender < 4.x only when true); gpuDetectionFailed means detection failed at startup (force CPU for all versions—could not determine HIP vs NVIDIA); onGPUError is called when a GPU error is detected in logs (may be nil).
 func NewContext(
 	taskID, jobID int64,
 	jobName string,
@@ -58,26 +70,34 @@ func NewContext(
 	blenderMgr *blender.Manager,
 	encoder *encoding.Selector,
 	processes *executils.ProcessTracker,
+	gpuLockedOut bool,
+	hasHIP bool,
+	gpuDetectionFailed bool,
+	onGPUError func(),
 ) *Context {
 	if frameEnd < frameStart {
 		frameEnd = frameStart
 	}
 	return &Context{
 		TaskID: taskID,
 		JobID: jobID,
 		JobName: jobName,
 		Frame: frameStart,
 		FrameEnd: frameEnd,
 		TaskType: taskType,
 		WorkDir: workDir,
 		JobToken: jobToken,
 		Metadata: metadata,
 		Manager: manager,
 		JobConn: jobConn,
 		Workspace: ws,
 		Blender: blenderMgr,
 		Encoder: encoder,
 		Processes: processes,
+		GPULockedOut: gpuLockedOut,
+		HasHIP: hasHIP,
+		GPUDetectionFailed: gpuDetectionFailed,
+		OnGPUError: onGPUError,
 	}
 }
@@ -158,6 +178,49 @@ func (c *Context) ShouldEnableExecution() bool {
 	return c.Metadata != nil && c.Metadata.EnableExecution != nil && *c.Metadata.EnableExecution
 }
+// ShouldForceCPU returns true if GPU should be disabled and CPU rendering forced
+// (runner GPU lockout, GPU detection failed at startup for any version, metadata force_cpu,
+// or Blender < 4.x when the runner has HIP).
+func (c *Context) ShouldForceCPU() bool {
+	if c.GPULockedOut {
+		return true
+	}
+	// Detection failed at startup: we could not determine HIP vs NVIDIA, so force CPU for all versions.
+	if c.GPUDetectionFailed {
+		return true
+	}
+	v := c.GetBlenderVersion()
+	major := parseBlenderMajor(v)
+	isPre4 := v != "" && major >= 0 && major < 4
+	// Blender < 4.x: force CPU when runner has HIP (no official HIP support).
+	if isPre4 && c.HasHIP {
+		return true
+	}
+	if c.Metadata != nil && c.Metadata.RenderSettings.EngineSettings != nil {
+		if v, ok := c.Metadata.RenderSettings.EngineSettings["force_cpu"]; ok {
+			if b, ok := v.(bool); ok && b {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// parseBlenderMajor returns the major version number from a string like "4.2.3" or "3.6".
+// Returns -1 if the version cannot be parsed.
+func parseBlenderMajor(version string) int {
+	version = strings.TrimSpace(version)
+	if version == "" {
+		return -1
+	}
+	parts := strings.SplitN(version, ".", 2)
+	major, err := strconv.Atoi(parts[0])
+	if err != nil {
+		return -1
+	}
+	return major
+}
 // IsJobCancelled checks whether the manager marked this job as cancelled.
 func (c *Context) IsJobCancelled() (bool, error) {
 	if c.Manager == nil {
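
A sketch of the edge cases parseBlenderMajor is expected to handle, written as a test; the package name is assumed and the cases are illustrative:

package task

import "testing"

// Sketch: version strings that feed the Blender < 4.x gating in ShouldForceCPU.
func TestParseBlenderMajor(t *testing.T) {
    cases := map[string]int{
        "4.2.3":   4,  // normal three-part version
        "3.6":     3,  // two-part version
        " 4.5.0 ": 4,  // surrounding whitespace is trimmed
        "":        -1, // empty: unparseable
        "alpha":   -1, // non-numeric: unparseable
    }
    for in, want := range cases {
        if got := parseBlenderMajor(in); got != want {
            t.Errorf("parseBlenderMajor(%q) = %d, want %d", in, got, want)
        }
    }
}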

@@ -25,6 +25,32 @@ func NewRenderProcessor() *RenderProcessor {
 	return &RenderProcessor{}
 }

+// gpuErrorSubstrings are log line substrings that indicate a GPU backend error
+// (matched case-insensitively); any match triggers full GPU lockout.
+var gpuErrorSubstrings = []string{
+	"illegal address in hip", // HIP (AMD), e.g. "Illegal address in HIP"
+	"hiperror",               // hipError* codes
+	"hip error",
+	"cuda error",
+	"cuerror",
+	"optix error",
+	"oneapi error",
+	"opencl error",
+}
+
+// checkGPUErrorLine checks a log line for GPU error indicators and triggers runner GPU lockout if found.
+func (p *RenderProcessor) checkGPUErrorLine(ctx *Context, line string) {
+	lower := strings.ToLower(line)
+	for _, sub := range gpuErrorSubstrings {
+		if strings.Contains(lower, sub) {
+			if ctx.OnGPUError != nil {
+				ctx.OnGPUError()
+			}
+			ctx.Warn(fmt.Sprintf("GPU error detected in log (%q); GPU disabled for subsequent jobs", sub))
+			return
+		}
+	}
+}
 // Process executes a render task.
 func (p *RenderProcessor) Process(ctx *Context) error {
 	if err := ctx.CheckCancelled(); err != nil {
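
For illustration, sample log lines that should (and should not) trip the lockout, exercising the same case-insensitive matching checkGPUErrorLine uses; the log lines and package name are assumptions:

package task

import (
    "strings"
    "testing"
)

// Sketch: replicate the matching loop against hypothetical Blender log output.
func TestGPUErrorSubstrings(t *testing.T) {
    matches := func(line string) bool {
        lower := strings.ToLower(line)
        for _, sub := range gpuErrorSubstrings {
            if strings.Contains(lower, sub) {
                return true
            }
        }
        return false
    }
    for _, line := range []string{
        "Illegal address in HIP queue copy_from_device",
        "CUDA error: out of memory in cuMemAlloc",
        "OptiX Error: CUDA_ERROR_ILLEGAL_ADDRESS",
    } {
        if !matches(line) {
            t.Errorf("expected GPU error match for %q", line)
        }
    }
    // A normal render progress line must not trigger lockout.
    if matches("Fra:1 Mem:120.00M | Rendering 1 / 64 samples") {
        t.Error("normal progress line should not match")
    }
}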
@@ -77,6 +103,19 @@ func (p *RenderProcessor) Process(ctx *Context) error {
 	// We always render EXR (linear) for VFX accuracy; job output_format is the deliverable (EXR sequence or video).
 	renderFormat := "EXR"

+	if ctx.ShouldForceCPU() {
+		v := ctx.GetBlenderVersion()
+		major := parseBlenderMajor(v)
+		isPre4 := v != "" && major >= 0 && major < 4
+		if ctx.GPUDetectionFailed {
+			ctx.Info("GPU backend detection failed at startup—we could not determine whether this machine has HIP (AMD) or NVIDIA GPUs, so rendering will use CPU to avoid compatibility issues")
+		} else if isPre4 && ctx.HasHIP {
+			ctx.Info("Blender < 4.x has no official HIP support: using CPU rendering only")
+		} else {
+			ctx.Info("GPU lockout active: using CPU rendering only")
+		}
+	}
+
 	// Create render script
 	if err := p.createRenderScript(ctx, renderFormat); err != nil {
 		return err
@@ -142,13 +181,22 @@ func (p *RenderProcessor) createRenderScript(ctx *Context, renderFormat string)
 		return errors.New(errMsg)
 	}

-	// Write render settings if available
+	// Write render settings: merge job metadata with runner force_cpu (GPU lockout)
+	var settingsMap map[string]interface{}
 	if ctx.Metadata != nil && ctx.Metadata.RenderSettings.EngineSettings != nil {
-		settingsJSON, err := json.Marshal(ctx.Metadata.RenderSettings)
+		raw, err := json.Marshal(ctx.Metadata.RenderSettings)
 		if err == nil {
-			if err := os.WriteFile(renderSettingsFilePath, settingsJSON, 0644); err != nil {
-				ctx.Warn(fmt.Sprintf("Failed to write render settings file: %v", err))
-			}
+			_ = json.Unmarshal(raw, &settingsMap)
 		}
 	}
+	if settingsMap == nil {
+		settingsMap = make(map[string]interface{})
+	}
+	settingsMap["force_cpu"] = ctx.ShouldForceCPU()
+	settingsJSON, err := json.Marshal(settingsMap)
+	if err == nil {
+		if err := os.WriteFile(renderSettingsFilePath, settingsJSON, 0644); err != nil {
+			ctx.Warn(fmt.Sprintf("Failed to write render settings file: %v", err))
+		}
+	}
@@ -178,9 +226,9 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
 	cmd := exec.Command(blenderBinary, args...)
 	cmd.Dir = ctx.WorkDir

-	// Set up environment with custom HOME directory
+	// Set up environment: LD_LIBRARY_PATH for tarball Blender, then custom HOME
 	env := os.Environ()
-	// Remove existing HOME if present and add our custom one
+	env = blender.TarballEnv(blenderBinary, env)
 	newEnv := make([]string, 0, len(env)+1)
 	for _, e := range env {
 		if !strings.HasPrefix(e, "HOME=") {
@@ -211,7 +259,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
 	ctx.Processes.Track(ctx.TaskID, cmd)
 	defer ctx.Processes.Untrack(ctx.TaskID)

-	// Stream stdout
+	// Stream stdout and watch for GPU error lines (lock out all GPU on any backend error)
 	stdoutDone := make(chan bool)
 	go func() {
 		defer close(stdoutDone)
@@ -219,6 +267,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
 		for scanner.Scan() {
 			line := scanner.Text()
 			if line != "" {
+				p.checkGPUErrorLine(ctx, line)
 				shouldFilter, logLevel := blender.FilterLog(line)
 				if !shouldFilter {
 					ctx.Log(logLevel, line)
@@ -227,7 +276,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
 		}
 	}()

-	// Stream stderr
+	// Stream stderr and watch for GPU error lines
 	stderrDone := make(chan bool)
 	go func() {
 		defer close(stderrDone)
@@ -235,6 +284,7 @@ func (p *RenderProcessor) runBlender(ctx *Context, blenderBinary, blendFile, out
 		for scanner.Scan() {
 			line := scanner.Text()
 			if line != "" {
+				p.checkGPUErrorLine(ctx, line)
 				shouldFilter, logLevel := blender.FilterLog(line)
 				if !shouldFilter {
 					if logLevel == types.LogLevelInfo {

@@ -11,3 +11,6 @@ var UnhideObjects string
 //go:embed scripts/render_blender.py.template
 var RenderBlenderTemplate string

+//go:embed scripts/detect_gpu_backends.py
+var DetectGPUBackends string

@@ -0,0 +1,39 @@
# Minimal script to detect HIP (AMD) and NVIDIA (CUDA/OptiX) backends for Cycles.
# Run with: blender -b --python detect_gpu_backends.py
# Prints HAS_HIP and/or HAS_NVIDIA to stdout, one per line.
import sys


def main():
    try:
        prefs = bpy.context.preferences
        if not hasattr(prefs, 'addons') or 'cycles' not in prefs.addons:
            return
        cprefs = prefs.addons['cycles'].preferences
        has_hip = False
        has_nvidia = False
        for device_type in ('HIP', 'CUDA', 'OPTIX'):
            try:
                cprefs.compute_device_type = device_type
                cprefs.refresh_devices()
                devs = []
                if hasattr(cprefs, 'get_devices'):
                    devs = cprefs.get_devices()
                elif hasattr(cprefs, 'devices') and cprefs.devices:
                    devs = list(cprefs.devices) if hasattr(cprefs.devices, '__iter__') else [cprefs.devices]
                if devs:
                    if device_type == 'HIP':
                        has_hip = True
                    if device_type in ('CUDA', 'OPTIX'):
                        has_nvidia = True
            except Exception:
                pass
        if has_hip:
            print('HAS_HIP', flush=True)
        if has_nvidia:
            print('HAS_NVIDIA', flush=True)
    except Exception as e:
        print('ERROR', str(e), file=sys.stderr, flush=True)
        sys.exit(1)


import bpy
main()